In [1]:
import warnings
warnings.filterwarnings("ignore")
import scipy.interpolate
import pandas as pd
import numpy as np
import os
import itertools
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [2]:
def vectorize_od(input_od):
    """
    Flatten an OD matrix into a single-column frame of flows.

    The matrix index is moved into an 'id' column and the remaining columns
    are melted into ('variable', 'value') pairs. Every entry whose 'id' or
    'variable' equals 'Total' is discarded. The remaining entries are ordered
    by the integer obtained from concatenating the 'id' and 'variable' labels.

    params:
        input_od (pd.DataFrame): OD matrix whose index is named 'id'.
            The input frame is not modified.
    returns:
        pd.DataFrame with a single column labelled 0 and a fresh 0..n-1 index,
        holding the flows in the order described above.
    """
    melted = input_od.reset_index().melt(id_vars=['id'])

    # Remove the marginal totals (both the 'Total' row and the 'Total' column).
    is_total = (melted['id'] == 'Total') | (melted['variable'] == 'Total')
    melted = melted.loc[~is_total].copy()

    # Sort key: origin label followed by destination label, read as one integer.
    melted['OD'] = (melted['id'].astype(str) + melted['variable'].astype(str)).astype("int64")

    # Third column of the melted frame holds the flow values.
    value_column = melted.columns[2]
    vector = melted[['OD', value_column]].sort_values("OD").drop("OD", axis=1)

    vector.columns = range(vector.columns.size)
    return vector.reset_index(drop=True)

In [3]:
def has_consecutive_numbers(lst):
    """
    Return True if any element of `lst` is immediately followed by its
    successor (i.e. lst[k] + 1 == lst[k + 1] for some k), otherwise False.

    Sequences with fewer than two elements always yield False.
    """
    return any(current + 1 == following for current, following in zip(lst, lst[1:]))

def filter_consecutive_lists(lists):
    """
    Keep only the sequences that contain no pair of adjacent consecutive
    numbers, as decided by `has_consecutive_numbers`.

    Each surviving sequence is returned as a new list, in the original order.
    """
    return [list(candidate) for candidate in lists if not has_consecutive_numbers(candidate)]

In [4]:
# Every 4-element subset of the 20 interval indices (0..19), keeping only
# those in which no two chosen indices are adjacent.
candidate_tuples = itertools.combinations(range(20), 4)
combinations = filter_consecutive_lists(candidate_tuples)

In [5]:
# Make sure every combination includes the first (0) and last (19) interval.
# The lists are modified in place.
for hook_points in combinations:
    if 0 not in hook_points:
        hook_points.insert(0, 0)
    if 19 not in hook_points:
        hook_points.append(19)

In [6]:
# Adding the endpoints 0 and 19 may have created new adjacent pairs
# (e.g. 0 next to 1, or 18 next to 19), so filter those combinations out again.
combinations = filter_consecutive_lists(combinations)

In [7]:
# Number of candidate hook-point combinations left after filtering.
len(combinations)

1548

## Generate Total demand vector (D)

In [8]:
# Switch into the data folder, but only when we are not already inside it.
# An unconditional chdir makes this cell fail on a second run, because it
# would then look for a nested "Experiment pattern ODs" folder.
DATA_DIR = "Experiment pattern ODs"
if os.path.basename(os.getcwd()) != DATA_DIR:
    os.chdir(DATA_DIR)

In [9]:
# Folder (relative to the current working directory) holding the OD matrices
# of the pattern being analysed. The file names are listed from this same
# folder; previously they were listed from "4/" while the files were read
# from "9", which only works if both folders contain identical file names.
PATTERN_DIR = "9"
pattern_path = os.path.join(os.getcwd(), PATTERN_DIR)
filenames = os.listdir(pattern_path)
# NOTE(review): os.listdir returns entries in arbitrary order, and the column
# order below is used as the time axis - confirm the listing order is the
# intended chronological one.

# The first and the last directory entries are skipped.
od_columns = []
for i in range(1, len(filenames) - 1):
    filepath = os.path.join(pattern_path, filenames[i])
    od_vector = vectorize_od(pd.read_csv(filepath, index_col="id"))
    # The first vector keeps its column label 0; later ones are labelled by
    # their position in `filenames`.
    if i > 1:
        od_vector = od_vector.rename({0: i}, axis=1)
    od_columns.append(od_vector)

# One row per OD pair, one column per file. A single concat avoids re-copying
# the growing frame on every iteration.
od_series = pd.concat(od_columns, axis=1)

# Total demand per OD pair, summed over all loaded files.
total_od = od_series.sum(axis=1)

## Generate incidence vector (I)

In [10]:
def interpolate(hook_pts, data, length):
    """
    Reconstruct a full series from a few known points using PCHIP
    (piecewise cubic Hermite) interpolation.

    param hook_pts: x-positions at which the values are known
    param data: values observed at `hook_pts`
    param length: number of samples to produce; the interpolant is evaluated
                  at the integer positions 0, 1, ..., length - 1
    returns: numpy array with the interpolated values
    """
    pchip = scipy.interpolate.PchipInterpolator(hook_pts, data)
    return pchip(np.array(range(length)))

In [11]:
# For every OD pair, find the hook-point combination whose PCHIP
# reconstruction of the 20-interval series has the lowest mean squared error.
# OD pairs with zero total demand keep an empty list.
od_key_intervals = {i: [] for i in range(len(od_series))}

# Pull the values out of pandas once; indexing the DataFrame with .iloc for
# every (OD pair, combination) was the dominant cost of the inner loop.
od_values = od_series.to_numpy()

for od_index in tqdm(range(len(od_series))):
    if total_od[od_index] == 0:
        continue
    time_series = od_values[od_index]
    errors = []
    for combination in combinations:
        hook_points = list(combination)
        interpolated_series = interpolate(hook_points, time_series[hook_points], 20)
        errors.append(mean_squared_error(time_series, interpolated_series))
    # Ties are resolved in favour of the earliest combination in the list.
    od_key_intervals[od_index] = combinations[errors.index(min(errors))]

  0%|          | 0/80656 [00:00<?, ?it/s]

## Generate Weight vector (W)

In [12]:
# Weight of an interval = total demand of all OD pairs that selected it as one
# of their key intervals.
W = dict.fromkeys(range(20), 0)
for od in range(len(od_series)):
    selected_intervals = od_key_intervals[od]
    for interval in W:
        if interval in selected_intervals:
            W[interval] += total_od[od]

## Get final key intervals

In [13]:
# Thresholds derived from the ranked interval weights:
#   threshold     - the 5th-largest weight; intervals at or above it are key
#                   intervals on their own.
#   min_threshold - the 10th-largest weight; intervals below it are ignored.
ranked_weights = sorted(W.values(), reverse=True)
threshold = ranked_weights[4]
min_threshold = ranked_weights[9]
n_intervals = len(W)

key_intervals = []
start = 0
# Accumulated weight of the current run of medium-weight intervals.
# (Previously named `sum`, which shadowed the builtin for the rest of the
# notebook.)
run_weight = 0
interval = 0
while interval < n_intervals:
    if W[interval] >= threshold:
        # Heavy interval: selected directly.
        key_intervals.append(interval)
        start = interval
        run_weight = 0
        interval += 1
    elif W[interval] < min_threshold:
        # Light interval: skipped.
        interval += 1
    else:
        # Medium interval: extend a run over the following intervals whose
        # weight lies in (min_threshold, threshold]. The bounds check stops
        # the look-ahead from reading past the last interval, which used to
        # raise KeyError when a run reached the final interval.
        start = interval
        run_weight = W[interval]
        while (interval + 1 < n_intervals
               and min_threshold < W[interval + 1] <= threshold):
            interval += 1
            run_weight += W[interval]
        if run_weight >= threshold:
            # The run is heavy enough as a whole: select its middle interval
            # and resume scanning right after it, so the second half of the
            # run is evaluated again as a run of its own.
            middle = (start + interval) // 2
            key_intervals.append(middle)
            interval = middle
        # Always move forward and start the next run from zero. Previously a
        # run that fell short of `threshold` left `interval` unchanged, so the
        # same interval's weight was added again and again until the check
        # passed (or forever, if that weight was 0).
        run_weight = 0
        interval += 1

In [None]:
# Write the interval weights to CSV: one row per entry of W, no index column.
weight_values = list(W.values())
pd.DataFrame(weight_values).to_csv("Pattern 9 weight factor.csv", index=False)