In [1]:
import numpy as np
import scipy as sp

import matplotlib.pyplot as plt
import matplotlib.axes as axe
import pandas as pd
import datetime as dt
import gurobipy as gp
from gurobipy import GRB
import cvxpy as cp
import yaml

import random
from itertools import chain, combinations, tee
import time

import os

# Origin-Destination Pair Generation

In [2]:
start_time = time.time()

directory_path = '../data/pems___101_N_Sep_2024/'
file_name = 'data_capped_consolidated.csv'

df_data_consistent = pd.read_csv(directory_path + file_name, index_col=0)

end_time = time.time()

print()
print("Time:", end_time - start_time)




Time: 0.021007776260375977


In [3]:
num_data = len(df_data_consistent.columns)

flow_data_consistent = np.float_(df_data_consistent.iloc[-1].tolist())

# print(num_data)

station_type_list = list(df_data_consistent.loc["Station Type"])
origin_list = [0] + [index for index in range(num_data) if station_type_list[index] == "on"]
destination_list = [index for index in range(num_data) if station_type_list[index] == "off"] + [num_data - 1]
mainline_list = [index for index in range(num_data) if station_type_list[index] == "main"][1:-1]

print("origin_list:", origin_list)
print("destination_list:", destination_list)

# Define constraint matrix
# for index in range(num_data):

counter = 0
od_pairs_dict = {}
for origin_index in origin_list:
    for destination_index in destination_list:
        if destination_index > origin_index:
            od_pairs_dict[counter] = [origin_index, destination_index]
            counter += 1
            
od_pairs_dict


origin_list: [0, 3, 7, 9, 14, 16, 18, 20, 23, 25, 29, 31, 33, 37, 40, 44, 47, 49, 51, 55]
destination_list: [1, 5, 11, 13, 22, 27, 35, 39, 42, 46, 53, 57, 59, 60]


{0: [0, 1],
 1: [0, 5],
 2: [0, 11],
 3: [0, 13],
 4: [0, 22],
 5: [0, 27],
 6: [0, 35],
 7: [0, 39],
 8: [0, 42],
 9: [0, 46],
 10: [0, 53],
 11: [0, 57],
 12: [0, 59],
 13: [0, 60],
 14: [3, 5],
 15: [3, 11],
 16: [3, 13],
 17: [3, 22],
 18: [3, 27],
 19: [3, 35],
 20: [3, 39],
 21: [3, 42],
 22: [3, 46],
 23: [3, 53],
 24: [3, 57],
 25: [3, 59],
 26: [3, 60],
 27: [7, 11],
 28: [7, 13],
 29: [7, 22],
 30: [7, 27],
 31: [7, 35],
 32: [7, 39],
 33: [7, 42],
 34: [7, 46],
 35: [7, 53],
 36: [7, 57],
 37: [7, 59],
 38: [7, 60],
 39: [9, 11],
 40: [9, 13],
 41: [9, 22],
 42: [9, 27],
 43: [9, 35],
 44: [9, 39],
 45: [9, 42],
 46: [9, 46],
 47: [9, 53],
 48: [9, 57],
 49: [9, 59],
 50: [9, 60],
 51: [14, 22],
 52: [14, 27],
 53: [14, 35],
 54: [14, 39],
 55: [14, 42],
 56: [14, 46],
 57: [14, 53],
 58: [14, 57],
 59: [14, 59],
 60: [14, 60],
 61: [16, 22],
 62: [16, 27],
 63: [16, 35],
 64: [16, 39],
 65: [16, 42],
 66: [16, 46],
 67: [16, 53],
 68: [16, 57],
 69: [16, 59],
 70: [16, 60],

In [4]:
num_od = len(od_pairs_dict)

constraint_matrix = np.zeros((num_data, num_od))
for i in range(num_data):
    if station_type_list[i] == "on" or i == 0:
        indices_set_to_one = [counter for counter in range(num_od) if od_pairs_dict[counter][0] == i]
        constraint_matrix[i, indices_set_to_one] = 1
    elif station_type_list[i] == "off" or i == num_data - 1:
        indices_set_to_one = [counter for counter in range(num_od) if od_pairs_dict[counter][1] == i]
        constraint_matrix[i, indices_set_to_one] = 1
    else:
        indices_set_to_one = [counter for counter in range(num_od) \
                              if od_pairs_dict[counter][0] < i and od_pairs_dict[counter][1] > i]
        constraint_matrix[i, indices_set_to_one] = 1
        
constraint_matrix

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 1.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

## Optimization Problem - Hard Constraints for Flows:

In [5]:
# Optimization Problem:

# Variables:
num_od = len(od_pairs_dict)
data_od = cp.Variable(num_od)

# Cost Function:
func = 0.0
for k in range(num_od):
    func += cp.entr(data_od[k]) + data_od[k]

# Objective:
objective = cp.Maximize(func)

# Constraints:
constraints = []
constraints += [constraint_matrix @ data_od == flow_data_consistent]
constraints += [data_od >= 0.0]

# Problem:
prob = cp.Problem(objective, constraints)

# Solve:
result = prob.solve()

if prob.status == "infeasible":
    print("Problem is infeasible") 
else:
    print("Problem is feasible") 

# Extract Values:

data_od_values = data_od.value
print("data_od_values.shape:", data_od_values.shape)
print("data_od_values:", data_od_values)

Problem is infeasible


AttributeError: 'NoneType' object has no attribute 'shape'

## Optimization Problem - Soft Constraints for Flows:

In [42]:
# Optimization Problem:

# Variables:
num_od = len(od_pairs_dict)
data_od = cp.Variable(num_od)

# Cost Function:
func = 0.0
for k in range(num_od):
    func += - cp.entr(data_od[k]) - data_od[k]
    
flow_constraint_factor = 1.0E+3
data_mean = np.float_(df_data_consistent.loc["Mean (capped flow)", :])
data_std = list(np.float_(df_data_consistent.loc["Std Dev (capped flow)", :]))
print("data_mean.shape", data_mean.shape)
print("len(data_std)", len(data_std))
print("constraint_matrix.shape", constraint_matrix.shape)

kernel = np.diag([1/std**2 for std in data_std])
print("kernel.shape", kernel.shape)

func += flow_constraint_factor * cp.quad_form(constraint_matrix @ data_od - data_mean, kernel)

# Objective:
objective = cp.Minimize(func)

# Constraints:
constraints = []
# constraints += [constraint_matrix @ data_od == flow_data_consistent]
constraints += [data_od >= 0.0]

# Problem:
prob = cp.Problem(objective, constraints)

# Solve:
result = prob.solve()

if prob.status == "infeasible":
    print("Problem is infeasible") 
else:
    print("Problem is feasible") 

# Extract Values:

data_od_values = data_od.value
print("data_od_values.shape:", data_od_values.shape)
print()
print("data_od_values:", data_od_values)

data_mean.shape (61,)
len(data_std) 61
constraint_matrix.shape (61, 166)
kernel.shape (61, 61)
Problem is feasible
data_od_values.shape: (166,)

data_od_values: [5.82297676e+02 7.63480536e+01 5.48249846e+02 5.32487188e+02
 3.03604392e+02 4.61749248e+02 2.17782563e+02 2.48417025e+02
 3.28424750e+01 2.88268135e+02 3.45631505e+02 1.76287464e+02
 3.61396771e+02 1.20569185e+03 1.35317496e+01 9.71729804e+01
 9.43788698e+01 5.38111941e+01 8.18410743e+01 3.85997516e+01
 4.40299841e+01 5.82096488e+00 5.10936590e+01 6.12613334e+01
 3.12452309e+01 6.40463385e+01 2.13704129e+02 3.63166888e+01
 3.52722251e+01 2.01103696e+01 3.05863043e+01 1.44254498e+01
 1.64547886e+01 2.17447229e+00 1.90948796e+01 2.28945987e+01
 1.16768015e+01 2.39354439e+01 7.98741681e+01 2.73109373e+01
 2.65254750e+01 1.51233243e+01 2.30015297e+01 1.08481430e+01
 1.23742763e+01 1.63498751e+00 1.43597359e+01 1.72171908e+01
 8.78117977e+00 1.79999428e+01 6.00629247e+01 4.68543975e+00
 7.12699162e+00 3.36039489e+00 3.83342123e+00 

In [43]:
print("Residual:", np.linalg.norm(constraint_matrix @ data_od_values - data_mean))

Residual: 5105.858753630363


In [44]:
## Store data in pandas:
# Start counter
# End counter
# Start station
# End station
# O-D flow

dict_od_data_to_save = {}
dict_od_data_to_save["Data Category"] \
    = ["Start Index", "End Index", "Start Station", "End Station", "O-D Flow"]

for counter in range(num_od):
    print("counter:", counter)
    dict_od_data_to_save[counter] = []
    
    start_index = od_pairs_dict[counter][0]
    end_index = od_pairs_dict[counter][1]
    print("start_index:", start_index)
    print("end_index:", end_index)
    print()
    
    dict_od_data_to_save[counter] += [start_index]
    dict_od_data_to_save[counter] += [end_index]
    dict_od_data_to_save[counter] += [df_data_consistent.loc["Start Station", str(start_index)]]
    dict_od_data_to_save[counter] += [df_data_consistent.loc["End Station", str(end_index)]]
    dict_od_data_to_save[counter] += [data_od_values[counter]]

dict_od_data_to_save

counter: 0
start_index: 0
end_index: 1

counter: 1
start_index: 0
end_index: 5

counter: 2
start_index: 0
end_index: 11

counter: 3
start_index: 0
end_index: 13

counter: 4
start_index: 0
end_index: 22

counter: 5
start_index: 0
end_index: 27

counter: 6
start_index: 0
end_index: 35

counter: 7
start_index: 0
end_index: 39

counter: 8
start_index: 0
end_index: 42

counter: 9
start_index: 0
end_index: 46

counter: 10
start_index: 0
end_index: 53

counter: 11
start_index: 0
end_index: 57

counter: 12
start_index: 0
end_index: 59

counter: 13
start_index: 0
end_index: 60

counter: 14
start_index: 3
end_index: 5

counter: 15
start_index: 3
end_index: 11

counter: 16
start_index: 3
end_index: 13

counter: 17
start_index: 3
end_index: 22

counter: 18
start_index: 3
end_index: 27

counter: 19
start_index: 3
end_index: 35

counter: 20
start_index: 3
end_index: 39

counter: 21
start_index: 3
end_index: 42

counter: 22
start_index: 3
end_index: 46

counter: 23
start_index: 3
end_index: 53

count

{'Data Category': ['Start Index',
  'End Index',
  'Start Station',
  'End Station',
  'O-D Flow'],
 0: [0,
  1,
  '001___402376_Palo_Alto___main',
  '004___425696_Palo_Alto___off',
  582.2976761845143],
 1: [0,
  5,
  '001___402376_Palo_Alto___main',
  '010___403059_East_Palo_Alto___off',
  76.34805358017009],
 2: [0,
  11,
  '001___402376_Palo_Alto___main',
  '027___403206_Redwood_City___off',
  548.2498455146657],
 3: [0,
  13,
  '001___402376_Palo_Alto___main',
  '032___410094_Redwood_City___off',
  532.4871879463963],
 4: [0,
  22,
  '001___402376_Palo_Alto___main',
  '045___410111_Redwood_City___off',
  303.6043922010056],
 5: [0,
  27,
  '001___402376_Palo_Alto___main',
  '051___409888_San_Mateo___off',
  461.74924758604055],
 6: [0,
  35,
  '001___402376_Palo_Alto___main',
  '068___405845_San_Mateo___off',
  217.78256283355168],
 7: [0,
  39,
  '001___402376_Palo_Alto___main',
  '076___403287_San_Mateo___off',
  248.41702462356056],
 8: [0,
  42,
  '001___402376_Palo_Alto___mai

In [45]:
df_od_data_to_save = pd.DataFrame(dict_od_data_to_save)
df_od_data_to_save.to_csv(directory_path + 'data_od.csv', index = False)


# Scratch Work