CASA 0002 - Urban Simulation
Final Asssessment - London Underground Resilience
Gavin Rolls
9 February 2024

Environment Setup

In [15]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Part II - Spatial Interaction Models

Data Loading & Preprocessing

In [16]:
#Reading in flows
flows = pd.read_csv("./Data/london_flows.csv")

#Check data loaded
flows.head()

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097
1,Abbey Road,Beckton,1,599,442,8510.121774
2,Abbey Road,Blackwall,3,599,665,3775.448872
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422
4,Abbey Road,Canning Town,37,599,15428,2228.923167


In [17]:
#Metric Calculations(Taken from Prac3)
def CalcRSqaured(observed, estimated):
    """Calculate the r^2 from a series of observed and estimated target values
    inputs:
    Observed: Series of actual observed values
    estimated: Series of predicted values"""
    
    r, p = scipy.stats.pearsonr(observed, estimated)
    R2 = r **2
    
    return R2

def CalcRMSE(observed, estimated):
    """Calculate Root Mean Square Error between a series of observed and estimated values
    inputs:
    Observed: Series of actual observed values
    estimated: Series of predicted values"""
    
    res = (observed -estimated)**2
    RMSE = round(np.sqrt(res.mean()), 3)
    
    return RMSE

Set up Origin Constrained Model with Exponential Decay.

I will use population as our origin mass factor and employment count as our destination mass factor to simulate maximum commute flow during the morning rush.

In [18]:
#Calculate log of destination employment count with small addition to solve div by zero error
flows['log_dest_jobs'] = np.log(flows['jobs'] + .001)

#Formula for origin constrained equation (with exponential decay)
eq_form = 'flows ~ station_origin + log_dest_jobs + distance-1'

#Doubly Constrained Model with smf
sim = smf.glm(formula = eq_form, data = flows, family=sm.families.Poisson()).fit()

print(sim.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  flows   No. Observations:                61474
Model:                            GLM   Df Residuals:                    61073
Model Family:                 Poisson   Df Model:                          400
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -9.2195e+05
Date:                Fri, 23 Feb 2024   Deviance:                   1.6717e+06
Time:                        15:40:51   Pearson chi2:                 2.42e+06
No. Iterations:                    26   Pseudo R-squ. (CS):              1.000
Covariance Type:            nonrobust                                         
                                                  coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------

In [27]:
#Generate Predictions and add to flows dataframe
flows["flows_default"] = np.round(sim.mu)

#Summary Statistics
print("RSquared =")
print(CalcRSqaured(flows["flows"], flows["flows_default"]))
print("RMSE =")
print(CalcRMSE(flows["flows"], flows["flows_default"]))

RSquared =
0.4482759616475261
RMSE =
97.845


Our log_jobs parameter is 0.7509 and our distance parameter is -0.002 (Remember beta is the inverse of this)

In [20]:
#Matrix View (Code Taken from Practical 3)
matrix = flows.pivot_table(values ="flows_default", index="station_origin", columns = "station_destination",
                                    aggfunc=np.sum, margins=True)
matrix

  matrix = flows.pivot_table(values ="OriginConstrainedExp", index="station_origin", columns = "station_destination",
  matrix = flows.pivot_table(values ="OriginConstrainedExp", index="station_origin", columns = "station_destination",
  matrix = flows.pivot_table(values ="OriginConstrainedExp", index="station_origin", columns = "station_destination",


station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,8.0,598.0
Acton Central,,,,,,,,,,,...,,,,,,,0.0,,,1223.0
Acton Town,,,,13.0,13.0,,14.0,0.0,,16.0,...,13.0,3.0,2.0,20.0,,0.0,,1.0,,3745.0
Aldgate,,,1.0,,37.0,,,0.0,,27.0,...,2.0,,3.0,2.0,,1.0,,1.0,,2884.0
Aldgate East,,,1.0,40.0,,,0.0,0.0,,29.0,...,2.0,1.0,3.0,2.0,,1.0,,1.0,,3165.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,1.0,41.0,52.0,,,,,30.0,...,2.0,,6.0,,,,,,,4861.0
Woodgrange Park,,1.0,,,,,,,,,...,,,,,,,,,,534.0
Woodside Park,,,2.0,19.0,19.0,,0.0,,,31.0,...,3.0,,4.0,,,,,,,3099.0
Woolwich Arsenal,34.0,,,,,37.0,,,,,...,,,,,,,,,,7894.0


## Scenario A - Decrease in Jobs at Canary Wharf

In [25]:
#Create new jobs column for scenario a and cut Canary Wharf's jobs in half
flows["jobs_scenario_a"] = flows["jobs"]

#Find Canary Wharf Rows
cw_indices = flows.index[flows['station_destination'] == 'Canary Wharf']

#Divide by Two
flows.loc[cw_indices, 'jobs_scenario_a'] /= 2

#Check it's worked
flows.head()

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_dest_jobs,OriginConstrainedExp,jobs_scenario_a
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097,11.271478,78.0,78549
1,Abbey Road,Beckton,1,599,442,8510.121774,6.091312,2.0,442
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.499789,4.0,665
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422,10.981421,99.0,29386
4,Abbey Road,Canning Town,37,599,15428,2228.923167,9.643939,56.0,15428


In [31]:
#Compute new flows using 'raw' equation

#Set parameters 
params = sim.params
alpha_i = params[:2]
gamma = params[-2]
beta = -params[-1]

coefs = pd.DataFrame(sim.params)
coefs.reset_index(inplace=True)
coefs.rename(columns = {0:"alpha_i", "index":"coef"}, inplace = True)

#GR Edit - Strip coef column content correctly
coefs["coef"] = coefs["coef"].str.lstrip('station_origin[').str.rstrip(']')

flows = flows.merge(coefs, left_on="station_origin", right_on="coef", how = "left")
flows.drop(columns = ["coef"], inplace = True)

#Calculate new flows with change @ Canary Wharf
flows["flows_scenario_a"] = np.exp(flows["alpha_i"]+gamma*flows["log_dest_jobs_scenario_a"] - beta*flows["distance"])

flows.head()

  gamma = params[-2]
  beta = -params[-1]


Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_dest_jobs,OriginConstrainedExp,jobs_scenario_a,log_dest_jobs_scenario_a,FlowsDefault,alpha_i_x,flows_scenario_a,alpha_i_y,alpha_i
0,Abbey Road,Bank and Monument,0,599,78549,8131.525097,11.271478,78.0,78549,11.271478,78.0,-2.881022,77.979903,-2.881022,-2.881022
1,Abbey Road,Beckton,1,599,442,8510.121774,6.091312,2.0,442,6.091312,2.0,-2.881022,1.506097,-2.881022,-2.881022
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.499789,4.0,665,6.499789,4.0,-2.881022,4.180041,-2.881022,-2.881022
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422,10.981421,99.0,29386,10.288274,99.0,-2.881022,58.991613,-2.881022,-2.881022
4,Abbey Road,Canning Town,37,599,15428,2228.923167,9.643939,56.0,15428,9.643939,56.0,-2.881022,55.954205,-2.881022,-2.881022
