In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn as sns

import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats
import numpy as np
from math import sqrt

In [2]:
# Read the origin-destination flow table. `cdata` stays untouched as the raw
# reference; all derived columns are added to the working copy `cdatasub`.
cdata = pd.read_csv("london_flows.csv")
cdatasub = cdata.copy(deep=True)

## Scenario A

In [64]:
# Add a natural-log column ("log_<name>") for each explanatory variable and
# keep the list of the new column names.
x_variables = ["population", "jobs", "distance"]
log_x_vars = [f"log_{name}" for name in x_variables]
for name, log_name in zip(x_variables, log_x_vars):
    cdatasub[log_name] = np.log(cdata[name])

In [65]:
# Production-constrained model: one dummy per origin station plus the two log
# covariates. The trailing "-1" removes the intercept, so every origin gets its
# own coefficient.
formula = 'flows ~ station_origin + log_jobs + log_distance-1'
# Specify the Poisson GLM first, then fit it.
prodSim_model = smf.glm(formula=formula, data=cdatasub, family=sm.families.Poisson())
prodSim = prodSim_model.fit()
# Call prodSim.summary() to inspect the full coefficient table.

In [66]:

# Pull the fitted coefficients into a two-column frame: the model term name
# ("coef") and its estimate ("alpha_i").
coefs = prodSim.params.rename("alpha_i").rename_axis("coef").reset_index()
# Origin dummies are labelled "station_origin[<name>]". Strip the wrapper so the
# label equals the station name. regex=True is passed explicitly: pandas >= 2.0
# treats the pattern as a literal string by default, which would leave the
# labels unchanged and make the merge below return only NaN.
coefs["coef"] = coefs["coef"].str.replace(r"station_origin|\[|\]", "", regex=True)
# Drop any alpha_i left from an earlier run of this cell so that re-running it
# does not produce alpha_i_x / alpha_i_y columns.
cdatasub = cdatasub.drop(columns=["alpha_i"], errors="ignore")
# Join the origin coefficients back onto every row of that origin.
cdatasub = cdatasub.merge(coefs, left_on="station_origin", right_on="coef", how="left")
cdatasub = cdatasub.drop(columns=["coef"])
# Check this has worked
cdatasub.head()

  coefs["coef"] = coefs["coef"].str.replace(x, "")


Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_population,log_jobs,log_distance,alpha_i
0,Abbey Road,Bank and Monument,1,599,78549,8131.525097,6.395262,11.271478,9.003504,3.257207
1,Abbey Road,Beckton,1,599,442,8510.121774,6.395262,6.09131,9.049012,3.257207
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.395262,6.499787,8.236275,3.257207
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422,6.395262,10.981421,8.534348,3.257207
4,Abbey Road,Canning Town,37,599,15428,2228.923167,6.395262,9.643939,7.709274,3.257207


In [87]:

# Look the slope estimates up by term name instead of by position (399/400):
# integer keys on a label-indexed Series are deprecated in pandas, and the
# positions silently change whenever the number of origin stations changes.
gamma = prodSim.params["log_jobs"]
# The fitted distance coefficient is negated so that beta can be subtracted
# (or used as a negative exponent) in the cells that follow.
beta = -prodSim.params["log_distance"]
print(beta)

0.8569660931816073


In [68]:
# Total observed flow leaving each origin station (O_i).
# .sum() replaces .agg(np.sum), which pandas has deprecated.
O_i = cdatasub.groupby("station_origin")["flows"].sum().rename("O_i").to_frame()
# Drop any O_i left from an earlier run so that re-running the cell does not
# create O_i_x / O_i_y columns, then attach the totals to every row.
cdatasub = cdatasub.drop(columns=["O_i"], errors="ignore")
cdatasub = cdatasub.merge(O_i, on="station_origin", how="left")
cdatasub.head()

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_population,log_jobs,log_distance,alpha_i,O_i
0,Abbey Road,Bank and Monument,1,599,78549,8131.525097,6.395262,11.271478,9.003504,3.257207,603
1,Abbey Road,Beckton,1,599,442,8510.121774,6.395262,6.09131,9.049012,3.257207,603
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.395262,6.499787,8.236275,3.257207,603
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422,6.395262,10.981421,8.534348,3.257207,603
4,Abbey Road,Canning Town,37,599,15428,2228.923167,6.395262,9.643939,7.709274,3.257207,603


In [69]:
# Linear predictor of the fitted model: origin effect + gamma*log(jobs) - beta*log(distance).
log_mu = cdatasub["alpha_i"] + gamma*cdatasub["log_jobs"] - beta*cdatasub["log_distance"]
# Exponentiate to get the estimated flow and round to whole trips.
cdatasub["prodsimest1"] = np.exp(log_mu).round()
# (The fitted values of the model object would give the same estimates.)
cdatasub.head(10)

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_population,log_jobs,log_distance,alpha_i,O_i,prodsimest1
0,Abbey Road,Bank and Monument,1,599,78549,8131.525097,6.395262,11.271478,9.003504,3.257207,603,56.0
1,Abbey Road,Beckton,1,599,442,8510.121774,6.395262,6.09131,9.049012,3.257207,603,1.0
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.395262,6.499787,8.236275,3.257207,603,3.0
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422,6.395262,10.981421,8.534348,3.257207,603,67.0
4,Abbey Road,Canning Town,37,599,15428,2228.923167,6.395262,9.643939,7.709274,3.257207,603,50.0
5,Abbey Road,Crossharbour,1,599,1208,6686.47556,6.395262,7.096721,8.807842,3.257207,603,3.0
6,Abbey Road,Custom House,1,599,845,3824.85563,6.395262,6.739337,8.249276,3.257207,603,4.0
7,Abbey Road,Cutty Sark,2,599,1748,8503.898909,6.395262,7.466228,9.04828,3.257207,603,3.0
8,Abbey Road,Cyprus,7,599,850,6532.099618,6.395262,6.745236,8.784484,3.257207,603,2.0
9,Abbey Road,Devons Road,1,599,611,3958.324171,6.395262,6.415097,8.283576,3.257207,603,3.0


In [74]:
# Origin-by-destination matrix of the baseline estimates; margins=True adds the
# "All" row and column totals. aggfunc is given as the string "sum" because
# passing the np.sum callable is deprecated in pandas.
cdatasubmat0 = cdatasub.pivot_table(values="prodsimest1", index="station_origin", columns="station_destination",
                                    aggfunc="sum", margins=True)
cdatasubmat0

station_origin
Abbey Road             67.0
Acton Central           NaN
Acton Town             73.0
Aldgate                59.0
Aldgate East           82.0
                     ...   
Woodford              180.0
Woodgrange Park         NaN
Woodside Park          76.0
Woolwich Arsenal     1404.0
All                 49696.0
Name: Canary Wharf, Length: 400, dtype: float64

In [75]:

def new_job(row, station="Canary Wharf", new_jobs=25000):
    """Return the scenario job count for one origin-destination row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys "station_destination" and "jobs".
    station : str, default "Canary Wharf"
        Destination whose job count is overridden in the scenario.
    new_jobs : number, default 25000
        Job count assigned to rows whose destination is `station`.

    Returns
    -------
    `new_jobs` when the row's destination equals `station`, otherwise the
    row's own "jobs" value unchanged.
    """
    if row["station_destination"] == station:
        return new_jobs
    return row["jobs"]
        
# Scenario job counts: 25000 for rows whose destination is Canary Wharf, the
# observed "jobs" value everywhere else. Done as one vectorised mask instead of
# a Python-level call per row (apply with axis=1); the resulting values are the
# same.
is_canary_wharf = cdatasub["station_destination"] == "Canary Wharf"
cdatasub["Dj3_destjobScenario"] = cdatasub["jobs"].mask(is_canary_wharf, 25000)
cdatasub.head(10)

Unnamed: 0,station_origin,station_destination,flows,population,jobs,distance,log_population,log_jobs,log_distance,alpha_i,O_i,prodsimest1,Dj3_destjobScenario
0,Abbey Road,Bank and Monument,1,599,78549,8131.525097,6.395262,11.271478,9.003504,3.257207,603,56.0,78549
1,Abbey Road,Beckton,1,599,442,8510.121774,6.395262,6.09131,9.049012,3.257207,603,1.0,442
2,Abbey Road,Blackwall,3,599,665,3775.448872,6.395262,6.499787,8.236275,3.257207,603,3.0,665
3,Abbey Road,Canary Wharf,1,599,58772,5086.51422,6.395262,10.981421,8.534348,3.257207,603,67.0,25000
4,Abbey Road,Canning Town,37,599,15428,2228.923167,6.395262,9.643939,7.709274,3.257207,603,50.0,15428
5,Abbey Road,Crossharbour,1,599,1208,6686.47556,6.395262,7.096721,8.807842,3.257207,603,3.0,1208
6,Abbey Road,Custom House,1,599,845,3824.85563,6.395262,6.739337,8.249276,3.257207,603,4.0,845
7,Abbey Road,Cutty Sark,2,599,1748,8503.898909,6.395262,7.466228,9.04828,3.257207,603,3.0,1748
8,Abbey Road,Cyprus,7,599,850,6532.099618,6.395262,6.745236,8.784484,3.257207,603,2.0,850
9,Abbey Road,Devons Road,1,599,611,3958.324171,6.395262,6.415097,8.283576,3.257207,603,3.0,611


In [76]:
# Re-apply the fitted parameters with the scenario job counts. alpha_i is left
# as fitted, so origin totals are not re-balanced in this estimate.
log_mu_scenario = (cdatasub["alpha_i"] + gamma*np.log(cdatasub["Dj3_destjobScenario"])
                   - beta*cdatasub["log_distance"])
cdatasub["prodsimest2"] = np.exp(log_mu_scenario).round(0)
# Origin-by-destination matrix with "All" totals. aggfunc is the string "sum"
# because passing the np.sum callable is deprecated in pandas.
cdatasubmat4 = cdatasub.pivot_table(values="prodsimest2", index="station_origin", columns="station_destination",
                                    aggfunc="sum", margins=True)
cdatasubmat4

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,6.0,574.0
Acton Central,,,,,,,,,,,...,,,,,,,1.0,,,1247.0
Acton Town,,,,19.0,19.0,,9.0,1.0,,20.0,...,17.0,3.0,5.0,13.0,,2.0,,2.0,,3768.0
Aldgate,,,2.0,,48.0,,,0.0,,23.0,...,4.0,,3.0,2.0,,1.0,,1.0,,2967.0
Aldgate East,,,2.0,54.0,,,1.0,0.0,,24.0,...,5.0,1.0,3.0,3.0,,1.0,,1.0,,3219.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,8.0,35.0,39.0,,,,,33.0,...,16.0,,10.0,,,,,,,4807.0
Woodgrange Park,,4.0,,,,,,,,,...,,,,,,,,,,541.0
Woodside Park,,,6.0,21.0,21.0,,2.0,,,25.0,...,12.0,,7.0,,,,,,,3084.0
Woolwich Arsenal,31.0,,,,,35.0,,,,,...,,,,,,,,,,7232.0


In [77]:
# Row-level attraction and distance-decay terms: Dj^gamma and d_ij^-beta.
Dj2_gamma = cdatasub["jobs"]**gamma
dist_beta = cdatasub["distance"]**-beta
# First stage of the A_i values: the product for every origin-destination pair.
cdatasub["Ai1"] = Dj2_gamma * dist_beta
# Sum over all destinations j of each origin and take the reciprocal.
# .sum() replaces .agg(np.sum), which pandas has deprecated.
A_i = (1 / cdatasub.groupby("station_origin")["Ai1"].sum()).rename("A_i").to_frame()
# Write the A_i values back onto every row. Any A_i from an earlier run is
# dropped first so re-running the cell does not create A_i_x / A_i_y columns.
cdatasub = cdatasub.drop(columns=["A_i"], errors="ignore")
cdatasub = cdatasub.merge(A_i, left_on="station_origin", right_index=True, how="left")

In [78]:
# Sanity check: A_i * O_i * Dj^gamma * d_ij^-beta, rounded to whole trips,
# should reproduce the baseline estimates in prodsimest1.
cdatasub["prodsimest3"] = (cdatasub["A_i"]*cdatasub["O_i"]*Dj2_gamma*dist_beta).round()
# Show the two columns side by side for comparison.
cdatasub[["prodsimest1", "prodsimest3"]]

Unnamed: 0,prodsimest1,prodsimest3
0,56.0,56.0
1,1.0,1.0
2,3.0,3.0
3,67.0,67.0
4,50.0,50.0
...,...,...
61451,125.0,125.0
61452,263.0,263.0
61453,34.0,34.0
61454,102.0,102.0


In [79]:
# Scenario attraction term: Dj^gamma using the scenario job counts. The
# distance-decay term dist_beta from the baseline cell is reused.
Dj3_gamma = cdatasub["Dj3_destjobScenario"]**gamma
# First stage of the scenario A_i values (overwrites the baseline Ai1 column).
cdatasub["Ai1"] = Dj3_gamma * dist_beta
# Sum over all destinations j of each origin and take the reciprocal.
# .sum() replaces .agg(np.sum), which pandas has deprecated.
A_i = (1 / cdatasub.groupby("station_origin")["Ai1"].sum()).rename("A_i2").to_frame()
# Write the scenario balancing factors back as column A_i2. Any A_i2 from an
# earlier run is dropped first so re-running does not create A_i2_x / A_i2_y.
cdatasub = cdatasub.drop(columns=["A_i2"], errors="ignore")
cdatasub = cdatasub.merge(A_i, left_on="station_origin", right_index=True, how="left")

In [80]:
# Scenario estimate with re-balanced origin factors:
# A_i2 * O_i * Dj3^gamma * d_ij^-beta, rounded to whole trips.
cdatasub["prodsimest4"] = (cdatasub["A_i2"]*cdatasub["O_i"]*Dj3_gamma*dist_beta).round()

In [81]:
# Origin-by-destination matrix of the re-balanced scenario estimates, with
# "All" totals. aggfunc is the string "sum" because passing the np.sum callable
# is deprecated in pandas.
cdatasubmat5 = cdatasub.pivot_table(values="prodsimest4", index="station_origin", columns="station_destination",
                                    aggfunc="sum", margins=True)
cdatasubmat5

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,6.0,600.0
Acton Central,,,,,,,,,,,...,,,,,,,1.0,,,1247.0
Acton Town,,,,19.0,19.0,,9.0,1.0,,21.0,...,17.0,3.0,5.0,13.0,,2.0,,2.0,,3795.0
Aldgate,,,2.0,,49.0,,,0.0,,23.0,...,4.0,,3.0,2.0,,1.0,,1.0,,2994.0
Aldgate East,,,3.0,54.0,,,1.0,0.0,,24.0,...,5.0,1.0,3.0,3.0,,1.0,,1.0,,3265.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,8.0,36.0,40.0,,,,,33.0,...,16.0,,10.0,,,,,,,4897.0
Woodgrange Park,,4.0,,,,,,,,,...,,,,,,,,,,541.0
Woodside Park,,,6.0,21.0,21.0,,2.0,,,25.0,...,12.0,,7.0,,,,,,,3119.0
Woolwich Arsenal,34.0,,,,,38.0,,,,,...,,,,,,,,,,7895.0


In [86]:
# Show the baseline matrix (pivot of prodsimest1) again for comparison.
cdatasubmat0

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,,,,,,,,,,,...,,,,,,,,,6.0,606.0
Acton Central,,,,,,,,,,,...,,,,,,,1.0,,,1247.0
Acton Town,,,,19.0,19.0,,9.0,1.0,,20.0,...,17.0,3.0,5.0,13.0,,2.0,,2.0,,3803.0
Aldgate,,,2.0,,48.0,,,0.0,,23.0,...,4.0,,3.0,2.0,,1.0,,1.0,,2995.0
Aldgate East,,,2.0,54.0,,,1.0,0.0,,24.0,...,5.0,1.0,3.0,3.0,,1.0,,1.0,,3258.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,,,8.0,35.0,39.0,,,,,33.0,...,16.0,,10.0,,,,,,,4892.0
Woodgrange Park,,4.0,,,,,,,,,...,,,,,,,,,,541.0
Woodside Park,,,6.0,21.0,21.0,,2.0,,,25.0,...,12.0,,7.0,,,,,,,3120.0
Woolwich Arsenal,31.0,,,,,35.0,,,,,...,,,,,,,,,,7898.0


In [62]:
# Canary Wharf column of the scenario matrix: estimated flow from each origin
# into Canary Wharf, with the column total in the final "All" row.
cdatasubmat5['Canary Wharf']

station_origin
Abbey Road             37.0
Acton Central           NaN
Acton Town             39.0
Aldgate                31.0
Aldgate East           44.0
                     ...   
Woodford               96.0
Woodgrange Park         NaN
Woodside Park          41.0
Woolwich Arsenal      806.0
All                 27752.0
Name: Canary Wharf, Length: 400, dtype: float64

In [82]:
# Scenario flows into Canary Wharf as a one-column frame indexed by origin.
# NOTE(review): the nested list in `columns` creates MultiIndex columns; it is
# kept as-is so the frame has the same layout as before.
canary_wharf_scenario = cdatasubmat5['Canary Wharf']
newdata1 = pd.DataFrame(data=canary_wharf_scenario.values,
                        index=cdatasubmat5.index,
                        columns=[['flows']])

In [84]:
#newdata1.to_csv('cw1.csv')

In [85]:
# Baseline flows into Canary Wharf, written to cw0.csv. The index now comes
# from the same matrix as the values (cdatasubmat0); it was previously taken
# from cdatasubmat5, which only lines up if both matrices list the same
# origins in the same order.
# NOTE(review): the nested list in `columns` creates MultiIndex columns; it is
# kept so the CSV header layout does not change.
newdata0 = pd.DataFrame(cdatasubmat0['Canary Wharf'].values, index=cdatasubmat0.index, columns=[['flows']])
newdata0.to_csv('cw0.csv')

# Scenario B

In [3]:
# Fresh working copy for Scenario B.
cdatasub1 = cdata.copy()
# Store the row and column totals of the observed flows on every row.
# O_i: group by station_origin and sum "flows" (total flow leaving each origin).
# .sum() replaces .agg(np.sum), which pandas has deprecated.
O_i = cdatasub1.groupby("station_origin")["flows"].sum().rename("O_i").to_frame()
cdatasub1 = cdatasub1.merge(O_i, on="station_origin", how="left")

# D_j: group by station_destination and sum "flows" (total flow arriving at each destination).
D_j = cdatasub1.groupby("station_destination")["flows"].sum().rename("D_j").to_frame()
cdatasub1 = cdatasub1.merge(D_j, on="station_destination", how="left")

In [4]:
# Doubly constrained model with a negative exponential cost function: origin
# and destination dummies plus untransformed distance, no intercept ("-1").
doubsim_form = "flows ~ station_origin + station_destination + distance -1"
# Specify the Poisson GLM first, then fit it.
doubsim1_model = smf.glm(formula=doubsim_form, data=cdatasub1, family=sm.families.Poisson())
doubsim1 = doubsim1_model.fit()
# Call doubsim1.summary() to inspect the full coefficient table.

In [5]:

# Here is the entropy maximising approach for a known beta.
# Plug in the required values in this function to solve.

def balance_doubly_constrained(pd, orig_field, dest_field, Oi_field, Dj_field, cij_field, beta, 
                               cost_function, Ai_name = "Ai_new", Bj_name = "Bj_new", converge=0.001,
                               max_iter=None, verbose=True):
    """Iteratively compute the balancing factors A_i and B_j for a known beta.

    Starting from B_j = 1, the function alternates
        A_i = 1 / sum_j(B_j * D_j * f(c_ij))
        B_j = 1 / sum_i(A_i * O_i * f(c_ij))
    until both sets of factors stop changing.

    Parameters
    ----------
    pd : pandas.DataFrame
        One row per origin-destination pair. NB: inside this function the name
        ``pd`` is this data frame, not the pandas module. The frame is modified
        in place (two columns are added) and is also returned.
    orig_field, dest_field : str
        Columns identifying the origin and the destination of each row.
    Oi_field, Dj_field : str
        Columns holding the O_i and D_j values repeated on each row.
    cij_field : str
        Column holding the cost c_ij of each pair.
    beta : float
        Exponent, applied exactly as given: f = c_ij**beta for the power form,
        f = exp(beta * c_ij) for the exponential form. Pass a negative value to
        get decay with cost.
    cost_function : str
        'power' / 'pow' or 'exponential' / 'exp' (case-insensitive).
    Ai_name, Bj_name : str
        Names of the output columns written to the frame.
    converge : float
        The loop stops once the larger of the two summed absolute relative
        changes (summed over all rows, for A_i and for B_j) is no longer above
        this threshold.
    max_iter : int or None
        Optional cap on the number of refinement passes. None (the default)
        means no cap, which is the historical behaviour.
    verbose : bool
        When True (the default) print "Iteration: n" after every pass.

    Returns
    -------
    pandas.DataFrame
        The input frame with the `Ai_name` and `Bj_name` columns added.

    Raises
    ------
    ValueError
        If `cost_function` is not one of the recognised names. (Previously the
        error text was returned in place of the data frame.)
    RuntimeError
        If `max_iter` is set and convergence is not reached within that many
        passes.
    """
    # Evaluate the cost function once per row; it does not change between passes.
    kind = cost_function.lower()
    if kind in ('power', 'pow'):
        beta_cij = np.exp(beta * np.log(pd[cij_field]))
    elif kind in ('exponential', 'exp'):
        beta_cij = np.exp(beta * pd[cij_field])
    else:
        raise ValueError("Cost function not specified properly, use 'exp' or 'pow'")

    origins = pd[orig_field]
    destinations = pd[dest_field]
    Oi = pd[Oi_field]
    Dj = pd[Dj_field]

    def _inverse_group_sum(values, keys):
        # Reciprocal of the per-group sum, broadcast back to one value per row.
        return 1.0 / values.groupby(keys).transform('sum')

    # Starting values: A_i assuming B_j is a vector of 1s, then B_j from those A_i.
    Ai = _inverse_group_sum(Dj * beta_cij, origins)
    Bj = _inverse_group_sum(Ai * Oi * beta_cij, destinations)

    cnvg = 1
    iteration = 1
    # Alternately refine A_i and B_j until the change falls to the threshold.
    while cnvg > converge:
        if max_iter is not None and iteration > max_iter:
            raise RuntimeError(
                "Balancing factors did not converge within %d iterations" % max_iter)

        # New A_i from the current B_j, and its relative change per row.
        Ai_new = _inverse_group_sum(Bj * Dj * beta_cij, origins)
        Ai_diff = np.absolute((Ai - Ai_new) / Ai)
        Ai = Ai_new

        # New B_j from the freshly updated A_i, and its relative change per row.
        Bj_new = _inverse_group_sum(Ai * Oi * beta_cij, destinations)
        Bj_diff = np.absolute((Bj - Bj_new) / Bj)
        Bj = Bj_new

        # Convergence measure: the larger of the two summed differences.
        cnvg = np.maximum(Ai_diff.sum(), Bj_diff.sum())

        if verbose:
            print("Iteration:", iteration)
        iteration += 1

    # Attach the converged factors to the data frame and return it.
    pd[Ai_name] = Ai
    pd[Bj_name] = Bj
    return pd

In [6]:
# Baseline run: use the distance-decay parameter fitted by the negative
# exponential model. The coefficient is looked up by term name because integer
# keys on a label-indexed Series (params[-1]) are deprecated in pandas.
beta = -doubsim1.params["distance"]
# Get the balancing factors. NB Setting of new field names for Ai and Bj.
# The function applies the exponent exactly as passed, hence -beta.
cdatasub1 = balance_doubly_constrained(cdatasub1,'station_origin','station_destination','O_i','D_j','distance',-beta,'exponential','Ai_exp','Bj_exp')

# Now predict the flows using the balanced Ai and Bj factors.
cdatasub1['SIM_est_exp'] = np.round(cdatasub1['O_i'] * cdatasub1['Ai_exp'] * cdatasub1['D_j'] * cdatasub1['Bj_exp'] *
                                   np.exp(cdatasub1['distance']*-beta))
# Origin-by-destination matrix with "All" totals. aggfunc is the string "sum"
# because passing the builtin sum is deprecated in pandas.
cdatasubmat1 = pd.pivot_table(cdatasub1,values='SIM_est_exp',index ='station_origin',columns='station_destination',fill_value=0,aggfunc="sum",margins=True)
cdatasubmat1

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23


station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,31,603.0
Acton Central,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,1252.0
Acton Town,0,0,0,11,10,0,18,0,0,13,...,40,4,2,18,0,0,0,1,0,3792.0
Aldgate,0,0,2,0,32,0,0,0,0,24,...,8,0,3,2,0,1,0,1,0,2994.0
Aldgate East,0,0,2,37,0,0,0,0,0,25,...,8,1,4,2,0,1,0,1,0,3256.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,0,0,2,37,43,0,0,0,0,25,...,8,0,7,0,0,0,0,0,0,4894.0
Woodgrange Park,0,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,541.0
Woodside Park,0,0,2,17,16,0,1,0,0,25,...,10,0,5,0,0,0,0,0,0,3120.0
Woolwich Arsenal,29,0,0,0,0,30,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7893.0


In [7]:
# Show the beta value used for the baseline doubly constrained run.
print(beta)

0.0001469789967357215


In [8]:
# 'Bank and Monument' column of the baseline matrix: estimated flow from each
# origin into Bank and Monument, with the column total in the "All" row.
bm_d0 = cdatasubmat1['Bank and Monument']
bm_d0 

station_origin
Abbey Road             79
Acton Central           0
Acton Town            106
Aldgate               262
Aldgate East          272
                    ...  
Woodford              272
Woodgrange Park         0
Woodside Park         138
Woolwich Arsenal      879
All                 78552
Name: Bank and Monument, Length: 400, dtype: int64

In [9]:
# Baseline flows into Bank and Monument as a one-column frame, saved to bm1.csv.
bank_monument_baseline = cdatasubmat1['Bank and Monument']
bm_d0 = pd.DataFrame(data=bank_monument_baseline.values,
                     index=cdatasubmat1.index,
                     columns=[['flows0']])
bm_d0.to_csv('bm1.csv')

In [10]:
# Baseline 'Waterloo' column (a destination column of the matrix) as a
# one-column frame, saved to wtl1.csv.
waterloo_baseline = cdatasubmat1['Waterloo']
wtl_o0 = pd.DataFrame(data=waterloo_baseline.values,
                      index=cdatasubmat1.index,
                      columns=[['flows0']])
wtl_o0.to_csv('wtl1.csv')

In [12]:
# Baseline flows into Canary Wharf as a one-column frame, saved to cwd1.csv.
canary_wharf_baseline = cdatasubmat1['Canary Wharf']
cw_d0 = pd.DataFrame(data=canary_wharf_baseline.values,
                     index=cdatasubmat1.index,
                     columns=[['flows0']])
cw_d0.to_csv('cwd1.csv')

In [14]:
# Scenario: five times the distance-decay parameter fitted by the negative
# exponential model. The coefficient is looked up by term name because integer
# keys on a label-indexed Series (params[-1]) are deprecated in pandas.
beta = -doubsim1.params["distance"]*5
# Get the balancing factors. NB Setting of new field names for Ai and Bj.
# The function applies the exponent exactly as passed, hence -beta.
cdatasub1 = balance_doubly_constrained(cdatasub1,'station_origin','station_destination','O_i','D_j','distance',-beta,'exponential','Ai_exp','Bj_exp')

# Now predict the flows using the balanced Ai and Bj factors
# (overwrites the SIM_est_exp column of the previous run).
cdatasub1['SIM_est_exp'] = np.round(cdatasub1['O_i'] * cdatasub1['Ai_exp'] * cdatasub1['D_j'] * cdatasub1['Bj_exp'] *
                                   np.exp(cdatasub1['distance']*-beta))
# Origin-by-destination matrix with "All" totals. aggfunc is the string "sum"
# because passing the builtin sum is deprecated in pandas.
cdatasubmat6 = pd.pivot_table(cdatasub1,values='SIM_est_exp',index ='station_origin',columns='station_destination',fill_value=0,aggfunc="sum",margins=True)
cdatasubmat6

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 30
Iteration: 31
Iteration: 32
Iteration: 33
Iteration: 34
Iteration: 35
Iteration: 36
Iteration: 37
Iteration: 38
Iteration: 39
Iteration: 40
Iteration: 41
Iteration: 42
Iteration: 43
Iteration: 44
Iteration: 45
Iteration: 46
Iteration: 47
Iteration: 48
Iteration: 49
Iteration: 50
Iteration: 51
Iteration: 52
Iteration: 53
Iteration: 54
Iteration: 55
Iteration: 56
Iteration: 57
Iteration: 58
Iteration: 59
Iteration: 60
Iteration: 61
Iteration: 62
Iteration: 63
Iteration: 64
Iteration: 65
Iteration: 66
Iteration: 67
Iteration: 68
Iteration: 69
Iteration: 70
Iteration: 71
Iteration: 72
I

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,7,604.0
Acton Central,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1244.0
Acton Town,0,0,0,0,0,0,1,0,0,0,...,1,0,0,43,0,0,0,0,0,3797.0
Aldgate,0,0,0,0,64,0,0,0,0,20,...,0,0,0,0,0,0,0,0,0,2998.0
Aldgate East,0,0,0,114,0,0,0,0,0,20,...,0,0,0,0,0,0,0,0,0,3258.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,0,0,0,32,64,0,0,0,0,6,...,0,0,0,0,0,0,0,0,0,4885.0
Woodgrange Park,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,541.0
Woodside Park,0,0,0,3,2,0,0,0,0,24,...,0,0,0,0,0,0,0,0,0,3117.0
Woolwich Arsenal,4,0,0,0,0,12,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7893.0


In [113]:
# 5x-beta flows into Bank and Monument as a one-column frame, saved to bm5.csv.
bank_monument_x5 = cdatasubmat6['Bank and Monument']
bm_d5 = pd.DataFrame(data=bank_monument_x5.values,
                     index=cdatasubmat6.index,
                     columns=[['flows5']])
bm_d5.to_csv('bm5.csv')

In [120]:
# 5x-beta 'Waterloo' column of the matrix as a one-column frame, saved to wtl5.csv.
waterloo_x5 = cdatasubmat6['Waterloo']
wtl_o5 = pd.DataFrame(data=waterloo_x5.values,
                      index=cdatasubmat6.index,
                      columns=[['flows5']])
wtl_o5.to_csv('wtl5.csv')

In [15]:
# 5x-beta flows into Canary Wharf, saved to cw5.csv. Stored under its own name:
# this cell used to assign to `wtl_o5`, silently overwriting the Waterloo
# extract with Canary Wharf data.
cw_d5 = pd.DataFrame(cdatasubmat6['Canary Wharf'].values, index=cdatasubmat6.index, columns=[['flows5']])
cw_d5.to_csv('cw5.csv')

In [16]:
# Scenario: ten times the distance-decay parameter fitted by the negative
# exponential model. The coefficient is looked up by term name because integer
# keys on a label-indexed Series (params[-1]) are deprecated in pandas.
beta = -doubsim1.params["distance"]*10
# Get the balancing factors. NB Setting of new field names for Ai and Bj.
# The function applies the exponent exactly as passed, hence -beta.
cdatasub1 = balance_doubly_constrained(cdatasub1,'station_origin','station_destination','O_i','D_j','distance',-beta,'exponential','Ai_exp','Bj_exp')

# Now predict the flows using the balanced Ai and Bj factors
# (overwrites the SIM_est_exp column of the previous run).
cdatasub1['SIM_est_exp'] = np.round(cdatasub1['O_i'] * cdatasub1['Ai_exp'] * cdatasub1['D_j'] * cdatasub1['Bj_exp'] *
                                   np.exp(cdatasub1['distance']*-beta))
# Origin-by-destination matrix with "All" totals. aggfunc is the string "sum"
# because passing the builtin sum is deprecated in pandas.
cdatasubmat10 = pd.pivot_table(cdatasub1,values='SIM_est_exp',index ='station_origin',columns='station_destination',fill_value=0,aggfunc="sum",margins=True)
cdatasubmat10

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 30
Iteration: 31
Iteration: 32
Iteration: 33
Iteration: 34
Iteration: 35
Iteration: 36
Iteration: 37
Iteration: 38
Iteration: 39
Iteration: 40
Iteration: 41
Iteration: 42
Iteration: 43
Iteration: 44
Iteration: 45
Iteration: 46
Iteration: 47
Iteration: 48
Iteration: 49
Iteration: 50
Iteration: 51
Iteration: 52
Iteration: 53
Iteration: 54
Iteration: 55
Iteration: 56
Iteration: 57
Iteration: 58
Iteration: 59
Iteration: 60
Iteration: 61
Iteration: 62
Iteration: 63
Iteration: 64
Iteration: 65
Iteration: 66
Iteration: 67
Iteration: 68
Iteration: 69
Iteration: 70
Iteration: 71
Iteration: 72
I

station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,600.0
Acton Central,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1247.0
Acton Town,0,0,0,0,0,0,0,0,0,0,...,0,0,0,30,0,0,0,0,0,3791.0
Aldgate,0,0,0,0,32,0,0,0,0,4,...,0,0,0,0,0,0,0,0,0,2998.0
Aldgate East,0,0,0,154,0,0,0,0,0,4,...,0,0,0,0,0,0,0,0,0,3258.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,0,0,0,30,76,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,4896.0
Woodgrange Park,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,545.0
Woodside Park,0,0,0,0,0,0,0,0,0,19,...,0,0,0,0,0,0,0,0,0,3121.0
Woolwich Arsenal,0,0,0,0,0,5,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7893.0


In [121]:
# 10x-beta flows into Bank and Monument as a one-column frame, saved to bm10.csv.
bank_monument_x10 = cdatasubmat10['Bank and Monument']
bm_d10 = pd.DataFrame(data=bank_monument_x10.values,
                      index=cdatasubmat10.index,
                      columns=[['flows10']])
bm_d10.to_csv('bm10.csv')

In [122]:
# 10x-beta 'Waterloo' column of the matrix as a one-column frame, saved to wtl10.csv.
waterloo_x10 = cdatasubmat10['Waterloo']
wtl_o10 = pd.DataFrame(data=waterloo_x10.values,
                       index=cdatasubmat10.index,
                       columns=[['flows10']])
wtl_o10.to_csv('wtl10.csv')

In [17]:
# 10x-beta flows into Canary Wharf, saved to cw10.csv. Stored under its own
# name: this cell used to assign to `wtl_o10`, silently overwriting the
# Waterloo extract with Canary Wharf data.
cw_d10 = pd.DataFrame(cdatasubmat10['Canary Wharf'].values, index=cdatasubmat10.index, columns=[['flows10']])
cw_d10.to_csv('cw10.csv')

In [18]:
# Scenario: twice the distance-decay parameter fitted by the negative
# exponential model. The coefficient is looked up by term name because integer
# keys on a label-indexed Series (params[-1]) are deprecated in pandas.
beta = -doubsim1.params["distance"]*2
# Get the balancing factors. NB Setting of new field names for Ai and Bj.
# The function applies the exponent exactly as passed, hence -beta.
cdatasub1 = balance_doubly_constrained(cdatasub1,'station_origin','station_destination','O_i','D_j','distance',-beta,'exponential','Ai_exp','Bj_exp')

# Now predict the flows using the balanced Ai and Bj factors
# (overwrites the SIM_est_exp column of the previous run).
cdatasub1['SIM_est_exp'] = np.round(cdatasub1['O_i'] * cdatasub1['Ai_exp'] * cdatasub1['D_j'] * cdatasub1['Bj_exp'] *
                                   np.exp(cdatasub1['distance']*-beta))
# Origin-by-destination matrix with "All" totals. aggfunc is the string "sum"
# because passing the builtin sum is deprecated in pandas.
cdatasubmat2 = pd.pivot_table(cdatasub1,values='SIM_est_exp',index ='station_origin',columns='station_destination',fill_value=0,aggfunc="sum",margins=True)
cdatasubmat2

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 30
Iteration: 31
Iteration: 32
Iteration: 33
Iteration: 34
Iteration: 35
Iteration: 36
Iteration: 37
Iteration: 38
Iteration: 39
Iteration: 40
Iteration: 41
Iteration: 42
Iteration: 43
Iteration: 44
Iteration: 45


station_destination,Abbey Road,Acton Central,Acton Town,Aldgate,Aldgate East,All Saints,Alperton,Amersham,Anerley,Angel,...,Wimbledon,Wimbledon Park,Wood Green,Wood Lane,Wood Street,Woodford,Woodgrange Park,Woodside Park,Woolwich Arsenal,All
station_origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abbey Road,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,23,604.0
Acton Central,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1247.0
Acton Town,0,0,0,4,3,0,16,0,0,5,...,30,2,0,32,0,0,0,0,0,3791.0
Aldgate,0,0,0,0,48,0,0,0,0,28,...,2,0,1,1,0,0,0,0,0,2990.0
Aldgate East,0,0,0,59,0,0,0,0,0,28,...,2,0,1,1,0,0,0,0,0,3251.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Woodford,0,0,0,36,50,0,0,0,0,18,...,1,0,2,0,0,0,0,0,0,4886.0
Woodgrange Park,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,544.0
Woodside Park,0,0,0,10,9,0,0,0,0,24,...,2,0,1,0,0,0,0,0,0,3121.0
Woolwich Arsenal,20,0,0,0,0,22,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7893.0


In [123]:
# 2x-beta flows into Bank and Monument as a one-column frame, saved to bm2.csv.
bank_monument_x2 = cdatasubmat2['Bank and Monument']
bm_d2 = pd.DataFrame(data=bank_monument_x2.values,
                     index=cdatasubmat2.index,
                     columns=[['flows2']])
bm_d2.to_csv('bm2.csv')
# Same extract for the 'Waterloo' column, saved to wtl2.csv.
waterloo_x2 = cdatasubmat2['Waterloo']
wtl_o2 = pd.DataFrame(data=waterloo_x2.values,
                      index=cdatasubmat2.index,
                      columns=[['flows2']])
wtl_o2.to_csv('wtl2.csv')

In [19]:
# 2x-beta flows into Canary Wharf, saved to cw2.csv. Stored under its own name:
# this cell used to assign to `wtl_o2`, silently overwriting the Waterloo
# extract with Canary Wharf data.
cw_d2 = pd.DataFrame(cdatasubmat2['Canary Wharf'].values, index=cdatasubmat2.index, columns=[['flows2']])
cw_d2.to_csv('cw2.csv')