In [15]:
import numpy as np
import pandas as pd
import os
import geopandas as gpd
from scipy import stats
import scipy.optimize

import powerlaw
import pickle5 as pickle
import seaborn as sns

In [16]:
def load_obj(name):
    with open('resources/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)


In [10]:
pclookup = pd.read_csv("resources/PCD_OA_LSOA_MSOA_LAD_FEB20_UK_LU.csv", encoding = "ISO-8859-1", low_memory=False)
pclookup = pclookup[['lsoa11cd','msoa11cd']].copy().drop_duplicates()

comp_data = pd.read_csv("resources/newdata_companyhouse.csv")

def convert_to_msoa(data):

    data_df = pd.DataFrame(data)

    data_df['lsoa11cd'] = comp_data['lsoa11cd']
    data_row = pclookup.merge(data_df, left_on='lsoa11cd', right_on='lsoa11cd', how='right')
    data_row = data_row.groupby('msoa11cd', as_index=False).sum()
    data_trans = data_row.drop('msoa11cd', axis=1).T

    data_trans['lsoa11cd'] = comp_data['lsoa11cd']
    data_col = pclookup.merge(data_trans, left_on='lsoa11cd', right_on='lsoa11cd', how='right')
    data_col = data_col.groupby('msoa11cd', as_index=False).sum()
    data_col = data_col.drop('msoa11cd',axis=1).T

    data_msoa = data_col.to_numpy()
    data_msoa[np.where(np.isinf(data_msoa))[0], np.where(np.isinf(data_msoa))[1]] = 0


    return data_msoa

# B matrices 

In [12]:
commute = pd.read_csv("resources/SCR_Commute_msoa_to_msoa.csv")
B_com = (
        commute
        .pivot_table(index="O_Code", columns="D_Code")#, values="Commuters", aggfunc=len)
        .fillna(0)
        .astype(int)
    )
B_com = B_com.to_numpy()
B_com[np.diag_indices_from(B_com)] = 0

B_bf = pd.read_csv('lsoa2lsoa_zerosadded.csv').drop('lsoa11cd',axis=1)
B_bf = B_bf.to_numpy()
B_bf = convert_to_msoa(B_bf)

# Adjacency matrices 

In [22]:
def attractivity_median_sampler(oa, edu_ratios, income_params, size):
    """
    Parameters
    ----------
    oa : Integer of oa

    Returns
    -------
    attractivity

    """
    edu = np.random.choice(4, size = size, p=edu_ratios[oa]) #where p values are effectively the ratio of people with a given education level
    income = stats.beta.rvs(income_params[oa, 0], income_params[oa, 1], loc = income_params[oa, 2], scale = income_params[oa, 3], size=size)

    attractivity = np.power(income, -edu)

    return np.median(attractivity)


def median_attractivity(edu_ratios, income_params): #,fit = None):

    """
    Average individual attractivity / lsoa (taken as a sample of 1000 ppl)
    Sample is directinal - matrix not symmetrical
    """

    attractivity = np.zeros((len(income_params)))
    size = 10000

    for i in range(len(income_params)):
        attractivity[i] = attractivity_median_sampler(i, edu_ratios, income_params, size)

    attractivity = attractivity.reshape((len(attractivity),1))

    return attractivity

def bus_adjacency(stoproute,lsoa_list,route_freqs):
    # Create matrix that combines location data and route frequencies
    combine = pd.merge(stoproute, route_freqs, on='line')
    combine = combine.drop_duplicates(['line', 'naptan_sto'])
    combine = combine.rename(columns={'geo_code':'lsoa11cd'})

    # Create adjacency matrix LSOA x route
    bstopfreq = combine[['lsoa11cd', 'naptan_sto', 'line', 'average']]
    adj = pd.pivot(bstopfreq,index=["lsoa11cd", "naptan_sto"], columns="line", values="average").fillna(0)
    adj = adj.astype(float)
    adj = adj.groupby(level="lsoa11cd").mean()
    bus2route = pd.merge(lsoa_list, adj, how='left',on='lsoa11cd').set_index('lsoa11cd')

    #Adjacency matrix LSOA x LSOA
    bus2route = np.array(bus2route)
    bus2routeT = bus2route.transpose()
    lsoa2lsoa = np.dot(bus2route,bus2routeT)**0.5 #check that this actually does whay I think it does
    lsoa2lsoa[np.diag_indices_from(lsoa2lsoa)] = 0

    lsoa2lsoa = pd.DataFrame(lsoa2lsoa)
    lsoa2lsoa = lsoa2lsoa.fillna(0)

    #m values created
    m_bus = np.round(lsoa2lsoa.copy(),0)
    m_bus[m_bus>0]=np.log10(m_bus[m_bus>0])
    m_bus=1-(m_bus/np.max(np.max(m_bus)))
    m_bus[m_bus==0]=np.min(np.min(m_bus[m_bus!=0]))
    
    return m_bus.values

## Choose m

In [25]:
# car input network
m_paths_car = np.ones(np.shape(np.load("resources/newdata_m_paths_bus.npy")))

# bus freq input network
stoproute = pd.read_csv('resources/stoproute_withareacodes.csv')
lsoa_list = pd.read_csv("resources/E47000002_KS101EW.csv")['lsoa11cd']
route_freqs = pd.read_csv('resources/Bus_routes_frequency.csv', usecols= ["line","average"]).astype(str)
m_paths_bus = bus_adjacency(stoproute, lsoa_list, route_freqs)

  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)


In [26]:
lsoa_data = load_obj("newdata_lsoa_data")
income_params, edu_counts, edu_ratios = lsoa_data['income_params'], lsoa_data['edu_counts'], lsoa_data['edu_ratios']
comp_ratio = np.load("resources/newdata_companyhouse.npy")
paths_matrix = load_obj("newdata_ave_paths")

# avg attractivity
attractivity_avg = median_attractivity(edu_ratios, income_params)# 1)  ## no alpha and xmin returned

#population amplification
pop = np.asarray(edu_counts).reshape((len(edu_counts), 1))
pop = np.matmul(pop, pop.transpose())

#connectivity matrix
attractivity_product = np.matmul(attractivity_avg, attractivity_avg.transpose())
attractivity_product = np.multiply(attractivity_product, comp_ratio)

#ensure 0 on diagonal?
connectivity_bus = np.divide(attractivity_product, np.power(paths_matrix, m_paths_bus))
connectivity_bus[np.where(np.isinf(connectivity_bus))[0], np.where(np.isinf(connectivity_bus))[1]] = 0
connectivity_bus[np.diag_indices_from(connectivity_bus)] = 0

connectivity_car = np.divide(attractivity_product, np.power(paths_matrix, m_paths_car))
connectivity_car[np.where(np.isinf(connectivity_car))[0], np.where(np.isinf(connectivity_car))[1]] = 0
connectivity_car[np.diag_indices_from(connectivity_car)] = 0

  connectivity_bus = np.divide(attractivity_product, np.power(paths_matrix, m_paths_bus))
  connectivity_bus = np.divide(attractivity_product, np.power(paths_matrix, m_paths_bus))
  connectivity_car = np.divide(attractivity_product, np.power(paths_matrix, m_paths_car))
  connectivity_car = np.divide(attractivity_product, np.power(paths_matrix, m_paths_car))


In [29]:
theta_bus_com = 1.96
theta_car_com = 0.21
theta_bus_bf = 1.91
theta_car_bf = 0.131

In [31]:
def frob_prod(theta, connectivity, pop, B): #, low_bound, high_bound, step):
#     prod_max = 0
#     prod_F = 0

    #adjacency matrix
    adjacency = np.zeros_like(connectivity)   
    adjacency[np.where(connectivity>theta)] = 1

    #assuming population amplificator is defined in main code
    adjacency = np.multiply(adjacency,pop)

    #convert data to msoa2msoa - assume lsoas and msoas already sorted
    adjacency_msoa = convert_to_msoa(adjacency)

    #normalising the data/row
    A = adjacency_msoa/adjacency_msoa.sum(axis=1)[:,None]
    A[np.isnan(A)] = 0
    B_norm = B/B.sum(axis=1)[:,None]
    B_norm[np.isnan(B_norm)] = 0

    #Frobenius product
    prod_F = np.sum(np.multiply(A, B_norm))
        
    return prod_F

# Frobenius product output for each theta

In [32]:
F_prod_bus_comm = frob_prod(theta_bus_com, connectivity_bus, pop, B_com)
F_prod_car_comm = frob_prod(theta_car_com, connectivity_car, pop, B_com)
F_prod_bus_bf = frob_prod(theta_bus_bf, connectivity_bus, pop, B_bf)
F_prod_car_bf = frob_prod(theta_car_bf, connectivity_car, pop, B_bf)

F_prod_bus_comm, F_prod_car_comm, F_prod_bus_bf, F_prod_car_bf

  A = adjacency_msoa/adjacency_msoa.sum(axis=1)[:,None]
  A = adjacency_msoa/adjacency_msoa.sum(axis=1)[:,None]
  A = adjacency_msoa/adjacency_msoa.sum(axis=1)[:,None]
  A = adjacency_msoa/adjacency_msoa.sum(axis=1)[:,None]


(6.0776451156038895, 4.1121853756840725, 11.72684445240872, 4.843270949661027)