In [None]:
import os
# Set before geopandas is imported further down: geopandas consults USE_PYGEOS
# at import time, and '0' asks it not to use the PyGEOS backend.
os.environ['USE_PYGEOS'] = '0'
import sys
# Put the notebook's parent directory on sys.path (once) — presumably so the
# local `ticodm` package imported below resolves; confirm against repo layout.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import json
import glob
import swifter
import networkx as nx
import geopandas as gpd
import matplotlib as mpl
import dask.dataframe as dd
import matplotlib.cm as cm
import matplotlib.pyplot as plt

from math import comb
from pyproj import Proj
from datetime import date
from copy import deepcopy
from functools import partial
from argparse import Namespace
from scipy.spatial import KDTree
from itertools import islice,product
# from tqdm.notebook import tqdm as tqdmT
from tqdm.auto import tqdm
from networkx.classes.function import path_weight
from scipy.spatial.distance import squareform,pdist
from mpl_toolkits.axes_grid1 import make_axes_locatable
from shapely.geometry import Point,LineString, Polygon, mapping, MultiPoint, box


from ticodm.utils import *
from ticodm.notebook_functions import *
from ticodm.spatial_interaction_model import *
from ticodm.spatial_interaction_model_mcmc import *
from ticodm.contingency_table_mcmc import ContingencyTableMarkovChainMonteCarlo
from ticodm.contingency_table import instantiate_ct

# mpl.rcParams['agg.path.chunksize'] = 10000

%matplotlib inline

# Cambridge commuter (LSOAs to MSOAs)
## Import table and geometries

In [None]:
# Name of the origin/destination zoning pair; the dataset name is derived from it.
geometry_name = 'lsoas_to_msoas'
dataset = f'cambridge_work_commuter_{geometry_name}'

# Base name of a raw flow table. Other candidate values kept for reference:
#   'middle_super_output_areas_work_flows_cambridge_2011'
#   'lower_super_output_areas_work_flows_cambridge_2011'
#   'output_areas_work_flows_cambridge_2011'
table_filename = 'lower_super_output_areas_work_flows_cambridge_2011'

# Raw inputs: one flow table (CSV) and one boundary file (GeoJSON) per zoning level.
lsoas_table_path, lsoas_geometries_path = (
    '../data/raw/cambridge_commuter/lower_super_output_areas_work_flows_cambridge_2011.csv',
    '../data/raw/cambridge_commuter/cambridge_lsoas.geojson',
)
msoas_table_path, msoas_geometries_path = (
    '../data/raw/cambridge_commuter/middle_super_output_areas_work_flows_cambridge_2011.csv',
    '../data/raw/cambridge_commuter/cambridge_msoas.geojson',
)

In [None]:
# Destination attraction: job counts from the employment survey (headerless CSV).
# NOTE(review): the id column is called 'lsoa_id' although the file name says
# MSOA — name kept as-is because other cells may rely on it; confirm.
employment = pd.read_csv(
    '../data/raw/cambridge_commuter/employment_survey_msoa.csv',
    header=None,
)
employment.columns = ['lsoa_id', 'number_of_jobs']
# Keep only the part of each label that precedes ' : '.
employment['lsoa_id'] = employment['lsoa_id'].map(lambda label: label.split(' : ')[0])
# Order rows by id, then pull the job counts out in that order.
employment = employment.sort_values(by='lsoa_id')
number_of_jobs = employment['number_of_jobs'].to_numpy()

In [None]:
# LSOA-level flow table; the first CSV column becomes the row index.
lsoas_table = pd.read_csv(lsoas_table_path, index_col=0)
# Alphabetically ordered column labels, used as the origin zone ordering.
# NOTE(review): these are *column* labels — this presumes the rows carry the
# same set of ids (square table); confirm.
origin_geometry_ids = sorted(lsoas_table.columns)
# Columns first, then rows, in alphabetical order.
lsoas_table = lsoas_table[origin_geometry_ids].sort_index()

# MSOA-level flow table, treated the same way.
msoas_table = pd.read_csv(msoas_table_path, index_col=0)
# Alphabetically ordered column labels, used as the destination zone ordering.
destination_geometry_ids = sorted(msoas_table.columns)
msoas_table = msoas_table[destination_geometry_ids].sort_index()

# Load both boundary layers and label them as EPSG:27700.
# set_crs(..., allow_override=True) only replaces the CRS attached to the
# layer; coordinates are NOT transformed (that would be to_crs).
# NOTE(review): assumes the files already hold EPSG:27700 coordinates — confirm.
lsoas, msoas = (
    gpd.read_file(boundaries_path).set_crs('epsg:27700', allow_override=True)
    for boundaries_path in (lsoas_geometries_path, msoas_geometries_path)
)

# LSOAs (origin zones)
# Key the layer by its LSOA code (as str) and put the rows in the same order
# as the flow table's origin ids. Ids absent from the boundary file come out
# of reindex as rows with missing geometry.
lsoas = lsoas.rename(columns={"LSOA11CD":"geometry_id"})
lsoas['geometry_id'] = lsoas['geometry_id'].astype(str)
lsoas = lsoas.set_index("geometry_id").reindex(origin_geometry_ids).reset_index()
# Compute the centroids once and reuse them for both coordinates
# (previously `.centroid` was evaluated three times and a throw-away
# 'centroid' column was created only to be dropped by the selection below).
lsoa_centroids = lsoas.centroid
lsoas["LONG"] = lsoa_centroids.x
lsoas["LAT"] = lsoa_centroids.y
# Keep the relevant columns; the explicit copy makes the following column
# assignment operate on an independent frame (avoids SettingWithCopyWarning).
lsoas = lsoas[["geometry_id","LONG","LAT","geometry"]].copy()
# Centroids as point geometries, rebuilt from the stored coordinates.
lsoas['centroid'] = gpd.points_from_xy(x=lsoas['LONG'],y=lsoas['LAT'])

# MSOAs (destination zones)
# Key the layer by its MSOA code (as str) and put the rows in the same order
# as the flow table's destination ids. Ids absent from the boundary file come
# out of reindex as rows with missing geometry.
msoas = msoas.rename(columns={"code":"geometry_id"})
msoas['geometry_id'] = msoas['geometry_id'].astype(str)
msoas = msoas.set_index("geometry_id").reindex(destination_geometry_ids).reset_index()
# Compute the centroids once and reuse them for both coordinates
# (previously `.centroid` was evaluated three times and a throw-away
# 'centroid' column was created only to be dropped by the selection below).
msoa_centroids = msoas.centroid
msoas["LONG"] = msoa_centroids.x
msoas["LAT"] = msoa_centroids.y
# Keep the relevant columns; the explicit copy makes the following column
# assignment operate on an independent frame (avoids SettingWithCopyWarning).
msoas = msoas[["geometry_id","LONG","LAT","geometry"]].copy()
# Centroids as point geometries, rebuilt from the stored coordinates.
msoas['centroid'] = gpd.points_from_xy(x=msoas['LONG'],y=msoas['LAT'])

# Geometry lookup: map every LSOA11CD to the MSOA11CD listed on the same row.
geometry_conversions = pd.read_csv('../data/raw/cambridge_commuter/OA11_LSOA11_MSOA11_LAD11_EW_LUv2.csv')
mapper = dict(zip(geometry_conversions.LSOA11CD,geometry_conversions.MSOA11CD))
# Build the LSOA (rows) to MSOA (columns) table: relabel each column with its
# MSOA code, then add up all columns that now share a label. Columns whose
# label is not in the lookup keep their original label.
lsoas_to_msoas_table_df = lsoas_table.rename(mapper,axis=1)
# DataFrame.groupby(..., axis=1) is deprecated since pandas 2.1, so group the
# transposed frame by its (row) labels and transpose back. Groups come out
# sorted by label, exactly as before.
lsoas_to_msoas_table_df = lsoas_to_msoas_table_df.T.groupby(level=0).sum().T
# Plain array version of the aggregated table.
lsoas_to_msoas_table = lsoas_to_msoas_table_df.values

# Table dimensions: I rows by J columns.
I, J = lsoas_to_msoas_table.shape

# Destination side: column sums of the flow table plus the job counts.
msoas['destination_demand'] = lsoas_to_msoas_table.sum(axis=0)
# NOTE(review): assigned by position — presumes number_of_jobs is ordered
# exactly like the rows of `msoas`; verify the ids line up.
msoas['number_of_jobs'] = number_of_jobs
msoas['geometry_type'] = 'msoa'

# Origin side: row sums of the flow table.
lsoas['origin_demand'] = lsoas_to_msoas_table.sum(axis=1)
lsoas['geometry_type'] = 'lsoa'


In [None]:
# Stack the MSOA rows on top of the LSOA rows in a single frame.
# Each layer keeps its own row labels (ignore_index is not set).
geometries = pd.concat(objs=[msoas, lsoas], axis=0)

# Export

In [None]:
# Save the aggregated LSOA-to-MSOA flow table, row labels included.
lsoas_to_msoas_table_df.to_csv(
    "../data/raw/cambridge_commuter/lower_super_output_areas_to_medium_super_output_areas_work_flows_cambridge_2011.csv",
    index=True,
)

In [None]:
# Write the combined zones to GeoJSON, indexed by geometry_id.
geometries_path = f'../data/inputs/{dataset}/{geometry_name}.geojson'
# Create the dataset folder first: the write fails if the target directory is
# missing, which is the case the first time a dataset is exported.
os.makedirs(os.path.dirname(geometries_path), exist_ok=True)
# The extra point-geometry column 'centroid' is dropped before writing; only
# the 'geometry' column is exported as the feature geometry.
(
    geometries
    .drop(columns=['centroid'])
    .set_index('geometry_id')
    .to_file(geometries_path, driver='GeoJSON')
)