In [32]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import copy
import json

# Import data

In [33]:
# Source CSV to process. Candidate files (the extraction cell branches on the file name):
#   "../data/raw/commuter/regional_gross_domestic_product.csv"
#   "../data/raw/commuter/jobs_and_job_density.csv"
#   "../data/raw/commuter/total_final_energy_consumption_by_borough.csv"
filepath = "../data/raw/commuter/jobs_and_job_density.csv"

In [34]:
# Parse the London borough JSON (its 'features' entries carry the borough names)
with open("../data/raw/misc/london_boroughs.json") as borough_file:
    borough_coordinates = json.loads(borough_file.read())

# Load the CSV selected via `filepath` into a DataFrame
data = pd.read_csv(filepath)

# Get Borough names

In [35]:
# Borough names in the order the features appear in the JSON (tqdm reports progress)
boroughs = [
    feature['properties']['name']
    for feature in tqdm(borough_coordinates['features'])
]
# Same names, in the same order, with every space removed
boroughs_no_spaces = [borough.replace(" ", "") for borough in boroughs]


100%|██████████| 33/33 [00:00<00:00, 33709.70it/s]


# Obtain destination size data by borough and year

In [36]:
# Years whose values are exported as the initial and final destination sizes
init_year, final_year = 2001, 2005

# Variable (column) to extract, and the dataset folder the outputs are written under
var = "number_of_jobs"
dataset = "commuter"

# Set to True to rescale the exported arrays before saving
normalise = False

In [37]:
# Extract one value per area for the initial and the final year.
# The datasets differ only in their column names (and in whether rows must be
# restricted to London boroughs), so resolve that layout from the file name first
# and then run a single shared extraction.
if "jobs_and_job_density" in filepath:
    # Lower-case columns; the value column is the one selected by `var`.
    area_col, year_col, value_col = 'area', 'year', var
    keep_only_boroughs = True
elif ("total_final_energy_consumption_by_borough" in filepath
      or "regional_gross_domestic_product" in filepath):
    # 'Area'/'Year'/'Value' columns; `var` is not used for these files.
    # NOTE(review): the GDP branch previously read from an undefined `energy`
    # frame (NameError); it now uses `data` and is assumed to share the energy
    # file's column layout - confirm against the GDP CSV.
    area_col, year_col, value_col = 'Area', 'Year', 'Value'
    keep_only_boroughs = False
else:
    # Fail loudly rather than leaving the arrays undefined (or stale from a previous run)
    raise ValueError(f"Unsupported dataset, cannot extract destination sizes: {filepath}")

# Map each borough name to its space-free form. The two lists are built in lockstep,
# so pair them directly instead of relying on both sorting into the same order.
borough_name_map = dict(zip(boroughs, boroughs_no_spaces))
data['name'] = data[area_col].map(borough_name_map)

if keep_only_boroughs:
    # Slice dataframe to get only London Boroughs (unmapped areas have NaN names)
    data = data[data['name'].isin(boroughs_no_spaces)].copy()

# Get initial and final year data, ordered by area name
init_year_data = data[data[year_col] == init_year].sort_values(by=area_col).copy()
final_year_data = data[data[year_col] == final_year].sort_values(by=area_col).copy()

# Convert to numpy arrays
init_year_array = init_year_data[value_col].values
final_year_array = final_year_data[value_col].values
    
    

In [38]:
# Optional rescaling: divide each array by its own sum (so it sums to 1),
# then replace the proportions with their natural logarithms
if normalise:
    init_year_array = np.log(init_year_array / np.sum(init_year_array))
    final_year_array = np.log(final_year_array / np.sum(final_year_array))

# Sanity check

In [39]:
# Both arrays must hold exactly one entry per borough
assert len(init_year_array) == len(boroughs) == len(final_year_array)

# Export data

In [40]:
# Save to txt
np.savetxt(f'../data/input/{dataset}/initial_destination_sizes.txt',init_year_array)
np.savetxt(f'../data/input/{dataset}/final_destination_sizes.txt',final_year_array)

In [41]:
# Export semantic meaning of destination sizes
with open(f"../data/input/{dataset}/destination_sizes_info.txt", "w") as text_file:
    print(f"destination_sizes_data_filepath,{filepath}", file=text_file)
    print(f"destination_sizes_variable,{var}", file=text_file)
    print(f"initial_destination_sizes_year,{str(init_year)}", file=text_file)
    print(f"final_destination_sizes_year,{str(final_year)}", file=text_file)