In [1]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
from shapely.geometry.point import Point
from matplotlib.lines import Line2D
import matplotlib.patches as mpatches
from matplotlib_scalebar.scalebar import ScaleBar

In [2]:
#creating spatial connection matrix
def connection_matrixz_standardized_process(df):
    """Turn a square flow table into a row-standardised connection matrix.

    Column 0 of ``df`` is carried over unchanged; every remaining column is
    read as one column of a square numeric matrix. The matrix is transposed,
    each row of the transposed matrix is divided by that row's sum, and the
    diagonal is then set to zero.

    The matrix size is taken from ``df`` itself, so tables of any square size
    are accepted (the previous version only worked for exactly 332 columns).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'SUBZONE_N'`` column (used to name isolated rows).
        Columns 1.. must form an n x n numeric matrix. ``df`` is not modified.

    Returns
    -------
    tuple (pandas.DataFrame, list)
        * A frame with the same index and columns as ``df`` whose columns 1..
          hold the standardised, zero-diagonal, transposed matrix.
        * The ``'SUBZONE_N'`` values of the rows whose transposed row sum is
          exactly zero (those rows are left as they are, i.e. not divided).

    Raises
    ------
    ValueError
        If columns 1.. of ``df`` do not form a square matrix.
    """
    flows = df.iloc[:, 1:].to_numpy(dtype=float)
    n_rows, n_cols = flows.shape
    if n_rows != n_cols:
        raise ValueError(
            "flow matrix must be square, got {} rows x {} columns".format(n_rows, n_cols)
        )

    # Work on a transposed copy so the caller's data is never touched.
    weights = flows.T.copy()

    # Row totals are taken BEFORE the diagonal is cleared, so the diagonal
    # entry still contributes to the denominator.
    row_totals = weights.sum(axis=1)
    has_flow = row_totals != 0
    weights[has_flow] = weights[has_flow] / row_totals[has_flow, np.newaxis]

    # The diagonal is cleared after normalising; a row's remaining weights
    # therefore sum to 1 minus the share its diagonal entry had.
    np.fill_diagonal(weights, 0)

    standardized = pd.concat(
        [
            df.iloc[:, [0]],
            pd.DataFrame(weights, index=df.index, columns=df.columns[1:]),
        ],
        axis=1,
    )
    isolated_szone = df.loc[~has_flow, 'SUBZONE_N'].tolist()
    return standardized, isolated_szone

In [3]:
# connection matrix for clusters
mt_path_2020 = r'D:\NUS\project\LTA_data\commuting_flow_matrix\sum\2020'
mt_path_2021 = r'D:\NUS\project\LTA_data\commuting_flow_matrix\sum\2021'
# os.listdir makes no promise about ordering; sort so the listing is reproducible.
lis_file_2020 = sorted(os.listdir(path=mt_path_2020))
lis_file_2021 = sorted(os.listdir(path=mt_path_2021))


def _mean_flow_matrix(folder, file_names):
    """Return the element-wise mean of the flow tables ``file_names`` in ``folder``.

    Every file is read exactly once (the previous code seeded the running sum
    with the first file and then added that same file again inside the loop,
    so the first file was counted twice). The sum is divided by the number of
    files actually read rather than a fixed 12.

    Column 0 of the first file is carried over unchanged; columns 1.. are
    summed across files with pandas' label-aligned addition.

    Raises
    ------
    FileNotFoundError
        If ``file_names`` is empty.
    """
    if not file_names:
        raise FileNotFoundError('no flow-matrix files found in ' + folder)
    frames = [pd.read_csv(os.path.join(folder, name)) for name in file_names]
    total = frames[0].iloc[:, 1:]
    for frame in frames[1:]:
        total = total + frame.iloc[:, 1:]
    return pd.concat([frames[0].iloc[:, [0]], total / len(frames)], axis=1)


mt_2020 = _mean_flow_matrix(mt_path_2020, lis_file_2020)
mt_2021 = _mean_flow_matrix(mt_path_2021, lis_file_2021)

In [4]:
#connection matrix for covid visit
mt_path_2021 = r'D:\NUS\project\LTA_data\commuting_flow_matrix\sum\2021'
mt_path_2022 = r'D:\NUS\project\LTA_data\commuting_flow_matrix\sum\2022'
# os.listdir makes no promise about ordering; sort so the positional picks
# below always refer to the same files.
# NOTE(review): the positions are assumed to match the month in each variable
# name under lexicographic file-name order - confirm against the real names.
lis_file_2021 = sorted(os.listdir(path=mt_path_2021))
lis_file_2022 = sorted(os.listdir(path=mt_path_2022))
mt_2021_10 = pd.read_csv(os.path.join(mt_path_2021, lis_file_2021[1]))
mt_2021_11 = pd.read_csv(os.path.join(mt_path_2021, lis_file_2021[2]))
mt_2021_12 = pd.read_csv(os.path.join(mt_path_2021, lis_file_2021[3]))
mt_2022_01 = pd.read_csv(os.path.join(mt_path_2022, lis_file_2022[0]))
mt_2022_02 = pd.read_csv(os.path.join(mt_path_2022, lis_file_2022[1]))
mt_2022_03 = pd.read_csv(os.path.join(mt_path_2022, lis_file_2022[2]))
mt_2022_04 = pd.read_csv(os.path.join(mt_path_2022, lis_file_2022[3]))


def _average_months(*monthly_tables):
    """Return the element-wise mean of the given monthly flow tables.

    Column 0 of the first table is carried over unchanged; columns 1.. are
    summed with pandas' label-aligned addition and divided by the number of
    tables. A new frame is returned, so none of the inputs is modified (the
    previous code aliased a monthly frame and summed into it in place, which
    overwrote the monthly data and made the cell unsafe to re-run).
    """
    total = monthly_tables[0].iloc[:, 1:]
    for table in monthly_tables[1:]:
        total = total + table.iloc[:, 1:]
    return pd.concat(
        [monthly_tables[0].iloc[:, [0]], total / len(monthly_tables)],
        axis=1,
    )


# Phase 1 is the single 2021-10 table; copy so later steps cannot alter it.
mt_phase1 = mt_2021_10.copy()

mt_phase2 = _average_months(mt_2021_11, mt_2021_12, mt_2022_01)

# NOTE(review): the previous code added mt_2022_02 to itself and halved the
# result, which is simply mt_2022_02. That effective value is kept here;
# confirm whether a second month was meant to be averaged into phase 3.
mt_phase3 = _average_months(mt_2022_02)

mt_phase4 = _average_months(mt_2022_03, mt_2022_04)

In [5]:
def _standardize_and_report(flow_table):
    """Standardise one flow table and print how many subzones came back isolated.

    Returns the same (frame, isolated list) pair that
    connection_matrixz_standardized_process returns.
    """
    standardized, isolated = connection_matrixz_standardized_process(flow_table)
    print("the number of isolated subzone:{}".format(len(isolated)))
    return standardized, isolated


# Yearly tables get new names for their standardised form.
mt_cf_2020, isolated_szone = _standardize_and_report(mt_2020)
mt_cf_2021, isolated_szone = _standardize_and_report(mt_2021)

# Phase tables are rebound to their standardised form under the same name;
# isolated_szone ends up holding the list from the last call only.
mt_phase1, isolated_szone = _standardize_and_report(mt_phase1)
mt_phase2, isolated_szone = _standardize_and_report(mt_phase2)
mt_phase3, isolated_szone = _standardize_and_report(mt_phase3)
mt_phase4, isolated_szone = _standardize_and_report(mt_phase4)

the number of isolated subzone:19
the number of isolated subzone:22
the number of isolated subzone:32
the number of isolated subzone:22
the number of isolated subzone:22
the number of isolated subzone:22


In [6]:
# Write each standardised matrix to its CSV file (no index column).
_output_dir = r'D:\NUS\project\LTA_data\commuting_flow_matrix'
_exports = (
    ('mt_clusters_2020.csv', mt_cf_2020),
    ('mt_clusters_2021.csv', mt_cf_2021),
    ('mt_phase1.csv', mt_phase1),
    ('mt_phase2.csv', mt_phase2),
    ('mt_phase3.csv', mt_phase3),
    ('mt_phase4.csv', mt_phase4),
)
for _file_name, _frame in _exports:
    _frame.to_csv(_output_dir + '\\' + _file_name, index=False)
