# Risk Flow Matrix Modeling

In [10]:
import pandas as pd
import numpy as np
import math
from collections import defaultdict

## Data Preprocessing

### Read the preprocessed dataset:
 - "origin_dest_trips_census_tract_level.csv"
 - "tracts_4.csv"

In [11]:
# this dataset is for the nested hashmap
OD_p1_df = pd.read_csv('Streetlight_Data/clean_data/streetlight_OD_p1.csv')[['RESIDENCE','WORKPLACE','JOBS']]
OD_p2_df = pd.read_csv('Streetlight_Data/clean_data/streetlight_OD_p2.csv')[['RESIDENCE','WORKPLACE','JOBS']]
OD_p3_df = pd.read_csv('Streetlight_Data/clean_data/streetlight_OD_p3.csv')[['RESIDENCE','WORKPLACE','JOBS']]
OD_p4_df = pd.read_csv('Streetlight_Data/clean_data/streetlight_OD_p4.csv')[['RESIDENCE','WORKPLACE','JOBS']]

In [3]:
OD_p1_df

Unnamed: 0,RESIDENCE,WORKPLACE,JOBS
0,17001000100,17001000100,5005.177622
1,17001000100,17001000201,432.101665
2,17001000100,17001000202,324.076249
3,17001000100,17001000400,360.084721
4,17001000100,17001000500,4285.008180
...,...,...,...
80423,17203030700,17113005102,46.336570
80424,17203030700,17143001600,36.275552
80425,17203030700,17179021201,155.851852
80426,17203030700,17203030601,480.273810


In [12]:
ct_info_cases_data = pd.read_csv("Statistical_Analysis/data/ct_info_cases_data.csv")
ct_cases_df = ct_info_cases_data[['GEOID','countyFIPS','estimate_cases1','estimate_cases2','estimate_cases3','estimate_cases4']]
ct_cases_df

Unnamed: 0,GEOID,countyFIPS,estimate_cases1,estimate_cases2,estimate_cases3,estimate_cases4
0,17091011700,17091,0.030119,27.107335,38.311700,116.471181
1,17091011800,17091,0.023156,20.840201,29.454151,89.543398
2,17119400951,17119,0.018442,10.548614,17.187603,142.941103
3,17119400952,17119,0.012385,7.084098,11.542621,95.994478
4,17135957500,17135,0.000000,4.240201,4.783816,74.692765
...,...,...,...,...,...,...
3118,17037000100,17037,0.000000,25.466793,35.104603,177.373983
3119,17037001500,17037,0.000000,14.444589,19.911088,100.605297
3120,17037000400,17037,0.000000,32.713750,45.094142,227.848402
3121,17037000300,17037,0.000000,10.168505,14.016736,70.822746


### Create a dictionary: to record each origin->destination: number of trips

use the dataset: OD_census_tract_level 

1. Generate a unique set of origins in commute trips

2. Generate a doubly nested hashmap (python dictionary: key-value pair): \
 first mapping: **origin -> destination** \
 second mapping: **destination -> number of trips**

In [13]:
#function to fo defaultdict(defaultdict(int))
# when {} is empty, enable
# df['a']['b']+=1
def defaultdict_float():
    return defaultdict(float)

In [None]:
# anuallly aggregated data 
# origin -> destination (county level) frequency dictionary
# rename the origin-> destination df
OD = OD_p1_df

nu_OD_DblMap = defaultdict(defaultdict_float)
num_ct = OD_p1_df.shape[0]

for i in range(num_ct):
    fip_o = OD.iloc[i,0]
    fip_d = OD.iloc[i,1]
    num_trips = OD.iloc[i,2]   
    num_cases = float(ct_cases_df[ct_cases_df['GEOID'] == fip_o]['estimate_cases1'])
    if num_cases == 0:
        continue
    else:
        nu_OD_DblMap[fip_o][fip_d] = num_trips * num_cases

In [7]:
de_OD_Map = defaultdict(float)
for fip_d in nu_OD_DblMap:
    sum = 0
    for fip_o in nu_OD_DblMap:
        sum += nu_OD_DblMap[fip_o][fip_d]
    de_OD_Map[fip_d] = sum  

In [8]:
Risk_DblMap = defaultdict(defaultdict_float)
for fip_o in nu_OD_DblMap:
    for fip_d in nu_OD_DblMap[fip_o]:
        nu = nu_OD_DblMap[fip_o][fip_d]
        de = de_OD_Map[fip_d]
        if de == 0 or nu == 0:
            continue
        else:               
            Risk_DblMap[fip_o][fip_d] = nu/de

In [14]:
# new
def getUnnormalizedRisk(OD, estimate_cases, apple_mobility):
    nu_OD_DblMap = defaultdict(defaultdict_float)
    num_ct = OD.shape[0]

    for i in range(num_ct):
        fip_o = OD.iloc[i,0]
        fip_d = OD.iloc[i,1]
        num_trips = OD.iloc[i,2]   
        num_cases = float(ct_cases_df[ct_cases_df['GEOID'] == fip_o][estimate_cases])
        if num_cases == 0:
            continue
        else:
            nu_OD_DblMap[fip_o][fip_d] = num_trips * num_cases * apple_mobility
    return nu_OD_DblMap

In [15]:
def getRisk(OD, estimate_cases, apple_mobility):      
    nu_OD_DblMap = defaultdict(defaultdict_float)
    num_ct = OD.shape[0]
    for i in range(num_ct):
        fip_o = OD.iloc[i,0]
        fip_d = OD.iloc[i,1]
        num_trips = OD.iloc[i,2]   
        num_cases = float(ct_cases_df[ct_cases_df['GEOID'] == fip_o][estimate_cases])
        if num_cases == 0:
            continue
        else:
            nu_OD_DblMap[fip_o][fip_d] = num_trips * num_cases * apple_mobility

    de_OD_Map = defaultdict(float)
    for fip_d in nu_OD_DblMap:
        sum = 0
        for fip_o in nu_OD_DblMap:
            sum += nu_OD_DblMap[fip_o][fip_d]
        de_OD_Map[fip_d] = sum  

    Risk_DblMap = defaultdict(defaultdict_float)
    for fip_o in nu_OD_DblMap:
        for fip_d in nu_OD_DblMap[fip_o]:
            nu = nu_OD_DblMap[fip_o][fip_d]
            de = de_OD_Map[fip_d]
            if de == 0 or nu == 0:
                continue
            else:               
                Risk_DblMap[fip_o][fip_d] = nu/de
    return Risk_DblMap

In [16]:
def getRisk(OD, estimate_cases, apple_mobility):      
    nu_OD_DblMap = defaultdict(defaultdict_float)
    num_ct = OD.shape[0]
    for i in range(num_ct):
        fip_o = OD.iloc[i,0]
        fip_d = OD.iloc[i,1]
        num_trips = OD.iloc[i,2]   
        num_cases = float(ct_cases_df[ct_cases_df['GEOID'] == fip_o][estimate_cases])
        if num_cases == 0:
            continue
        else:
            nu_OD_DblMap[fip_o][fip_d] = num_trips * num_cases * apple_mobility

    de_OD_Map = defaultdict(float)
    for fip_d in nu_OD_DblMap:
        sum = 0
        for fip_o in nu_OD_DblMap:
            sum += nu_OD_DblMap[fip_o][fip_d]
        de_OD_Map[fip_d] = sum  

    Risk_DblMap = defaultdict(defaultdict_float)
    for fip_o in nu_OD_DblMap:
        for fip_d in nu_OD_DblMap[fip_o]:
            nu = nu_OD_DblMap[fip_o][fip_d]
            de = de_OD_Map[fip_d]
            if de == 0 or nu == 0:
                continue
            else:               
                Risk_DblMap[fip_o][fip_d] = nu/de
    return Risk_DblMap

In [17]:
unor_Risk_DblMap1 =  getUnnormalizedRisk(OD = OD_p1_df, 
                       estimate_cases = 'estimate_cases1',
                       apple_mobility = 109.278621/100)
unor_Risk_DblMap2 =  getUnnormalizedRisk(OD = OD_p2_df, 
                       estimate_cases = 'estimate_cases2',
                       apple_mobility = 73.2133333/100)
unor_Risk_DblMap3 =  getUnnormalizedRisk(OD = OD_p3_df, 
                       estimate_cases = 'estimate_cases3',
                       apple_mobility = 130.970323/100)
unor_Risk_DblMap4 =  getUnnormalizedRisk(OD = OD_p4_df,
                       estimate_cases = 'estimate_cases4',
                       apple_mobility = 139.87/100)         

In [20]:
list_unor_Risk_DblMap = [unor_Risk_DblMap1, unor_Risk_DblMap2, unor_Risk_DblMap3, unor_Risk_DblMap4]

In [21]:
Risk_DblMap1 = getRisk(OD = OD_p1_df, 
                       estimate_cases = 'estimate_cases1',
                       apple_mobility = 109.278621/100)
Risk_DblMap2 = getRisk(OD = OD_p2_df, 
                       estimate_cases = 'estimate_cases2',
                       apple_mobility = 73.2133333/100)
Risk_DblMap3 = getRisk(OD = OD_p3_df, 
                       estimate_cases = 'estimate_cases3',
                       apple_mobility = 130.970323/100)
Risk_DblMap4 = getRisk(OD = OD_p4_df,
                       estimate_cases = 'estimate_cases4',
                       apple_mobility = 139.87/100)

In [22]:
list_Risk_DblMap = [Risk_DblMap1, Risk_DblMap2, Risk_DblMap3, Risk_DblMap4]

### Transfer double risk map to csv file and save to path

In [23]:
def saveRiskflowToCSV(Risk_DblMap, path):
    riskflow_df = pd.DataFrame(columns = ('START', 'END', 'RISK_FLOW'))
    for fip_o in Risk_DblMap:
        for fip_d in Risk_DblMap[fip_o]:
            riskflow = Risk_DblMap[fip_o][fip_d]
        new_row = pd.DataFrame({'START':[fip_o], 'END':[fip_d], 'RISK_FLOW': [riskflow] })
        riskflow_df = riskflow_df.append(new_row)
    riskflow_df.to_csv(path)

In [24]:
saveRiskflowToCSV(Risk_DblMap = unor_Risk_DblMap1, 
                  path = 'Streetlight_Data/clean_data/unnorm/unor_riskflow1_df.csv')
saveRiskflowToCSV(Risk_DblMap = unor_Risk_DblMap2, 
                  path = 'Streetlight_Data/clean_data/unnorm/unor_riskflow2_df.csv')
saveRiskflowToCSV(Risk_DblMap = unor_Risk_DblMap3, 
                  path = 'Streetlight_Data/clean_data/unnorm/unor_riskflow3_df.csv')
saveRiskflowToCSV(Risk_DblMap = unor_Risk_DblMap4, 
                  path = 'Streetlight_Data/clean_data/unnorm/unor_riskflow4_df.csv')

In [25]:
saveRiskflowToCSV(Risk_DblMap = Risk_DblMap1, 
                  path = 'Streetlight_Data/clean_data/riskflow1_df.csv')
saveRiskflowToCSV(Risk_DblMap = Risk_DblMap2, 
                  path = 'Streetlight_Data/clean_data/riskflow2_df.csv')
saveRiskflowToCSV(Risk_DblMap = Risk_DblMap3, 
                  path = 'Streetlight_Data/clean_data/riskflow3_df.csv')
saveRiskflowToCSV(Risk_DblMap = Risk_DblMap4, 
                  path = 'Streetlight_Data/clean_data/riskflow4_df.csv')

### Calculaye exposure entropy

In [14]:
# Exposure_Map = defaultdict(defaultdict_float)
de_Risk_Map = defaultdict(float)
for fip_d in Risk_DblMap:
    sum = 0
    for fip_o in Risk_DblMap[fip_o]:
        sum += Risk_DblMap[fip_o][fip_d]
    de_Risk_Map[fip_d] = sum 

RuntimeError: dictionary changed size during iteration

In [None]:
reverse_Risk_DblMap = defaultdict(defaultdict_float)
for fip_o in Risk_DblMap:
    for fip_d in Risk_DblMap[fip_o]:
        risk = Risk_DblMap[fip_o][fip_d]   
        if risk == 0:
            continue
        else:
            reverse_Risk_DblMap[fip_d][fip_o] = risk

In [None]:
np.sum(list(reverse_Risk_DblMap[17031811701].values()))

In [None]:
Exposure_Map = {}
for fip_d in reverse_Risk_DblMap:
    v = np.array(list(reverse_Risk_DblMap[fip_d].values()))
    logv = np.log(v)
    entropy = - np.sum(v * logv)
    Exposure_Map[fip_d] = entropy

In [33]:
def getExposure(Risk_DblMap):
    reverse_Risk_DblMap = defaultdict(defaultdict_float)
    for fip_o in Risk_DblMap:
        for fip_d in Risk_DblMap[fip_o]:
            risk = Risk_DblMap[fip_o][fip_d]   
            if risk == 0:
                continue
            else:
                reverse_Risk_DblMap[fip_d][fip_o] = risk

    Exposure_Map = {}
    for fip_d in reverse_Risk_DblMap:
        v = np.array(list(reverse_Risk_DblMap[fip_d].values()))
        logv = np.log(v)
        entropy = abs(np.sum(v * logv))
        Exposure_Map[fip_d] = entropy
    return Exposure_Map

In [34]:
def saveExposureToCSV(list_Risk_DblMap, path):
    Exposure_Map1 = getExposure(list_Risk_DblMap[0])
    Exposure_Map2 = getExposure(list_Risk_DblMap[1])
    Exposure_Map3 = getExposure(list_Risk_DblMap[2])
    Exposure_Map4 = getExposure(list_Risk_DblMap[3])
    
    exposure_df = pd.DataFrame(columns = ('FIPS', 'exposure1', 'exposure2', 'exposure3', 'exposure4'))
    
    for i, fip_d in enumerate(Exposure_Map1): 
        FIPS = str(fip_d)
        exposure1 = Exposure_Map1[fip_d]
        exposure2 = Exposure_Map2[fip_d]
        exposure3 = Exposure_Map3[fip_d]
        exposure4 = Exposure_Map4[fip_d] 
        new_row = pd.DataFrame({'FIPS':[FIPS], 'exposure1':[exposure1],
                            'exposure2':[exposure2], 'exposure3':[exposure3],
                            'exposure4':[exposure4]})
        exposure_df = exposure_df.append(new_row)
    
    exposure_df.to_csv(path)
    

In [28]:
saveExposureToCSV(list_unor_Risk_DblMap, 'Streetlight_Data/clean_data/unnorm/exposure_df.csv')

In [29]:
saveExposureToCSV(list_Risk_DblMap, 'Streetlight_Data/clean_data/risk/exposure_df.csv')

KeyError: 17031330100

---

In [31]:
Exposure_Map1 = getExposure(Risk_DblMap1)
Exposure_Map2 = getExposure(Risk_DblMap2)
Exposure_Map3 = getExposure(Risk_DblMap3)
Exposure_Map4 = getExposure(Risk_DblMap4)

In [32]:
Exposure_Map1 

{17001000100: 3.006090085750307,
 17001000201: 0.8180134462594445,
 17001000202: 1.4557261944294986,
 17001000400: 1.8175432349229261,
 17001000500: 0.8727567390442894,
 17001000600: 1.6050691647773498,
 17001000700: 0.1360197802414866,
 17001000800: 0.6841003151803807,
 17001000900: 1.603601995738549,
 17001001001: 1.7973317073601516,
 17001001002: 2.136522603200904,
 17001001100: 5.478133284614947,
 17001010100: 1.0669116384684065,
 17001010200: 0.6200377714021749,
 17001010300: 1.6248618168714828,
 17001010400: 2.359216228093035,
 17001010500: 2.225116630313485,
 17001010600: 4.486212776853551,
 17019000200: 0.34135134726962935,
 17019000301: 0.5550422586166961,
 17019000302: 0.15701247606842117,
 17019000401: 0.7718562679336207,
 17019000402: 0.22781762155631138,
 17019000500: 1.2224360784895547,
 17019000700: 1.139757769643935,
 17019000800: 3.3038444580532684,
 17019000901: 3.96817824470242,
 17019000902: 3.6945980846020845,
 17019001000: 3.7983391227802406,
 17019001100: 0.88641

In [17]:
exposure_df = pd.DataFrame(columns = ('FIPS', 'exposure1', 'exposure2', 'exposure3', 'exposure4'))

In [15]:
for i, fip_d in enumerate(Exposure_Map1): 
    FIPS = str(fip_d)
    exposure1 = Exposure_Map1[fip_d]
    exposure2 = Exposure_Map2[fip_d]
    exposure3 = Exposure_Map3[fip_d]
    exposure4 = Exposure_Map4[fip_d] 
    new_row = pd.DataFrame({'FIPS':[FIPS], 'exposure1':[exposure1],
                        'exposure2':[exposure2], 'exposure3':[exposure3],
                        'exposure4':[exposure4]})
    exposure_df = exposure_df.append( new_row )

In [16]:
exposure_df.to_csv('Streetlight_Data/clean_data/exposure_df.csv')