# Process Vulnerability Time Series Data

In [352]:
# Import packages
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import mapclassify as mc
from datetime import datetime
from datetime import timedelta

Step 1: Load Shapefile Data and Get Date List

In [353]:
# Define file path
modelFolderPath = "Vulnerability_Animation"

In [354]:
# Read updated shapefile and get the date list
vulNameList = glob.glob(os.path.join(modelFolderPath,"Vulnerability_index_*.shp"))
vulList = []
for i in range(0, len(vulNameList)):
    vulList.append(glob.glob(os.path.join(modelFolderPath,"Vulnerability_index_*.shp"))[i][44:52])

vulList = sorted(vulList, key=lambda x: datetime.strptime(x, "%m%d%Y"))
print(vulList)

['05272020', '05282020', '05292020', '05302020', '05312020', '06012020', '06022020', '06032020', '06042020', '06052020', '06062020', '06072020', '06082020', '06092020']


Step 2: Find Missing Dates

In [355]:
# Define functions to find the indices of missing dates
def findIndexOfMissingDate(nameList):
    IndexOfMissingDateList = []
    for i in range(0, len(nameList)-1):
        if (datetime.strptime(nameList[i+1], "%m%d%Y").date() != datetime.strptime(vulList[i], "%m%d%Y").date()+timedelta(days=1)):
            IndexOfMissingDateList.append(i)
    return IndexOfMissingDateList
def findRangeOfMissingDate(index,nameList):
    delta = datetime.strptime(nameList[index+1], "%m%d%Y").date()-datetime.strptime(vulList[index], "%m%d%Y").date()
    return delta.days-1

print(findIndexOfMissingDate(vulList))

[]


Step 3: Specify the Start and End of Time Series

In [356]:
# Define functions to select the first and last date
def selectFirstDate(nameList):
    dmin = datetime.strptime(nameList[0], "%m%d%Y").date()
    index = 0
    for i in range(0, len(nameList)):
        d = datetime.strptime(nameList[i], "%m%d%Y").date()
        if (d<=dmin):
            index = i
            dmin = d
    return index

def selectLastDate(nameList):
    dmax = datetime.strptime(nameList[0], "%m%d%Y").date()
    index = 0
    for i in range(0, len(nameList)):
        d = datetime.strptime(nameList[i], "%m%d%Y").date()
        if (d>=dmax):
            index = i
            dmax = d
    return index

Step 4: Generate Time Series Data

In [357]:
# Read data into dataframe
firstIndex = selectFirstDate(vulList)
lastIndex = selectLastDate(vulList)
vul_df = gpd.read_file(glob.glob(os.path.join(modelFolderPath,"Vulnerability_index_"+vulList[firstIndex]+"*.shp"))[0])
vul_df

Unnamed: 0,index,GEOID,Pop,RPL_THEMES,HIVCase,county_cod,nearest_os,pop_1,acc_valu_1,HIV_norm,confirmed_,total_test,demographi,covid,covid_norm,values,geometry
0,Adams,17001001100,8254,0.5014,47.0,001,233442606,8254.0,0.230973,0.001823,44,4114,"{""age"": [{""age_group"": ""Unknown"", ""count"": 0, ...",88.278075,0.019906,0.321038,"POLYGON ((-91.37781 39.89897, -91.37779 39.900..."
1,Adams,17001010500,2924,0.0819,47.0,001,233428915,2924.0,0.230973,0.001823,44,4114,"{""age"": [{""age_group"": ""Unknown"", ""count"": 0, ...",31.272727,0.007052,0.190046,"POLYGON ((-91.48813 40.02458, -91.48724 40.024..."
2,Adams,17001010200,3486,0.5252,47.0,001,233466063,3486.0,0.000000,0.001823,44,4114,"{""age"": [{""age_group"": ""Unknown"", ""count"": 0, ...",37.283422,0.008407,0.361105,"POLYGON ((-91.14779 40.06499, -91.14778 40.065..."
3,Adams,17001010300,5845,0.2692,47.0,001,233524013,5845.0,0.050814,0.001823,44,4114,"{""age"": [{""age_group"": ""Unknown"", ""count"": 0, ...",62.513369,0.014096,0.276909,"POLYGON ((-91.26571 39.76017, -91.26571 39.760..."
4,Adams,17001000500,2181,0.8217,47.0,001,233452389,2181.0,0.230973,0.001823,44,4114,"{""age"": [{""age_group"": ""Unknown"", ""count"": 0, ...",23.326203,0.005260,0.411269,"POLYGON ((-91.40341 39.94942, -91.40340 39.950..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3116,Woodford,17203030502,2452,0.2743,19.0,203,237580508,2452.0,0.111471,0.000737,18,1020,"{""age"": [{""age_group"": ""Unknown"", ""count"": 0, ...",43.270588,0.009757,0.266208,"POLYGON ((-89.56362 40.78580, -89.56279 40.791..."
3117,Woodford,17203030100,2476,0.2836,19.0,203,237589335,2476.0,0.001838,0.000737,18,1020,"{""age"": [{""age_group"": ""Unknown"", ""count"": 0, ...",43.694118,0.009852,0.288728,"POLYGON ((-89.49981 40.87485, -89.49963 40.874..."
3118,Woodford,17203030300,2873,0.2451,19.0,203,237592978,2873.0,0.005680,0.000737,18,1020,"{""age"": [{""age_group"": ""Unknown"", ""count"": 0, ...",50.700000,0.011432,0.277047,"POLYGON ((-89.27410 40.83500, -89.27409 40.835..."
3119,Woodford,17203030501,7881,0.0675,19.0,203,237579742,7881.0,0.111471,0.000737,18,1020,"{""age"": [{""age_group"": ""Unknown"", ""count"": 0, ...",139.076471,0.031360,0.212809,"POLYGON ((-89.51273 40.75691, -89.51273 40.757..."


In [358]:
# Only keep necessary columns
vul = vul_df[['GEOID','index','values','pop_1','geometry']]
vul

Unnamed: 0,index,values,pop_1,geometry
0,Adams,0.321038,8254.0,"POLYGON ((-91.37781 39.89897, -91.37779 39.900..."
1,Adams,0.190046,2924.0,"POLYGON ((-91.48813 40.02458, -91.48724 40.024..."
2,Adams,0.361105,3486.0,"POLYGON ((-91.14779 40.06499, -91.14778 40.065..."
3,Adams,0.276909,5845.0,"POLYGON ((-91.26571 39.76017, -91.26571 39.760..."
4,Adams,0.411269,2181.0,"POLYGON ((-91.40341 39.94942, -91.40340 39.950..."
...,...,...,...,...
3116,Woodford,0.266208,2452.0,"POLYGON ((-89.56362 40.78580, -89.56279 40.791..."
3117,Woodford,0.288728,2476.0,"POLYGON ((-89.49981 40.87485, -89.49963 40.874..."
3118,Woodford,0.277047,2873.0,"POLYGON ((-89.27410 40.83500, -89.27409 40.835..."
3119,Woodford,0.212809,7881.0,"POLYGON ((-89.51273 40.75691, -89.51273 40.757..."


In [359]:
# Generate time series data, find missing dates and fill the corresponding values with zero
missingDates = findIndexOfMissingDate(vulList)
for i in range(1, len(vulList)):
    vul_next = gpd.read_file(glob.glob(os.path.join(modelFolderPath,"Vulnerability_index_"+vulList[i]+"*.shp"))[0])
    vul['values'] = pd.concat([vul['values'].astype(str), vul_next['values'].astype(str)], axis=1).apply(lambda x: ','.join(x), axis=1)
    print(vul['values'])
    if i in missingDates:
        missingRange = findRangeOfMissingDate(i,vulList)
        for j in range(0,missingRange):
            vul_next['values'] = 0
            vul['values'] = pd.concat([vul['values'].astype(str), vul_next['values'].astype(str)], axis=1).apply(lambda x: ','.join(x), axis=1)
vul

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


0       0.321037700552695,0.321077475031174
1        0.190046104929943,0.19006019513773
2       0.361105089044322,0.361121887424741
3       0.276909347951266,0.276937513910402
4       0.411269368429435,0.411279878259525
                       ...                 
3116    0.266208146246386,0.266143312243825
3117    0.288727863727449,0.288662395134324
3118    0.277047089935418,0.276971124156723
3119    0.212809354942309,0.212600971266051
3120    0.336681712820695,0.336569258084605
Name: values, Length: 3121, dtype: object
0       0.321037700552695,0.321077475031174,0.32105123...
1       0.190046104929943,0.19006019513773,0.190050897...
2       0.361105089044322,0.361121887424741,0.36111080...
3       0.276909347951266,0.276937513910402,0.27691892...
4       0.411269368429435,0.411279878259525,0.41127294...
                              ...                        
3116    0.266208146246386,0.266143312243825,0.26632519...
3117    0.288727863727449,0.288662395134324,0.28884605...
3118    0.

Unnamed: 0,index,values,pop_1,geometry
0,Adams,"0.321037700552695,0.321077475031174,0.32105123...",8254.0,"POLYGON ((-91.37781 39.89897, -91.37779 39.900..."
1,Adams,"0.190046104929943,0.19006019513773,0.190050897...",2924.0,"POLYGON ((-91.48813 40.02458, -91.48724 40.024..."
2,Adams,"0.361105089044322,0.361121887424741,0.36111080...",3486.0,"POLYGON ((-91.14779 40.06499, -91.14778 40.065..."
3,Adams,"0.276909347951266,0.276937513910402,0.27691892...",5845.0,"POLYGON ((-91.26571 39.76017, -91.26571 39.760..."
4,Adams,"0.411269368429435,0.411279878259525,0.41127294...",2181.0,"POLYGON ((-91.40341 39.94942, -91.40340 39.950..."
...,...,...,...,...
3116,Woodford,"0.266208146246386,0.266143312243825,0.26632519...",2452.0,"POLYGON ((-89.56362 40.78580, -89.56279 40.791..."
3117,Woodford,"0.288727863727449,0.288662395134324,0.28884605...",2476.0,"POLYGON ((-89.49981 40.87485, -89.49963 40.874..."
3118,Woodford,"0.277047089935418,0.276971124156723,0.27718423...",2873.0,"POLYGON ((-89.27410 40.83500, -89.27409 40.835..."
3119,Woodford,"0.212809354942309,0.212600971266051,0.21318555...",7881.0,"POLYGON ((-89.51273 40.75691, -89.51273 40.757..."


In [361]:
# Add necessary columns and change column names
firstDate = datetime.strptime(vulList[firstIndex], "%m%d%Y").date()
lastDate = datetime.strptime(vulList[lastIndex], "%m%d%Y").date()
vul_last = gpd.read_file(glob.glob(os.path.join(modelFolderPath,"Vulnerability_index_"+vulList[lastIndex]+"*.shp"))[0])
vul["today_vul"] = vul_last[["values"]]
vul.loc[vul["today_vul"]<0, "today_vul"] = 0
vul["start"] = firstDate.strftime("%Y-%m-%d")
vul["end"] = lastDate.strftime("%Y-%m-%d")
vul["dt_unit"] = "day"
vul = vul.rename(columns={"index": "NAME", "values": "cases_ts", "pop_1":"population"})
vul

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the 

Unnamed: 0,NAME,cases_ts,population,geometry,today_vul,dt_start,dt_end,dt_unit
0,Adams,"0.321037700552695,0.321077475031174,0.32105123...",8254.0,"POLYGON ((-91.37781 39.89897, -91.37779 39.900...",0.321420,2020-05-27,2020-06-09,day
1,Adams,"0.190046104929943,0.19006019513773,0.190050897...",2924.0,"POLYGON ((-91.48813 40.02458, -91.48724 40.024...",0.190182,2020-05-27,2020-06-09,day
2,Adams,"0.361105089044322,0.361121887424741,0.36111080...",3486.0,"POLYGON ((-91.14779 40.06499, -91.14778 40.065...",0.361267,2020-05-27,2020-06-09,day
3,Adams,"0.276909347951266,0.276937513910402,0.27691892...",5845.0,"POLYGON ((-91.26571 39.76017, -91.26571 39.760...",0.277180,2020-05-27,2020-06-09,day
4,Adams,"0.411269368429435,0.411279878259525,0.41127294...",2181.0,"POLYGON ((-91.40341 39.94942, -91.40340 39.950...",0.411370,2020-05-27,2020-06-09,day
...,...,...,...,...,...,...,...,...
3116,Woodford,"0.266208146246386,0.266143312243825,0.26632519...",2452.0,"POLYGON ((-89.56362 40.78580, -89.56279 40.791...",0.265885,2020-05-27,2020-06-09,day
3117,Woodford,"0.288727863727449,0.288662395134324,0.28884605...",2476.0,"POLYGON ((-89.49981 40.87485, -89.49963 40.874...",0.288402,2020-05-27,2020-06-09,day
3118,Woodford,"0.277047089935418,0.276971124156723,0.27718423...",2873.0,"POLYGON ((-89.27410 40.83500, -89.27409 40.835...",0.276669,2020-05-27,2020-06-09,day
3119,Woodford,"0.212809354942309,0.212600971266051,0.21318555...",7881.0,"POLYGON ((-89.51273 40.75691, -89.51273 40.757...",0.211771,2020-05-27,2020-06-09,day


Step 5: Simplify Geometries

In [363]:
# Simplify geometries by generalization
geom_simp_series = vul["geometry"].simplify(0.001)
geom_simp_df = geom_simp_series.to_frame(name="geometry")
vul_simp = vul.assign(geometry=geom_simp_df['geometry'])

Step 6: Output File

In [364]:
# Save file
vul_simp.to_file("vulnerability.geojson", driver="GeoJSON")
print("done")