# Importing CSSEGI data into communities
[Index](0-index.ipynb)

In this notebook, we assign CSSEGI data to the communities constructed in [1-clustering](1-clustering.ipynb). Each location reported in the CSSEGI dataset is assigned to the community whose position is closest to the location's latitude and longitude. The data can be accessed [here](https://github.com/CSSEGISandData/COVID-19).

## Imports and global variables

In [None]:
# Load the autoreload extension and set it to mode 2, so that edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
from pathlib import Path
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime
import json

import matplotlib.pyplot as plt
import matplotlib.colors as mco
import matplotlib.gridspec as mgs
import matplotlib.ticker as ticker
import matplotlib.cm as cm
from matplotlib import animation
plt.rcParams['svg.fonttype'] = 'none'

from IPython.display import HTML
from IPython.display import Image

In [None]:
sys.path.append('../code')
from functions import plot_omega_profile, plot_omega_map

In [None]:
# Root directory holding the input data sets; fail fast when it is missing.
datadir = Path('../data')
if not datadir.is_dir():
    raise ValueError("Data dir doesn't exist!")

In [None]:
# Directory holding the HDF5 stores that this notebook reads and writes.
resdir = Path('../results/')
results_dir_found = resdir.is_dir()
if not results_dir_found:
    raise ValueError('No results directory!')

In [None]:
# Compression settings passed to every HDF5 store opened in this notebook.
complib = 'zlib'
complevel = 7

# Store holding the clustering results
# (alternative input: resdir / 'safegraph_analysis_monthly.hdf5').
resfile = resdir / 'safegraph_analysis.hdf5'
# Second store, named after the CSSEGI analysis.
cssefile = resdir / 'cssegi_sanddata_analysis.hdf5'

# Report how many entries each store currently holds.
for hdf_path in (resfile, cssefile):
    with pd.HDFStore(hdf_path, complevel=complevel, complib=complib) as store:
        n_entries = len(store.keys())
        print(f"File {hdf_path.stem} has {n_entries} entries.")



In [None]:
# File extensions used when saving figures.
exts = ['.png', '.svg']
# Date format string (year-month-day).
tfmt = '%Y-%m-%d'

## Load clustered CBGs

In [None]:
# Read the table of communities stored under the clustering key.
key = "/clustering/clusters"
with pd.HDFStore(resfile, complevel=complevel, complib=complib) as store:
    clusters = store.get(key)
clusters

In [None]:
# Community positions, with the X/Y columns relabelled as longitude/latitude.
coord_names = {'X': 'longitude', 'Y': 'latitude'}
clusters_pos = clusters.loc[:, list(coord_names)].rename(columns=coord_names)
clusters_pos

## Load data from geometry

In [None]:
# GeoJSON file with the census block group (CBG) geometries.
geofile = datadir.joinpath('safegraph_open_census_data', 'geometry', 'cbg.geojson')
if not geofile.is_file():
    raise ValueError("Geo file doesn't exist!")

In [None]:
# Read the geometries and index them by their integer CensusBlockGroup code.
geo = (
    gpd.read_file(geofile)
    .astype({'CensusBlockGroup': 'int64'})
    .set_index('CensusBlockGroup')
)
# One representative point per geometry, stored as an [x, y] pair.
XY = [[point.x, point.y] for point in geo.representative_point()]
df_xy = pd.DataFrame(data=np.array(XY), index=geo.index, columns=['longitude', 'latitude'])
df_xy

## Load the confirmed cases from CSSEGI data

In [None]:
# Time series of confirmed cases from the CSSEGI data.
dfile = datadir / 'time_series_covid19_confirmed_US.csv'
# Only keep the rows whose 'iso2' code is 'US'
# (the FIPS column keeps the dtype inferred by read_csv).
df_confirmed_us = pd.read_csv(dfile).loc[lambda frame: frame['iso2'] == 'US']
df_confirmed_us

Drop the locations that have no latitude or longitude information (these are recorded with a coordinate equal to 0).

In [None]:
# A location is unusable when either coordinate is zero or missing (NaN).
# A NaN coordinate would give NaN distances in the nearest-community search
# below, and argmin would then silently fall back to the first community.
lat = df_confirmed_us['Lat']
lon = df_confirmed_us['Long_']
idx = (lat == 0.) | (lon == 0.) | lat.isna() | lon.isna()

# Build a fresh, independent frame instead of mutating the filtered one in place.
df_confirmed_us = df_confirmed_us.loc[~idx].copy()
df_confirmed_us

## Mapping clusters to CSSEGI FIPS based on (Longitude, Latitude)

In [None]:
# Order the communities by index, then extract (longitude, latitude) as an array.
clusters_pos = clusters_pos.sort_index()
XY_clusters = clusters_pos[['longitude', 'latitude']].to_numpy()

In [None]:
# (longitude, latitude) of every CSSEGI row, in the same column order as XY_clusters.
XY_csse = df_confirmed_us[['Long_', 'Lat']].to_numpy()

In [None]:
# For every CSSEGI location, find the community whose position is closest
# (Euclidean norm taken directly on the longitude/latitude values).
nearest_rows = [
    np.argmin(np.linalg.norm(XY_clusters - point, axis=1))
    for point in XY_csse
]
# Translate row positions into community labels.
cluster_ids = [clusters_pos.index[row] for row in nearest_rows]

In [None]:
col = 'cluster_id'
# Remove a column left over from a previous run so the insert below cannot clash.
if col in df_confirmed_us:
    df_confirmed_us.pop(col)

df_confirmed_us.insert(loc=1, column=col, value=cluster_ids)
df_confirmed_us

### Retain only the dates and cluster_ID

In [None]:
def _is_date_label(label):
    """Return True when `label` parses as a '%m/%d/%y' date, False otherwise."""
    try:
        datetime.datetime.strptime(label, '%m/%d/%y')
    except ValueError:
        return False
    return True


# Column labels that are dates, kept in their original order.
dates = [label for label in df_confirmed_us.columns if _is_date_label(label)]

In [None]:
# Keep only the community label and the date columns.
columns = ['cluster_id'] + dates

# Add up the case counts of all CSSEGI locations assigned to the same community.
# GroupBy.sum() moves 'cluster_id' into the index and sums only the date
# columns, so no summed 'cluster_id' column has to be deleted afterwards.
# It also avoids apply(sum), which depends on pandas replacing the builtin
# `sum` with np.sum (deprecated behaviour).
df_clusters = df_confirmed_us.loc[:, columns].groupby('cluster_id').sum()
df_clusters

In [None]:
# Zero-filled table: one row per community, one column per date.
n_clusters = len(clusters.index)
n_dates = len(df_clusters.columns)
clusters_csse = pd.DataFrame(
    data=np.zeros((n_clusters, n_dates), np.uint),
    index=clusters.index,
    columns=df_clusters.columns,
)
# Fill in the communities that received CSSEGI data; the others stay at zero.
clusters_csse.loc[df_clusters.index] = df_clusters

# Put the dates on the rows and turn the date labels into datetime objects.
clusters_csse = clusters_csse.T
clusters_csse.index = [
    datetime.datetime.strptime(label, '%m/%d/%y') for label in clusters_csse.index
]
clusters_csse

I currently have one empty community.

In [None]:
# Show the row of community 1017 as a one-row table.
empty_community = clusters.loc[1017]
empty_community.to_frame().T

But the reported number of cases there is zero.

In [None]:
# Value for community 1017 on the last available date.
last_date = clusters_csse.index[-1]
clusters_csse.loc[last_date, 1017]

There is one community where the reported number of cases is larger than the population...

In [None]:
# Flag the communities whose last-date count exceeds their population, then count them.
last_date = clusters_csse.index[-1]
idx = (clusters_csse.loc[last_date] > clusters['population']).to_numpy()
idx.sum()

In [None]:
# Rows of the flagged communities.
flagged_labels = clusters.index[idx]
clusters.loc[flagged_labels]

In [None]:
# Last-date counts of the flagged communities, shown as a one-row table.
last_date = clusters_csse.index[-1]
flagged_labels = clusters.index[idx]
clusters_csse.loc[last_date, flagged_labels].to_frame().T

So I correct this by capping the number of cases in each community at the community's population:

In [None]:
# Cap every community's count at its population, one date (row) at a time.
population = clusters['population'].to_numpy().astype(np.int_)
for t in clusters_csse.index:
    cases = clusters_csse.loc[t].to_numpy().astype(np.int_)
    clusters_csse.loc[t] = np.minimum(cases, population)

clusters_csse

In [None]:
# Recompute the flag after capping and count the communities still above their population.
last_date = clusters_csse.index[-1]
idx = (clusters_csse.loc[last_date] > clusters['population']).to_numpy()
idx.sum()

In [None]:
# Sum over all communities for each date, stored in a single column named 'omega'.
total_csse = clusters_csse.sum(axis=1).to_frame(name='omega')
total_csse

### Write

In [None]:
# Save the per-community time series in the results store.
key = '/clustering/cssegi'
with pd.HDFStore(resfile, complevel=complevel, complib=complib) as store:
    store.put(key, clusters_csse)

## Plots

In [None]:
# Figure output directory for this notebook; created (with parents) when absent.
figdir = Path('../figures', '3-import_cssegi')
figdir.mkdir(parents=True, exist_ok=True)

### Plot the total evolution

In [None]:
# parameters
figsize = (6, 4.5)
dpi = 300
ms = 2
lw = 1

fig, ax = plt.subplots(facecolor='w', figsize=figsize)

# Total over all communities against the date, on a logarithmic y axis.
time_axis = total_csse.index.to_numpy()
totals = total_csse.sum(axis=1).to_numpy()
ax.plot(time_axis, totals, '-', ms=ms, color='darkblue')

ax.set_yscale('log')
ax.set_xlim(total_csse.index[0], None)
ax.set_ylabel("$T$", fontsize="medium")
plt.xticks(rotation=45)
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.tick_params(left=True, labelleft=True, bottom=True, labelbottom=True)
ax.tick_params(axis='both', length=4)
fig.tight_layout()

# Save one file per extension listed in `exts`, then release the figure.
fname = 'omega_tot_cssegi'
for ext in exts:
    filepath = figdir / f"{fname}{ext}"
    fig.savefig(filepath, bbox_inches='tight', pad_inches=0, dpi=dpi)
    print("Written file: {:s}".format(str(filepath)))
fig.clf()
plt.close('all')

In [None]:
# Display the PNG version of the figure saved above.
filepath = figdir / f"{fname}.png"
Image(filename=filepath, width=4./3*360)

### Plot the spatial evolution as a profile

In [None]:
# Largest value in the last row, expressed in millions.
latest_counts = clusters_csse.iloc[-1]
latest_counts.max() / 1.0e6

In [None]:
# parameters
figsize = (6, 4.5)
dpi = 150
fps = 10
ms = 4
lw = 0.5
ylabel = "$T_a$"

# Output video and the directory passed to plot_omega_profile as `tpdir`.
fileout = figdir / 'dynamic_profile.mp4'
mydir = figdir / 'profiles'
mydir.mkdir(parents=True, exist_ok=True)

In [None]:
# The community table is wrapped in a length-1 leading axis, with one colour and one style.
profile_data = np.array([clusters_csse.to_numpy()])
profile_times = clusters_csse.index.to_list()
plot_omega_profile(
    profile_data, profile_times, colors=['red'],
    fileout=fileout, tpdir=mydir, dpi=dpi, fps=fps, figsize=figsize, ylabel=ylabel,
    lw=lw, ms=ms, styles=['o'], deletetp=False, exts=['.png', '.svg'], ymin=1., ymax=1.3e6,
)

In [None]:
# Embed the video written to `fileout` in the notebook output.
video_template = """
<video height="480" controls>
  <source src="{:s}" type="video/mp4">
</video>
"""
HTML(video_template.format(str(fileout)))

### Plot the spatial evolution on a map

In [None]:
# Largest value in the last row, expressed in millions.
latest_counts = clusters_csse.iloc[-1]
latest_counts.max() / 1.0e6

In [None]:
# Directory used for the map outputs; created (with parents) when absent.
mydir = figdir / 'maps'
if not mydir.is_dir():
    mydir.mkdir(parents=True, exist_ok=True)
# Video path for the map section. The earlier assignment to
# 'dynamic_profile.mp4' was a copy-paste leftover that was overwritten here.
fileout = figdir / 'dynamic_map.mp4'

In [None]:
# parameters
figsize = (6, 4.5)
dpi = 150
fps = 10
ms = 4
lw = 0.5
idump = 1

# Community coordinates as a 2 x N array (first row X, second row Y).
map_xy = clusters.loc[:, ['X', 'Y']].to_numpy().T

fileout = mydir / 'map_T.mp4'
plot_omega_map(
    clusters_csse.to_numpy(), clusters_csse.index.to_list(), XY=map_xy,
    fileout=fileout, tpdir=mydir, dpi=dpi, fps=fps, figsize=figsize, idump=idump,
    clabel="$T$", vmin=1., vmax=1.3e6, deletetp=False, exts=['.png', '.svg'],
)

In [None]:
# Embed the video written to `fileout` in the notebook output.
video_template = """
<video height="480" controls>
  <source src="{:s}" type="video/mp4">
</video>
"""
HTML(video_template.format(str(fileout)))