# Exploration of time series data

In [1]:
import pandas as pd

In [2]:
import seaborn as sns

In [3]:
# Column position at which the extra 'status' label column is inserted.
# Later cells treat the columns before this position as identifier columns.
n = 4

# Load the three wide time-series tables from the local data checkout.
confirmed = pd.read_csv(
    'csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'
)
deaths = pd.read_csv(
    'csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv'
)
recovered = pd.read_csv(
    'csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv'
)

# Tag every row of each table with the measure it holds (inserted in place).
confirmed.insert(loc=n, column='status', value='confirmed')
deaths.insert(loc=n, column='status', value='deaths')
recovered.insert(loc=n, column='status', value='recovered')

# Goal: Organize the data into one long-format data set with the following columns
<ol>
    <li> Province/State </li>
    <li> Country/Region </li>
    <li> Lat </li>
    <li> Long </li>
    <li> As Of (date) </li>
    <li> Confirmed </li>
    <li> Deaths </li>
    <li> Recovered </li>
</ol>

In [4]:
# Reshape each wide table into long format and combine the three measures
# into one table, `full_set`, keyed on location and date.


def _clean_headers(frame, id_count):
    """Return normalised column labels for a wide status table.

    The first ``id_count + 1`` labels (the identifier columns plus ``status``)
    are lower-cased and have ``/`` replaced by ``_``. The remaining labels are
    parsed as dates and rendered as ``YYYY-MM-DD`` strings.
    """
    # regex=False: '/' is a literal character, independent of the pandas default.
    head = frame.columns[:id_count + 1].str.lower().str.replace('/', '_', regex=False)
    # DatetimeIndex.strftime yields the date strings directly, so no
    # to_period('d') round trip is needed.
    dates = pd.to_datetime(frame.columns[id_count + 1:]).strftime('%Y-%m-%d')
    return head.tolist() + dates.tolist()


def _to_long(frame, id_count):
    """Melt a wide status table (with cleaned headers) into long format.

    Returns a frame holding the first ``id_count`` columns as identifiers, a
    ``date`` column, and one value column named after the first entry of the
    frame's ``status`` column.
    """
    return pd.melt(
        frame,
        id_vars=frame.columns[:id_count],
        value_vars=frame.columns[id_count + 1:],
        var_name='date',
        value_name=frame['status'].iloc[0],
    )


merge_keys = ['province_state', 'country_region', 'lat', 'long', 'date']

full_set = None
for df in (confirmed, deaths, recovered):
    # Headers are renamed on the loaded frame itself, so `confirmed`, `deaths`
    # and `recovered` expose the cleaned column names after this cell.
    df.columns = _clean_headers(df, n)
    long_df = _to_long(df, n)
    if full_set is None:
        # The confirmed table seeds the result.
        full_set = long_df
    else:
        # Left join: every row of the seed table is kept; the added measure is
        # NaN where the other table has no matching key.
        full_set = full_set.merge(long_df, how='left', on=merge_keys)

In [5]:
# Preview the first five rows of the combined long-format table.
full_set.head(n=5)

Unnamed: 0,province_state,country_region,lat,long,date,confirmed,deaths,recovered
0,,Thailand,15.0,101.0,2020-01-22,2,0,0
1,,Japan,36.0,138.0,2020-01-22,2,0,0
2,,Singapore,1.2833,103.8333,2020-01-22,0,0,0
3,,Nepal,28.1667,84.25,2020-01-22,0,0,0
4,,Malaysia,2.5,112.5,2020-01-22,0,0,0


In [7]:
# Persist the combined table to the working directory.
# NOTE(review): the row index is written as an unnamed first column; pass
# index=False instead if consumers of this file do not need it — confirm.
full_set.to_csv(path_or_buf='covid_test.csv', index=True)