In [1]:
import pandas as pd

In [2]:
cases_path = "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_path = "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
recovered_path = "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"

In [34]:
cases_df = pd.read_csv(cases_path)
deaths_df = pd.read_csv(deaths_path)
recovered_df = pd.read_csv(recovered_path)

In [4]:
cases_df

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20
0,,Afghanistan,33.000000,65.000000,0,0,0,0,0,0,...,21,22,22,22,24,24,40,40,74,84
1,,Albania,41.153300,20.168300,0,0,0,0,0,0,...,51,55,59,64,70,76,89,104,123,146
2,,Algeria,28.033900,1.659600,0,0,0,0,0,0,...,54,60,74,87,90,139,201,230,264,302
3,,Andorra,42.506300,1.521800,0,0,0,0,0,0,...,2,39,39,53,75,88,113,133,164,188
4,,Angola,-11.202700,17.873900,0,0,0,0,0,0,...,0,0,0,0,1,2,2,3,3,3
5,,Antigua and Barbuda,17.060800,-61.796400,0,0,0,0,0,0,...,1,1,1,1,1,1,1,3,3,3
6,,Argentina,-38.416100,-63.616700,0,0,0,0,0,0,...,56,68,79,97,128,158,266,301,387,387
7,,Armenia,40.069100,45.038200,0,0,0,0,0,0,...,52,78,84,115,136,160,194,235,249,265
8,Australian Capital Territory,Australia,-35.473500,149.012400,0,0,0,0,0,0,...,2,2,3,4,6,9,19,32,39,39
9,New South Wales,Australia,-33.868800,151.209300,0,0,0,0,3,4,...,171,210,267,307,353,436,669,669,818,1029


In [14]:
for i, row in cases_df.iterrows():
    print(row[4:].values)
    break

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1
 1 1 1 1 1 1 1 1 1 4 4 5 7 7 7 11 16 21 22 22 22 24 24 40 40 74 84]


In [27]:
def construct_akb_entry_from_row(state, country, row_values, topic):
    # row_values is a list of ints containing all the values from all the dates
    # topic is "cases", "deaths", "recoveries"
    akb_entries_list = []
    for val in row_values:
        subj = country
        if isinstance(state, str):
            subj = " ".join([state, country])
        pred = "has"
        obj = " ".join([str(val), topic])
        sent = " ".join([subj, pred, obj])
        url = "https://coronavirus.jhu.edu/"
        url_title = "JHU Coronavirus Data"
        rating = "False"
        akb_entries_list.append({"sent": sent, "url": url, "url_title": url_title, "rating": rating,
                                "subj": subj, "pred": pred, "obj": obj})
    return akb_entries_list

In [28]:
def construct_akb_df(df, topic):
    """
    from the original df, go through all the dates and construct akb entries 
    in the same format as we have in the web app.
    """
    ret_df = pd.DataFrame()
    for i, row in df.iterrows():
        akb_entries = construct_akb_entry_from_row(row["Province/State"], row["Country/Region"], row[4:].values, topic)
        ret_df = ret_df.append(akb_entries)
    return ret_df

In [35]:
akb_cases_df = construct_akb_df(cases_df, "cases")
akb_deaths_df = construct_akb_df(deaths_df, "deaths")
akb_recovered_df = construct_akb_df(recovered_df, "recoveries")

In [36]:
akb_cases_df.sample(10)

Unnamed: 0,obj,pred,rating,sent,subj,url,url_title
33,0 cases,has,False,Mali has 0 cases,Mali,https://coronavirus.jhu.edu/,JHU Coronavirus Data
28,0 cases,has,False,Equatorial Guinea has 0 cases,Equatorial Guinea,https://coronavirus.jhu.edu/,JHU Coronavirus Data
23,0 cases,has,False,Namibia has 0 cases,Namibia,https://coronavirus.jhu.edu/,JHU Coronavirus Data
35,0 cases,has,False,Argentina has 0 cases,Argentina,https://coronavirus.jhu.edu/,JHU Coronavirus Data
46,0 cases,has,False,Benin has 0 cases,Benin,https://coronavirus.jhu.edu/,JHU Coronavirus Data
43,4 cases,has,False,Georgia has 4 cases,Georgia,https://coronavirus.jhu.edu/,JHU Coronavirus Data
37,60 cases,has,False,US has 60 cases,US,https://coronavirus.jhu.edu/,JHU Coronavirus Data
33,0 cases,has,False,Moldova has 0 cases,Moldova,https://coronavirus.jhu.edu/,JHU Coronavirus Data
56,0 cases,has,False,Mozambique has 0 cases,Mozambique,https://coronavirus.jhu.edu/,JHU Coronavirus Data
33,0 cases,has,False,Denmark has 0 cases,Denmark,https://coronavirus.jhu.edu/,JHU Coronavirus Data


In [37]:
akb_deaths_df.sample(10)

Unnamed: 0,obj,pred,rating,sent,subj,url,url_title
11,0 deaths,has,False,Equatorial Guinea has 0 deaths,Equatorial Guinea,https://coronavirus.jhu.edu/,JHU Coronavirus Data
3,0 deaths,has,False,Haiti has 0 deaths,Haiti,https://coronavirus.jhu.edu/,JHU Coronavirus Data
61,0 deaths,has,False,Queensland Australia has 0 deaths,Queensland Australia,https://coronavirus.jhu.edu/,JHU Coronavirus Data
27,0 deaths,has,False,Luxembourg has 0 deaths,Luxembourg,https://coronavirus.jhu.edu/,JHU Coronavirus Data
38,0 deaths,has,False,Alberta Canada has 0 deaths,Alberta Canada,https://coronavirus.jhu.edu/,JHU Coronavirus Data
28,0 deaths,has,False,North Macedonia has 0 deaths,North Macedonia,https://coronavirus.jhu.edu/,JHU Coronavirus Data
5,0 deaths,has,False,Isle of Man United Kingdom has 0 deaths,Isle of Man United Kingdom,https://coronavirus.jhu.edu/,JHU Coronavirus Data
2,0 deaths,has,False,Cambodia has 0 deaths,Cambodia,https://coronavirus.jhu.edu/,JHU Coronavirus Data
12,0 deaths,has,False,New Brunswick Canada has 0 deaths,New Brunswick Canada,https://coronavirus.jhu.edu/,JHU Coronavirus Data
52,3 deaths,has,False,Greece has 3 deaths,Greece,https://coronavirus.jhu.edu/,JHU Coronavirus Data


In [38]:
akb_recovered_df.sample(10)

Unnamed: 0,obj,pred,rating,sent,subj,url,url_title
25,0.0 recoveries,has,False,Paraguay has 0.0 recoveries,Paraguay,https://coronavirus.jhu.edu/,JHU Coronavirus Data
41,0.0 recoveries,has,False,Timor-Leste has 0.0 recoveries,Timor-Leste,https://coronavirus.jhu.edu/,JHU Coronavirus Data
53,4.0 recoveries,has,False,New South Wales Australia has 4.0 recoveries,New South Wales Australia,https://coronavirus.jhu.edu/,JHU Coronavirus Data
28,1.0 recoveries,has,False,Sri Lanka has 1.0 recoveries,Sri Lanka,https://coronavirus.jhu.edu/,JHU Coronavirus Data
9,0.0 recoveries,has,False,Slovenia has 0.0 recoveries,Slovenia,https://coronavirus.jhu.edu/,JHU Coronavirus Data
2,0.0 recoveries,has,False,Croatia has 0.0 recoveries,Croatia,https://coronavirus.jhu.edu/,JHU Coronavirus Data
10,0.0 recoveries,has,False,Peru has 0.0 recoveries,Peru,https://coronavirus.jhu.edu/,JHU Coronavirus Data
39,0.0 recoveries,has,False,Greenland Denmark has 0.0 recoveries,Greenland Denmark,https://coronavirus.jhu.edu/,JHU Coronavirus Data
8,0.0 recoveries,has,False,Tunisia has 0.0 recoveries,Tunisia,https://coronavirus.jhu.edu/,JHU Coronavirus Data
29,7.0 recoveries,has,False,Vietnam has 7.0 recoveries,Vietnam,https://coronavirus.jhu.edu/,JHU Coronavirus Data


In [39]:
len(akb_cases_df)

15680

In [40]:
all_df = pd.concat([akb_cases_df, akb_deaths_df, akb_recovered_df])

In [41]:
all_df

Unnamed: 0,obj,pred,rating,sent,subj,url,url_title
0,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data
1,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data
2,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data
3,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data
4,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data
5,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data
6,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data
7,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data
8,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data
9,0 cases,has,False,Afghanistan has 0 cases,Afghanistan,https://coronavirus.jhu.edu/,JHU Coronavirus Data


In [42]:
all_df.to_csv("/Users/georgekaragiannis/Desktop/Cornell/research/akb_demo/data/jhu_timeseries_akb_03262020.csv", index=False)