# The political and economic consequences of COVID-19

In [73]:
import numpy as np
import pandas as pd
import json
import OpenBlender
import glob
import config
from datetime import datetime

import matplotlib.pyplot as plt
import seaborn as sb


## Data Collection

In [81]:
# load data from https://github.com/CSSEGISandData/COVID-19
df_confirmed = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv')
df_death = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv')
df_recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv')


In [82]:
df_confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
0,,Thailand,15.0,101.0,2,3,5,7,8,8,...,47,48,50,50,50,53,59,70,75,82
1,,Japan,36.0,138.0,2,1,2,2,4,4,...,360,420,461,502,511,581,639,639,701,773
2,,Singapore,1.2833,103.8333,0,1,3,3,4,5,...,117,130,138,150,150,160,178,178,200,212
3,,Nepal,28.1667,84.25,0,0,0,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,,Malaysia,2.5,112.5,0,0,0,3,4,4,...,50,83,93,99,117,129,149,149,197,238


In [23]:
# load the news stories which were extracted from Media Cloud
df_news = pd.read_csv('covid-19-or-coronavirus-or-all-story-urls-20200315082840.csv')
df_news.head()


Unnamed: 0,stories_id,publish_date,title,url,language,ap_syndicated,themes,media_id,media_name,media_url
0,1483691901,2020-01-01 02:15:15,19 Things You Won't Understand If You Were Bor...,https://www.buzzfeed.com/daniellaemanuel/kids-...,en,False,,6218,Buzzfeed,http://www.buzzfeed.com
1,1483736058,2020-01-01 00:00:28,"Kings ride power play, 4-goal first period pas...",https://www.dailynews.com/2019/12/31/kings-rid...,en,False,,24901,bleacherreport.com,http://bleacherreport.com
2,1483742069,2020-01-01 00:22:23,"Neal records hat trick in 4-point game, Oilers...",https://www.kansascity.com/entertainment/artic...,en,False,,30,The Kansas City Star,http://www.kansascity.com/
3,1483743956,2020-01-01 00:15:24,"Kane scores twice, leads Blackhawks to 5-3 win...",http://www.startribune.com/kane-scores-twice-l...,en,False,,19,Star Tribune,http://www.startribune.com/
4,1483743953,2020-01-01 00:25:06,"Neal records hat trick in 4-point game, Oilers...",http://www.startribune.com/neal-records-hat-tr...,en,False,,19,Star Tribune,http://www.startribune.com/


In [34]:
# how many news stories matching my coronavirus query were published sind January 1st?
df_news.shape


(85809, 10)

In [53]:
# load financial data for commodities, stocks, bonds and currencies from Yahoo financial
# this code was adapted from this project https://github.com/julianikulski/bike-sharing/blob/master/dataset_creation.ipynb
path = r'C:\Users\julia\Documents\Coding\own-projects\covid-19\finance-data'
all_files = glob.glob(path + "/*.csv")

df_list = []

for file in all_files:
    df_temp = pd.read_csv(file)
    df_temp['asset'] = file[67:-4]
    df_list.append(df_temp)
    
df_fin = pd.concat(df_list, axis=0, ignore_index=True)

# print the first 5 rows of the new dataframe
df_fin.head()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,asset
0,1962-01-02,4.06,4.06,4.06,4.06,4.06,0.0,10y_bond
1,1962-01-03,4.03,4.03,4.03,4.03,4.03,0.0,10y_bond
2,1962-01-04,3.99,3.99,3.99,3.99,3.99,0.0,10y_bond
3,1962-01-05,4.02,4.02,4.02,4.02,4.02,0.0,10y_bond
4,1962-01-07,,,,,,,10y_bond


In [55]:
# get the descriptive statistics of the financial data
df_fin.describe(include='all')


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,asset
count,85409,79285.0,79285.0,79285.0,79285.0,79285.0,79285.0,85409
unique,18508,,,,,,,11
top,2019-05-02,,,,,,,10y_bond
freq,11,,,,,,,18101
mean,,2261.949755,2278.150859,2244.461496,2262.181063,2262.181063,526783700.0,
std,,4175.081455,4201.745134,4146.347592,4175.467711,4175.467711,1969249000.0,
min,,0.484,0.601,0.072902,0.499,0.499,0.0,
25%,,8.01,8.01,8.01,8.01,8.01,0.0,
50%,,308.410004,309.769989,306.769012,308.690002,308.690002,567.0,
75%,,2207.26001,2224.850098,2191.560059,2208.050049,2208.050049,171300000.0,


In [58]:
# load the news about Biden, Sanders and the primary and general elections in 2020
df_news_politics = pd.read_csv('biden-or-sanders-or-primary-all-story-urls-20200315100750.csv')
df_news_politics.head()


Unnamed: 0,stories_id,publish_date,title,url,language,ap_syndicated,themes,media_id,media_name,media_url
0,1177339361,2020-02-15 00:00:00,Walker says Madison voters driven by anger,http://archive.jsonline.com/news/statepolitics...,en,False,,36,Milwaukee Journal Sentinel,http://www.jsonline.com/
1,1177816610,2020-02-15 00:00:00,"'Pod Save America' host: After Trump, politics...",https://www.sfchronicle.com/politics/article/P...,,False,,14,sfchronicle,http://www.sfgate.com/
2,1449604986,2020-01-15 00:00:00,Janison: California as Democratic HQ of the U.S.,https://www.newsday.com/long-island/columnists...,en,False,,13,Newsday,http://www.newsday.com/
3,1483740563,2020-01-01 00:01:49,Giuliani Says He’s Prepared to ‘Do Demonstrati...,http://feedproxy.google.com/~r/thedailybeast/a...,en,False,,1707,The Daily Beast,http://www.thedailybeast.com/
4,1483750136,2020-01-01 00:36:36.812637,California Election Results 2014: House Map by...,https://www.politico.com/2014-election/results...,en,False,,18268,Politico,https://www.politico.com/


## Data Analysis
### Covid-19 cases

In [76]:
# check the US numbers
df_US_covid = df_confirmed[df_confirmed['countryregion'] == 'US']

df_US_covid['date'] = df_US_covid['timestamp'].apply(lambda x: datetime.utcfromtimestamp(x).strftime('%Y-%m-%d'))

cumulative_cases = df_US_covid.groupby('date')['confirmed'].sum()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


date
2020-01-22       1.0
2020-01-23       1.0
2020-01-24       2.0
2020-01-25       2.0
2020-01-26       5.0
2020-01-27       5.0
2020-01-28       5.0
2020-01-29       5.0
2020-01-30       5.0
2020-02-01     141.0
2020-02-03     108.0
2020-02-05      27.0
2020-02-09      62.0
2020-02-11       2.0
2020-02-13      42.0
2020-02-21      84.0
2020-02-22      36.0
2020-02-24      36.0
2020-02-25      47.0
2020-02-26      84.0
2020-02-27      24.0
2020-02-28     139.0
2020-02-29      15.0
2020-03-01      15.0
2020-03-02     442.0
2020-03-03     128.0
2020-03-04      77.0
2020-03-05     195.0
2020-03-06     240.0
2020-03-07     416.0
2020-03-08     665.0
2020-03-09     203.0
2020-03-10    1040.0
Name: confirmed, dtype: float64

In [None]:
plt.