In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

In [2]:
#Open excel workbook and first sheet
xls = pd.ExcelFile("C:/Users/jason/D3-Visaulization/raw_data/Brent_oil_price.xls")
df = pd.read_excel(xls, "Data 1", skiprows=2, names=['Date', 'Brent_Spot_Price'])
df.head()

Unnamed: 0,Date,Brent_Spot_Price
0,1987-05-15,18.58
1,1987-05-22,18.54
2,1987-05-29,18.6
3,1987-06-05,18.7
4,1987-06-12,18.75


In [3]:
#We only want dates that correspond to viral outbreak dates
#Start with SARS
sars_df = pd.read_csv("C:/Users/jason/D3-Visaulization/cleaned_data/In_progress/SARS_data.csv")
sars_df.head()

Unnamed: 0,Date,Country,Cases,Deaths
0,2003-03-17,Germany,1,0
1,2003-03-17,Canada,8,2
2,2003-03-17,Singapore,20,0
3,2003-03-17,Hong Kong,95,1
4,2003-03-17,Switzerland,2,0


In [4]:
#Take only totals
sars_ct = sars_df[sars_df["Country"].str.contains("Total")]
sars_ct.head()

Unnamed: 0,Date,Country,Cases,Deaths
7,2003-03-17,Total,167,4
18,2003-03-18,Total,219,4
31,2003-03-19,Total,264,9
45,2003-03-20,Total,306,10
61,2003-03-21,Total,350,10


In [5]:
#Datetime64 and object could not merge; make both datetime 
#-- from stackoverflow.com/questions/55974994/typeerror-dtype-class-datetime-timedelta-not-understood
df["Date"] = pd.to_datetime(df['Date'], errors='coerce')
sars_ct["Date"] = pd.to_datetime(sars_df['Date'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [6]:
#Merge the dataframes, then drop unwanted rows to get only the relevant dates
oil_sars = pd.merge(df, sars_ct, how="right", on=["Date", "Date"])
del oil_sars["Country"]
oil_sars.head()

Unnamed: 0,Date,Brent_Spot_Price,Cases,Deaths
0,2003-03-21,28.18,350,10
1,2003-03-28,26.95,1485,53
2,2003-04-04,26.93,2353,84
3,2003-04-11,24.96,2890,116
4,2003-04-18,24.92,3547,182


In [7]:
#Next do MERS
mers_df = pd.read_csv("C:/Users/jason/D3-Visaulization/cleaned_data/In_progress/mers_final.csv")
mers_df.head()

Unnamed: 0.1,Unnamed: 0,Year,Week,day,Country,Cases,Continent,lat,long
0,0,2015,18,0,SA,7,Asia,23.885942,45.079162
1,1,2015,22,30,SA,6,Asia,23.885942,45.079162
2,2,2015,26,60,SA,1,Asia,23.885942,45.079162
3,3,2015,30,90,SA,14,Asia,23.885942,45.079162
4,4,2015,18,0,KR,0,Asia,35.907757,127.766922


In [8]:
#Drop unnecessary columns
del mers_df["Unnamed: 0"]
del mers_df["Continent"]
del mers_df["lat"]
del mers_df["long"]
mers_df

Unnamed: 0,Year,Week,day,Country,Cases
0,2015,18,0,SA,7
1,2015,22,30,SA,6
2,2015,26,60,SA,1
3,2015,30,90,SA,14
4,2015,18,0,KR,0
5,2015,22,30,KR,86
6,2015,26,60,KR,3
7,2015,30,90,KR,0


In [9]:
#Separate Year, Week, Day columns need to be translated to datetime
#From https://www.epochconverter.com/weeks/2015: 
#week 18=2015-04-27, week 22=2015-05-25, week 26=2015-06-22, week 30=2015-07-20
Date = []
for row in range(len(mers_df)):
    week_number = mers_df.iloc[row]['Week']
    if week_number == 18:
        Date.append("2015-4-24")
    elif week_number == 22:
        Date.append("2015-5-22")
    elif week_number == 26:
        Date.append("2015-6-19")
    else:
        Date.append("2015-7-17")

mers_df["Date"] = Date
mers_df


Unnamed: 0,Year,Week,day,Country,Cases,Date
0,2015,18,0,SA,7,2015-4-24
1,2015,22,30,SA,6,2015-5-22
2,2015,26,60,SA,1,2015-6-19
3,2015,30,90,SA,14,2015-7-17
4,2015,18,0,KR,0,2015-4-24
5,2015,22,30,KR,86,2015-5-22
6,2015,26,60,KR,3,2015-6-19
7,2015,30,90,KR,0,2015-7-17


In [10]:
del mers_df["Year"]
del mers_df["Week"]
del mers_df["day"]
del mers_df["Country"]
mers_df

Unnamed: 0,Cases,Date
0,7,2015-4-24
1,6,2015-5-22
2,1,2015-6-19
3,14,2015-7-17
4,0,2015-4-24
5,86,2015-5-22
6,3,2015-6-19
7,0,2015-7-17


In [11]:
mers_df["Date"] = pd.to_datetime(mers_df['Date'], errors='coerce')
mers_ct = mers_df.groupby(["Date"]).sum()
mers_ct

Unnamed: 0_level_0,Cases
Date,Unnamed: 1_level_1
2015-04-24,7
2015-05-22,92
2015-06-19,4
2015-07-17,14


In [12]:
#Merge with oil prices
oil_mers = pd.merge(df, mers_ct, how="right", on=["Date", "Date"])
oil_mers.head()

Unnamed: 0,Date,Brent_Spot_Price,Cases
0,2015-04-24,61.41,7
1,2015-05-22,64.31,92
2,2015-06-19,60.65,4
3,2015-07-17,57.17,14


In [13]:
#COVID-19 is next
covid_df = pd.read_csv("C:/Users/jason/D3-Visaulization/cleaned_data/In_progress/covid_daily_world.csv")
covid_df.head()

Unnamed: 0,Date,Country/Region,Confirmed,Deaths
0,2020-01-22,Hong Kong,0,0
1,2020-01-22,Japan,2,0
2,2020-01-22,Macao,1,0
3,2020-01-22,China,547,17
4,2020-01-22,South Korea,1,0


In [14]:
#Take only totals
covid_ct = covid_df[covid_df["Country/Region"].str.contains("Total")]
covid_ct.head()

Unnamed: 0,Date,Country/Region,Confirmed,Deaths
8,2020-01-22,Total,555,17
25,2020-01-23,Total,653,18
37,2020-01-24,Total,941,26
52,2020-01-25,Total,1438,42
68,2020-01-26,Total,2118,56


In [15]:
#Merge with oil
covid_ct["Date"] = pd.to_datetime(covid_ct['Date'], errors='coerce')
oil_covid = pd.merge(df, covid_ct, how="right", on=["Date", "Date"])
del oil_covid["Country/Region"]

oil_covid.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Date,Brent_Spot_Price,Confirmed,Deaths
0,2020-01-24,62.2,941,26
1,2020-01-31,58.57,9925,213
2,2020-02-07,54.59,34392,719
3,2020-02-14,55.33,66887,1523
4,2020-02-21,58.61,76843,2251


In [16]:
#And finally, ebola
ebola_df = pd.read_csv("C:/Users/jason/D3-Visaulization/cleaned_data/In_progress/updated_ebola.csv")
ebola_df.head()

Unnamed: 0.1,Unnamed: 0,Country,Date,No. of confirmed cases,No. of confirmed deaths
0,0,Guinea,2014-08-29,482.0,287.0
1,3,Liberia,2014-08-29,322.0,225.0
2,0,Total,2014-08-29,1754.0,898.0
3,1,Nigeria,2014-08-29,15.0,6.0
4,2,Sierra Leone,2014-08-29,935.0,380.0


In [17]:
#Take only totals
ebola_ct = ebola_df[ebola_df["Country"].str.contains("Total")]
ebola_ct.head()

Unnamed: 0.1,Unnamed: 0,Country,Date,No. of confirmed cases,No. of confirmed deaths
2,0,Total,2014-08-29,1754.0,898.0
10,1,Total,2014-09-05,2383.0,1243.0
12,2,Total,2014-09-08,2552.0,1376.0
22,3,Total,2014-09-12,2639.0,1386.0
23,4,Total,2014-09-16,3017.0,1513.0


In [18]:
#Merge with oil
ebola_ct["Date"] = pd.to_datetime(ebola_ct['Date'], errors='coerce')
oil_ebola = pd.merge(df, ebola_ct, how="right", on=["Date", "Date"])
del oil_ebola["Unnamed: 0"]

oil_ebola.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Date,Brent_Spot_Price,Country,No. of confirmed cases,No. of confirmed deaths
0,2014-08-29,100.64,Total,1754.0,898.0
1,2014-09-05,100.45,Total,2383.0,1243.0
2,2014-09-12,97.32,Total,2639.0,1386.0
3,2014-09-26,95.01,Total,3626.0,1837.0
4,2014-10-03,93.41,Total,4108.0,2078.0
