#### **Data exploratory**
In this step we import the dataset, and perform some data manipulation in order to get familiar with the data at hand.

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

data_transformed = pd.read_csv('Covid-19 impacts on global economy/dataset/transformed_data.csv')
data_raw = pd.read_csv('Covid-19 impacts on global economy/dataset/raw_data.csv')

In [3]:
data_transformed.head()

Unnamed: 0,CODE,COUNTRY,DATE,HDI,TC,TD,STI,POP,GDPCAP
0,AFG,Afghanistan,2019-12-31,0.498,0.0,0.0,0.0,17.477233,7.497754
1,AFG,Afghanistan,2020-01-01,0.498,0.0,0.0,0.0,17.477233,7.497754
2,AFG,Afghanistan,2020-01-02,0.498,0.0,0.0,0.0,17.477233,7.497754
3,AFG,Afghanistan,2020-01-03,0.498,0.0,0.0,0.0,17.477233,7.497754
4,AFG,Afghanistan,2020-01-04,0.498,0.0,0.0,0.0,17.477233,7.497754


In [4]:
data_raw.head()

Unnamed: 0,iso_code,location,date,total_cases,total_deaths,stringency_index,population,gdp_per_capita,human_development_index,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,AFG,Afghanistan,2019-12-31,0.0,0.0,0.0,38928341,1803.987,0.498,#NUM!,#NUM!,#NUM!,17.477233,7.497754494
1,AFG,Afghanistan,2020-01-01,0.0,0.0,0.0,38928341,1803.987,0.498,#NUM!,#NUM!,#NUM!,17.477233,7.497754494
2,AFG,Afghanistan,2020-01-02,0.0,0.0,0.0,38928341,1803.987,0.498,#NUM!,#NUM!,#NUM!,17.477233,7.497754494
3,AFG,Afghanistan,2020-01-03,0.0,0.0,0.0,38928341,1803.987,0.498,#NUM!,#NUM!,#NUM!,17.477233,7.497754494
4,AFG,Afghanistan,2020-01-04,0.0,0.0,0.0,38928341,1803.987,0.498,#NUM!,#NUM!,#NUM!,17.477233,7.497754494


As presented above, we have two datasets,the `raw dataset` and the `transformed one`. We can see that the raw dataset contains more more columns and usefuls ones, thus we are going to combine both datasets.

In [5]:
# We can get the value
data_transformed['COUNTRY'].value_counts().mode()

0    294
dtype: int64

In [6]:
country_code = data_transformed['CODE'].unique().tolist()
country = data_transformed['COUNTRY'].unique().tolist()
population =  data_transformed['POP'].unique().tolist()
# population = []
date = data_transformed['DATE'].unique().tolist()
hdi = []
tc = []
td = []
sti = []
gpd = []

for element in country:
    hdi.append((data_transformed.loc[data_transformed['COUNTRY'] == element, 'HDI']).sum()/294)
    tc.append((data_transformed.loc[data_transformed['COUNTRY'] == element, 'TC']).sum())
    td.append((data_transformed.loc[data_transformed['COUNTRY']== element, 'TD']).sum())
    sti.append((data_transformed.loc[data_transformed['COUNTRY']== element, 'STI']).sum()/294)
    population.append((data_transformed.loc[data_transformed['COUNTRY'] == element, 'POP']).sum())

combined_data = pd.DataFrame(list(zip(country_code, country, hdi, tc, td, sti, population)), columns=
                                ["Country Code", "Country", "HDI", 
                                "Total Cases", "Total Death",
                                "Stringency Index", "Population"])

combined_data.head()

Unnamed: 0,Country Code,Country,HDI,Total Cases,Total Death,Stringency Index,Population
0,AFG,Afghanistan,0.498,2000.646094,1226.948181,3.049673,17.477233
1,ALB,Albania,0.600765,1702.240756,929.3688,3.005624,14.872537
2,DZA,Algeria,0.754,2052.510847,1406.216387,3.195168,17.596309
3,AND,Andorra,0.659551,1465.82825,786.655112,2.677654,11.254996
4,AGO,Angola,0.418952,1203.978763,590.138675,2.96556,17.307957


In [23]:
country_code = data_transformed['CODE'].unique().tolist()
country = data_transformed['COUNTRY'].unique().tolist()
population =  data_transformed['POP'].unique().tolist()
# population = []
date = data_transformed['DATE'].unique().tolist()
hdi = []
tc = []
td = []
sti = []
gpd = []

for element in country:
    hdi.append((data_transformed.loc[data_transformed['COUNTRY'] == element, 'HDI']).sum()/294)
    tc.append((data_raw.loc[data_raw['location'] == element, 'total_cases']).sum())
    td.append((data_raw.loc[data_raw['location']== element, 'total_deaths']).sum())
    sti.append((data_transformed.loc[data_transformed['COUNTRY']== element, 'STI']).sum()/294)
    population.append((data_raw.loc[data_raw['location'] == element, 'population']).sum())
    date.append((data_raw.loc[data_raw['location']== element, 'date']))

combined_data = pd.DataFrame(list(zip(country_code, country, hdi, tc, td, date, sti, population)), columns=
                                ["Country Code", "Country", "HDI", 
                                "Total Cases", "Total Death", "Date",
                                "Stringency Index", "Population"])

combined_data.head()

Unnamed: 0,Country Code,Country,HDI,Total Cases,Total Death,Date,Stringency Index,Population
0,AFG,Afghanistan,0.498,5126433.0,165875.0,2019-12-31,3.049673,17.477233
1,ALB,Albania,0.600765,1071951.0,31056.0,2020-01-01,3.005624,14.872537
2,DZA,Algeria,0.754,4893999.0,206429.0,2020-01-02,3.195168,17.596309
3,AND,Andorra,0.659551,223576.0,9850.0,2020-01-03,2.677654,11.254996
4,AGO,Angola,0.418952,304005.0,11820.0,2020-01-04,2.96556,17.307957


In [44]:
# Sort Combined data foolowing the total number of covid cases 
data = combined_data.sort_values(by=['Total Cases'], ascending=False)

In [59]:
# geting the countries with the first 10 countries with the highest covid case
sorted_data = data.head(10)
print(sorted_data)

    Country Code         Country       HDI  Total Cases  Total Death  \
200          USA   United States  0.924000  746014098.0   26477574.0   
27           BRA          Brazil  0.759000  425704517.0   14340567.0   
90           IND           India  0.640000  407771615.0    7247327.0   
157          RUS          Russia  0.816000  132888951.0    2131571.0   
150          PER            Peru  0.599490   74882695.0    3020038.0   
125          MEX          Mexico  0.774000   74347548.0    7295850.0   
178          ESP           Spain  0.887969   73717676.0    5510624.0   
175          ZAF    South Africa  0.608653   63027659.0    1357682.0   
42           COL        Colombia  0.581847   60543682.0    1936134.0   
199          GBR  United Kingdom  0.922000   59475032.0    7249573.0   

           Date  Stringency Index  Population  
200  2020-07-18          3.350949   19.617637  
27   2020-01-27          3.136028   19.174732  
90   2020-03-30          3.610552   21.045353  
157  2020-06-05

### Adding the GPD per capita Before and After Covid-19 for the countries with highest cases

In [60]:
GPGBeforeCovid = [65279.53, 8897.49, 2100.75, 11497.65, 7027.61, 9946.03,29564.74, 6001.40, 6424.98, 42354.41]
GPGAfterCovid = [63543.58, 6796.84, 1900.71, 10126.72, 6126.87, 8346.70, 27057.16, 5090.72, 5332.77, 40284.64]

sorted_data.insert(5,'GPG Before Covid', GPGBeforeCovid, True)
sorted_data.insert(6,'GPG After Covid', GPGAfterCovid, True)

# sorted_data['GPA Before Covid'] = GPGBeforeCovid
# sorted_data['GPA After Covid'] = GPGAfterCovid

print(sorted_data)

    Country Code         Country       HDI  Total Cases  Total Death  \
200          USA   United States  0.924000  746014098.0   26477574.0   
27           BRA          Brazil  0.759000  425704517.0   14340567.0   
90           IND           India  0.640000  407771615.0    7247327.0   
157          RUS          Russia  0.816000  132888951.0    2131571.0   
150          PER            Peru  0.599490   74882695.0    3020038.0   
125          MEX          Mexico  0.774000   74347548.0    7295850.0   
178          ESP           Spain  0.887969   73717676.0    5510624.0   
175          ZAF    South Africa  0.608653   63027659.0    1357682.0   
42           COL        Colombia  0.581847   60543682.0    1936134.0   
199          GBR  United Kingdom  0.922000   59475032.0    7249573.0   

     GPG Before Covid  GPG After Covid        Date  Stringency Index  \
200          65279.53         63543.58  2020-07-18          3.350949   
27            8897.49          6796.84  2020-01-27          3.1

#### Analyzing the Spread of Covid-19

In [81]:
figure = px.bar(sorted_data, x = 'Country', y = 'Total Cases', hover_data=['Date'], color = 'Country Code')
figure.update_layout(title = 'Highest Covid Cases per Countries', title_x = 0.5)
figure.show()

In [82]:
figure = px.bar(sorted_data, x = 'Country', y = 'Total Death', hover_data=['Date'], color = 'Country Code')
figure.update_layout(title = 'Highest Covid Death per Countries', title_x = 0.5)
figure.show()