# Python for Data Analysis Project 
## Datasets: COVID-19


## 1. Import of libraries, Loading of Datasets, wrangling and cleaning

In [None]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
#!pip3 install --upgrade pip
#!pip3 install --upgrade seaborn cufflinks plotly
#!pip install plotly --upgrade
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
from datetime import datetime
%matplotlib inline
import plotly.express as px
from plotly.offline import plot
import plotly.graph_objs as go
import folium
from plotly.subplots import make_subplots
#conda install -c conda-forge folium

In [None]:
# Per-country daily case table from the datasets/covid-19 repository.
COUNTRIES_AGGREGATED_URL = (
    "https://raw.githubusercontent.com/datasets/covid-19/"
    "master/data/countries-aggregated.csv"
)
countries_aggregated = pd.read_csv(COUNTRIES_AGGREGATED_URL)
countries_aggregated.head()


In [None]:
print(countries_aggregated.shape)
countries_aggregated.head()

In [None]:
countries_aggregated.isna().sum()

In [None]:
# Lookup table (ISO codes, coordinates, population) from the same repository.
REFERENCE_URL = (
    "https://raw.githubusercontent.com/datasets/covid-19/"
    "master/data/reference.csv"
)
reference = pd.read_csv(REFERENCE_URL)
print(reference.shape)
reference.head()

In [None]:
reference.isna().sum()

In [None]:
# Replace missing values in the lookup columns with 0.
# NOTE: 0 is only a placeholder -- e.g. a Province_State of 0 means that no
# province/state was recorded for the row.
for column in ('iso2', 'iso3', 'code3', 'Admin2', 'Lat', 'Long_', 'FIPS',
               'Population', 'Province_State'):
    reference[column] = reference[column].fillna(0)

In [None]:
reference.isna().sum()

In [None]:
reference.tail()

In [None]:
reference=reference.rename(columns={'Country_Region' : 'Country'})

In [None]:
reference.head(4)

In [None]:
# The lookup table carries Province_State and Admin2 columns, so one country can
# occupy several rows. Joining on Country alone would repeat every daily country
# total once per such row, so keep only the rows without a province/state and
# without an Admin2 entry. Missing values may be NaN or the 0 placeholder.
# NOTE(review): assumes every country has one such country-level row -- confirm.
country_level = reference[
    reference['Province_State'].isin([0]) | reference['Province_State'].isna()
]
country_level = country_level[
    country_level['Admin2'].isin([0]) | country_level['Admin2'].isna()
]
# Merge the daily country totals with their country-level lookup row.
Countries_reference = pd.merge(left=countries_aggregated, right=country_level, on='Country')

In [None]:
Countries_reference.head()

In [None]:
Countries_reference.columns

In [None]:
Countries_reference.shape

### Formulating Active and Closed cases columns

In [None]:
# Active Case = confirmed - deaths - recovered
Countries_reference['Active'] = Countries_reference['Confirmed'] - Countries_reference['Deaths'] - Countries_reference['Recovered']

In [None]:
countries_aggregated['Active'] = countries_aggregated['Confirmed'] - countries_aggregated['Deaths'] - countries_aggregated['Recovered']

In [None]:
#Closed Cases = Number of Recovered Cases + Number of Death Case
Countries_reference['Closed'] = Countries_reference['Recovered'] + Countries_reference['Deaths']

In [None]:
countries_aggregated['Closed'] = countries_aggregated['Recovered'] + countries_aggregated['Deaths']

In [None]:
Countries_reference.head()

In [None]:
countries_aggregated.head()

## 2. Trend of Cases Globally (Confirmed, Recovered, etc.)
    Line plots (Matplotlib)

In [None]:
countries_aggregated.groupby("Date")["Confirmed"].sum()

In [None]:
# Worldwide confirmed cases per day. The index is converted to real datetimes so
# matplotlib draws a continuous time axis instead of one category per date string.
confirmed_by_date = countries_aggregated.groupby("Date")["Confirmed"].sum()
confirmed_by_date.index = pd.to_datetime(confirmed_by_date.index)

plt.figure(figsize=(10, 7))
plt.plot(confirmed_by_date, color="Black")

plt.xlabel("Date")
plt.ylabel("Confirmed")
plt.title("Total Confirmed cases trend in the world")
# Tick positions are left to matplotlib so they follow the data range; the former
# hard-coded labels included "2020-06-3", which matches no zero-padded date.
plt.xticks(rotation=45, horizontalalignment='right', fontsize='15')
plt.show()

Trend of Recovered cases worldwide

In [None]:
countries_aggregated.groupby("Date")["Recovered"].sum()

In [None]:
# Worldwide recovered cases per day. The index is converted to real datetimes so
# matplotlib draws a continuous time axis instead of one category per date string.
recovered_by_date = countries_aggregated.groupby("Date")["Recovered"].sum()
recovered_by_date.index = pd.to_datetime(recovered_by_date.index)

plt.figure(figsize=(10, 7))
plt.plot(recovered_by_date, color="Blue")
plt.xlabel("Date")
plt.ylabel("Recovered")
plt.title("Total Recovered cases trend in the world")
# Tick positions are left to matplotlib so they follow the data range; the former
# hard-coded labels included "2020-06-3", which matches no zero-padded date.
plt.xticks(rotation=45, horizontalalignment='right', fontsize='15')
plt.show()

Trend of Active Cases worldwide

In [None]:
countries_aggregated.groupby("Date")["Active"].sum()

In [None]:
# Worldwide active cases per day. The index is converted to real datetimes so
# matplotlib draws a continuous time axis instead of one category per date string.
active_by_date = countries_aggregated.groupby("Date")["Active"].sum()
active_by_date.index = pd.to_datetime(active_by_date.index)

plt.figure(figsize=(10, 7))
plt.plot(active_by_date, color="Purple")
plt.xlabel("Date")
plt.ylabel("Active")
plt.title("Total Active cases trend in the world")
# Tick positions are left to matplotlib so they follow the data range; the former
# hard-coded labels included "2020-06-3", which matches no zero-padded date.
plt.xticks(rotation=45, horizontalalignment='right', fontsize='15')
plt.show()

Trend of Death cases worldwide

In [None]:
countries_aggregated.groupby("Date")["Deaths"].sum()

In [None]:
# Worldwide deaths per day. The index is converted to real datetimes so
# matplotlib draws a continuous time axis instead of one category per date string.
deaths_by_date = countries_aggregated.groupby("Date")["Deaths"].sum()
deaths_by_date.index = pd.to_datetime(deaths_by_date.index)

plt.figure(figsize=(10, 7))
plt.plot(deaths_by_date, color="Red")
plt.xlabel("Date")
plt.ylabel("Deaths")
plt.title("Total Deaths cases trend in the world")
# Tick positions are left to matplotlib so they follow the data range; the former
# hard-coded labels included "2020-06-3", which matches no zero-padded date.
plt.xticks(rotation=45, horizontalalignment='right', fontsize='15')
plt.show()

## 3. Area plot overview of Cases over time
Uses Plotly Express, a fast-rising Python library for interactive visualizations.

In [None]:
# Worldwide totals per date for every case type, reshaped to long form
# (one row per Date and case type) for the area chart below.
case_columns = ['Confirmed', 'Recovered', 'Deaths', 'Active', 'Closed']
# Several GroupBy columns must be selected with a list; indexing with a bare
# tuple of names is rejected by current pandas.
aggregated = countries_aggregated.groupby('Date')[case_columns].sum().reset_index()
aggregated = aggregated.melt(id_vars="Date", value_vars=case_columns,
                             var_name='Cases', value_name='Count')

fig = px.area(aggregated, x="Date", y="Count", color='Cases', height=700,
              color_discrete_sequence=['#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52'],
              title='Area plot of Cases over time')
# The range slider lets the reader zoom into a date window.
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()

## 4. Heatmap of cases with countries(Active, Confirmed etc)
 Used plotly graph: Also an amazing python library for visualizations

In [None]:
fig = go.Figure(data=go.Heatmap(
        z=countries_aggregated['Active'],
        x=countries_aggregated['Date'],
        y=countries_aggregated['Country'],
        colorscale='Blackbody'
        
        ))

fig.update_layout(yaxis = dict(dtick = 1))
fig.update_layout(height=2000)
fig.show()

In [None]:
fig = go.Figure(data=go.Heatmap(
        z=countries_aggregated['Confirmed'],
        x=countries_aggregated['Date'],
        y=countries_aggregated['Country'],
        colorscale='Bluered'
        
        ))

fig.update_layout(yaxis = dict(dtick = 1))
fig.update_layout(height=2000)
fig.show()

In [None]:
fig = go.Figure(data=go.Heatmap(
        z=countries_aggregated['Recovered'],
        x=countries_aggregated['Date'],
        y=countries_aggregated['Country'],
        colorscale='Rainbow'
        
      ))

fig.update_layout(yaxis = dict(dtick = 1))
fig.update_layout(height=2000)
fig.show()

In [None]:
fig = go.Figure(data=go.Heatmap(
        z=countries_aggregated['Deaths'],
        x=countries_aggregated['Date'],
        y=countries_aggregated['Country'],
       colorscale='YlOrRd'
        
        ))

fig.update_layout(yaxis = dict(dtick = 1))
fig.update_layout(height=2000)
fig.show()

In [None]:
fig = go.Figure(data=go.Heatmap(
        z=countries_aggregated['Closed'],
        x=countries_aggregated['Date'],
        y=countries_aggregated['Country'],
       colorscale='Jet'
        
        ))

fig.update_layout(yaxis = dict(dtick = 1))
fig.update_layout(height=2000)
fig.show()

## 5. Folium Map overview of cases worldwide
-Used Folium

In [None]:
# Rows of the merged table that belong to the most recent date.
latest_date = Countries_reference['Date'].max()
aggregated = Countries_reference[Countries_reference['Date'] == latest_date]

A = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=2, zoom_start=1)

# Iterate once over just the columns the markers need, instead of re-indexing
# the frame with .iloc for every single field.
marker_columns = ['Lat', 'Long_', 'Country', 'Recovered', 'Active', 'Closed',
                  'Confirmed', 'Deaths']
for row in aggregated[marker_columns].itertuples(index=False):
    # <b> is the HTML bold element (<bold> is not), and every tag is closed.
    tooltip = (
        f'<li><b>Country : </b>{row.Country}</li>'
        f'<li><b>Recovered : </b>{row.Recovered}</li>'
        f'<li><b>Active : </b>{row.Active}</li>'
        f'<li><b>Closed : </b>{row.Closed}</li>'
        f'<li><b>Confirmed : </b>{row.Confirmed}</li>'
        f'<li><b>Deaths : </b>{row.Deaths}</li>'
    )
    folium.Circle(
        location=[row.Lat, row.Long_],
        # fill is an on/off switch; the fill colour has its own option.
        color='crimson', fill=True, fill_color='crimson',
        tooltip=tooltip,
        # Circle radius is the square root of the confirmed count.
        radius=int(row.Confirmed) ** 0.5,
    ).add_to(A)
A


## 6. Bar graph overview of Cases with plotly (Confirmed, Recovered, Deaths etc)

In [None]:
fig = px.bar(countries_aggregated, x="Date", y="Confirmed", color='Country', height=400,
             title='Confirmed', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()

In [None]:
fig = px.bar(countries_aggregated, x="Date", y="Recovered", color='Country', height=400,
             title='Recovered cases', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()

In [None]:
fig = px.bar(countries_aggregated, x="Date", y="Deaths", color='Country', height=400,
             title='Deaths', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()

In [None]:
fig = px.bar(countries_aggregated, x="Date", y="Active", color='Country', height=400,
             title='Active cases', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()

In [None]:
fig = px.bar(countries_aggregated, x="Date", y="Closed", color='Country', height=400,
             title='Closed cases', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()

## 7. Looking at Top 15 countries with Most/Least cases
-Confirmed, Recovered, Deaths etc
Table, bar chart & Scatter chart with markers (plotly graph),


In [None]:
per_country = countries_aggregated.groupby(["Country"])["Confirmed"].max().reset_index().sort_values("Confirmed",ascending=False).reset_index(drop=True)


In [None]:
headerColor = 'grey'
rowEvenColor = 'lightgrey'
rowOddColor = 'white'

# One fill colour per table row, strictly alternating. Repeating a five-colour
# pattern (odd, even, odd, even, odd) put two 'odd' rows next to each other at
# every fifth row and broke the striping.
row_colors = [rowOddColor if i % 2 == 0 else rowEvenColor
              for i in range(len(per_country))]

# Table of each country's maximum confirmed count, as ranked in per_country.
fig = go.Figure(data=[go.Table(
  header=dict(
    values=['<b>Country</b>','<b>Confirmed</b>'],
    line_color='darkslategray',
    fill_color=headerColor,
    align=['left','center'],
    font=dict(color='white', size=12)
  ),
  cells=dict(
    values=[
       per_country['Country'],
       per_country['Confirmed'],
      ],
    line_color='darkslategray',
    # Kept in the original nested-list form: a single list of row colours.
    fill_color = [row_colors],
    align = ['left', 'center'],
    font = dict(color = 'darkslategray', size = 11)
    ))
])
fig.update_layout(
    title='Confirmed Cases In Each Country',
)
fig.show()

In [None]:
# World map coloured by the confirmed count of each country in per_country;
# locations are matched by country name.
fig = px.choropleth(
    per_country,
    locations=per_country['Country'],
    locationmode='country names',
    color=per_country['Confirmed'],
    hover_name=per_country['Country'],
    color_continuous_scale=px.colors.sequential.Viridis,
)
fig.update_layout(title='Map of Confirmed Cases In Each Country')
fig.show()

In [None]:
fig = go.Figure(data=[go.Bar(
            x=per_country['Country'][0:15], y=per_country['Confirmed'][0:15],
            text=per_country['Confirmed'][0:15],
            textposition='auto',
            marker_color='goldenrod'

        )])
fig.update_layout(
    title='The 15 COVID-19 most infected Countries',
    xaxis_title="Countries",
    yaxis_title="Confirmed Cases",
)
fig.show()

In [None]:
fig = go.Figure(data=[go.Scatter(
    x=per_country['Country'][0:15],
    y=per_country['Confirmed'][0:15],
    mode='markers',
    marker=dict(
        color=100+np.random.randn(500),
        size=(per_country['Confirmed'][0:15]/5000),
        showscale=True
        )
)])
fig.update_layout(
    title='The 15 COVID-19 most infected Countries',
    xaxis_title="Countries",
    yaxis_title="Confirmed Cases",
)
fig.show()


In [None]:
fig = go.Figure(data=[go.Bar(
            x=per_country['Country'][-15:], y=per_country['Confirmed'][-15:],
            text=per_country['Confirmed'][-15:],
            textposition='auto',
            marker_color='blue'

        )])
fig.update_layout(
    title='The COVID-19 Least 15 infected Countries',
    xaxis_title="Countries",
    yaxis_title="Confirmed Cases",
)
fig.show()

In [None]:
fig = go.Figure(data=[go.Scatter(
    x=per_country['Country'][-15:],
    y=per_country['Confirmed'][-15:],
    mode='markers',
    marker=dict(
        color=100+np.random.randn(500),
        size=(per_country['Confirmed'][-15:]*5),
        showscale=True
        )
)])
fig.update_layout(
    title='The COVID-19 Least 15 infected Countries',
    xaxis_title="Countries",
    yaxis_title="Confirmed Cases",
)
fig.show()

In [None]:
death_country = countries_aggregated.groupby(["Country"])["Deaths"].max().reset_index().sort_values("Deaths",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=death_country['Country'][0:15], y=death_country['Deaths'][0:15],
            text=death_country['Deaths'][0:15],
            textposition='auto',
            marker_color='brown'

        )])
fig.update_layout(
    title='The COVID-19 15 Countries with most Deaths',
    xaxis_title="Countries",
    yaxis_title="Death Cases",
)
fig.show()

In [None]:
fig = go.Figure(data=[go.Bar(
            x=death_country['Country'][-15:], y=death_country['Deaths'][-15:],
            text=death_country['Deaths'][-15:],
            textposition='auto',
            marker_color='brown'

        )])
fig.update_layout(
    title='The 15 least death Countries',
    xaxis_title="Countries",
    yaxis_title="Death Cases",
)
fig.show()

In [None]:
recover_country = countries_aggregated.groupby(["Country"])["Recovered"].max().reset_index().sort_values("Recovered",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=recover_country['Country'][0:15], y=recover_country['Recovered'][0:15],
            text=recover_country['Recovered'][0:15],
            textposition='auto',
            marker_color='green'

        )])
fig.update_layout(
    title='The COVID-19 Top 15 most recovered Countries',
    xaxis_title="Countries",
    yaxis_title="Recovered Cases",
)
fig.show()

## 8. Country specific overview and trend of the cases
Germany, US, Nigeria, China

In [None]:
Germany= countries_aggregated[(countries_aggregated['Country'] == 'Germany') ].reset_index(drop=True)
Germany.head()

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=Germany['Date'], y=Germany['Confirmed'],
                    mode='lines',
                    name='Confirmed'))

fig.add_trace(go.Scatter(x=Germany['Date'], y=Germany['Active'],
                    mode='lines',
                    name='Active',line=dict( dash='dot')))
fig.add_trace(go.Scatter(x=Germany['Date'], y=Germany['Deaths'],name='Deaths',
                                   marker_color='black',mode='lines',line=dict( dash='dot') ))
fig.add_trace(go.Scatter(x=Germany['Date'], y=Germany['Recovered'],
                    mode='lines',
                    name='Recovered',marker_color='green'))
fig.add_trace(go.Scatter(x=Germany['Date'], y=Germany['Closed'],
                    mode='lines',
                    name='Closed',marker_color='yellow'))
fig.update_layout(
    title='Trend and Evolution of cases over time in Germany',
)

fig.show()


In [None]:
Us= countries_aggregated[(countries_aggregated['Country'] == 'US') ].reset_index(drop=True)
Us.head()

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=Us['Date'], y=Us['Confirmed'],
                    mode='lines',
                    name='Confirmed'))

fig.add_trace(go.Scatter(x=Us['Date'], y=Us['Active'],
                    mode='lines',
                    name='Active',line=dict( dash='dot')))
fig.add_trace(go.Scatter(x=Us['Date'], y=Us['Deaths'],name='Deaths',
                                   marker_color='black',mode='lines',line=dict( dash='dot') ))
fig.add_trace(go.Scatter(x=Us['Date'], y=Us['Recovered'],
                    mode='lines',
                    name='Recovered',marker_color='green'))
fig.add_trace(go.Scatter(x=Us['Date'], y=Us['Closed'],
                    mode='lines',
                    name='Closed',marker_color='yellow'))
fig.update_layout(
    title='Trend and Evolution of cases over time in United States',
)

fig.show()

In [None]:
Nigeria= countries_aggregated[(countries_aggregated['Country'] == 'Nigeria') ].reset_index(drop=True)
Nigeria.head()

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=Nigeria['Date'], y=Nigeria['Confirmed'],
                    mode='lines',
                    name='Confirmed'))

fig.add_trace(go.Scatter(x=Nigeria['Date'], y=Nigeria['Active'],
                    mode='lines',
                    name='Active',line=dict( dash='dot')))
fig.add_trace(go.Scatter(x=Nigeria['Date'], y=Nigeria['Deaths'],name='Deaths',
                                   marker_color='black',mode='lines',line=dict( dash='dot') ))
fig.add_trace(go.Scatter(x=Nigeria['Date'], y=Nigeria['Recovered'],
                    mode='lines',
                    name='Recovered',marker_color='green'))
fig.add_trace(go.Scatter(x=Nigeria['Date'], y=Nigeria['Closed'],
                    mode='lines',
                    name='Closed',marker_color='yellow'))
fig.update_layout(
    title='Trend and Evolution of cases over time in Nigeria',
)

fig.show()

In [None]:
China= countries_aggregated[(countries_aggregated['Country'] == 'China') ].reset_index(drop=True)
China.head()

In [None]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=China['Date'], y=China['Confirmed'],
                    mode='lines',
                    name='Confirmed'))

fig.add_trace(go.Scatter(x=China['Date'], y=China['Active'],
                    mode='lines',
                    name='Active',line=dict( dash='dot')))
fig.add_trace(go.Scatter(x=China['Date'], y=China['Deaths'],name='Deaths',
                                   marker_color='black',mode='lines',line=dict( dash='dot') ))
fig.add_trace(go.Scatter(x=China['Date'], y=China['Recovered'],
                    mode='lines',
                    name='Recovered',marker_color='green'))
fig.add_trace(go.Scatter(x=China['Date'], y=China['Closed'],
                    mode='lines',
                    name='Closed',marker_color='yellow'))
fig.update_layout(
    title='Trend and Evolution of cases over time in China',
)

fig.show()

## 8.Correlation of the case variables

In [None]:
# Correlate the numeric case columns only. Date and Country hold text, and
# current pandas raises on text columns instead of silently skipping them.
corr = countries_aggregated.select_dtypes(include='number').corr()
corr.style.background_gradient(cmap='coolwarm')  # pandas built-in Styler gradient

In [None]:
# Annotated correlation heatmap of the five case columns of the merged table.
Cases_cor = ['Confirmed', 'Recovered', 'Deaths', 'Active', 'Closed']
fig, ax = plt.subplots(figsize=(10, 10))
case_correlations = Countries_reference[Cases_cor].corr()
sns.heatmap(case_correlations, annot=True, fmt='.2f')
plt.show()

In [None]:
# Correlate the numeric columns only; text columns make corr() raise on current pandas.
corr = countries_aggregated.select_dtypes(include='number').corr()
# Mask the upper triangle (diagonal included), which only mirrors the lower one.
# The builtin bool replaces np.bool, an alias that current NumPy no longer provides.
mask = np.triu(np.ones_like(corr, dtype=bool))
# The target name before ", ax" was missing, which made this line a syntax error.
f, ax = plt.subplots(figsize=(15, 8))
cmap = sns.diverging_palette(150, 5, as_cmap=True)
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=1, center=0,
            square=True, linewidths=0.6, cbar_kws={"shrink": 0.5}, ax=ax)

In [None]:
# Scatter-plot matrix of the variables in countries_aggregated.
grid = sns.pairplot(data=countries_aggregated)
# y=1.05 places the title slightly above the top of the figure.
grid.fig.suptitle('Pair of COVID-19 Case variables', y=1.05)

## 9. Loading of Additional Datasets with Testing/other variables
-Testing and other interesting variables
-Sourced from United Nations, World Bank etc
-Github link to the datasets


In [None]:
#Testing = pd.read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/testing/covid-testing-all-observations.csv")
#Testing.head()

In [None]:
# Our World in Data COVID-19 table (testing and other per-country variables).
OWID_COVID_URL = (
    "https://raw.githubusercontent.com/owid/covid-19-data/"
    "master/public/data/owid-covid-data.csv"
)
world_data = pd.read_csv(OWID_COVID_URL)
world_data.head()

In [None]:
world_data.isna().sum() 

In [None]:
# Columns whose missing values are replaced with 0, processed in this order.
# NOTE: 0 is a placeholder here, not a measurement -- rankings and correlations
# computed later treat it as a real value.
zero_filled_columns = (
    'iso_code', 'continent',
    'total_cases_per_million', 'new_cases_per_million',
    'total_deaths_per_million', 'new_deaths_per_million',
    'total_tests', 'new_tests', 'total_tests_per_thousand',
    'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'tests_units',
    'stringency_index', 'population', 'population_density',
    'median_age', 'aged_65_older', 'aged_70_older',
    'gdp_per_capita', 'extreme_poverty', 'cvd_death_rate',
    'diabetes_prevalence', 'female_smokers', 'male_smokers',
    'handwashing_facilities', 'hospital_beds_per_thousand',
    'new_tests_per_thousand',
)
for column in zero_filled_columns:
    world_data[column] = world_data[column].fillna(0)

In [None]:
world_data.isna().sum()

In [None]:
world_data = world_data.rename(columns = {'iso_code' : 'iso3'})

In [None]:
# Join each world_data row to exactly one country-level lookup row.
#  * Rows without an ISO code carry the 0 placeholder on both sides; matching
#    0 to 0 would pair unrelated records, so they are left out of the lookup.
#  * The lookup table has Province_State and Admin2 columns, so one iso3 code
#    can sit on several rows; keeping a single row without either stops every
#    daily record from being repeated (and double-counted by later sums).
# NOTE(review): assumes every country has one such country-level row -- confirm.
country_lookup = reference[
    (reference['iso3'] != 0) & reference['iso3'].notna()
    & (reference['Province_State'].isin([0]) | reference['Province_State'].isna())
    & (reference['Admin2'].isin([0]) | reference['Admin2'].isna())
].drop_duplicates(subset='iso3')
world_reference = pd.merge(left=world_data, right=country_lookup, on='iso3')

In [None]:
world_reference.head()

In [None]:
world_reference.columns

In [None]:
world_reference["Country"].nunique()

In [None]:
world_reference.groupby("date")["total_tests"].sum()

## 10. Looking at Top 15 countries
-Countries with the highest/lowest values of each variable, starting with testing

In [None]:
test_country = world_reference.groupby(["Country"])["total_tests"].max().reset_index().sort_values("total_tests",ascending=False).reset_index(drop=True)


In [None]:
headerColor = 'grey'
rowEvenColor = 'lightgrey'
rowOddColor = 'white'

# One fill colour per table row, strictly alternating. Repeating a five-colour
# pattern (odd, even, odd, even, odd) put two 'odd' rows next to each other at
# every fifth row and broke the striping.
row_colors = [rowOddColor if i % 2 == 0 else rowEvenColor
              for i in range(len(test_country))]

# Table of each country's maximum total_tests value, as ranked in test_country.
fig = go.Figure(data=[go.Table(
  header=dict(
    values=['<b>Country</b>','<b>total_tests</b>'],
    line_color='darkslategray',
    fill_color=headerColor,
    align=['left','center'],
    font=dict(color='white', size=12)
  ),
  cells=dict(
    values=[
       test_country['Country'],
       test_country['total_tests'],
      ],
    line_color='darkslategray',
    # Kept in the original nested-list form: a single list of row colours.
    fill_color = [row_colors],
    align = ['left', 'center'],
    font = dict(color = 'darkslategray', size = 11)
    ))
])
fig.update_layout(
    title='Total tests Carried out In Each Country',
)
fig.show()

In [None]:
fig = go.Figure(data=[go.Bar(
            x=test_country['Country'][0:15], y=test_country['total_tests'][0:15],
            text=test_country['total_tests'][0:15],
            textposition='auto',
            marker_color='green'

        )])
fig.update_layout(
    title='The 15 most Tested Countries',
    xaxis_title="Countries",
    yaxis_title="Total tests",
)
fig.show()

In [None]:
fig = go.Figure(data=[go.Bar(
            x=test_country['Country'][-15:], y=test_country['total_tests'][-15:],
            text=test_country['total_tests'][-15:],
            textposition='auto',
            marker_color='brown'

        )])
fig.update_layout(
    title='Least 15 Tested Countries',
    xaxis_title="Countries",
    yaxis_title="Total Tests",
)
fig.show()

In [None]:
world_reference.columns

In [None]:
world_reference.groupby("date")["new_cases"].sum()

In [None]:
# Worldwide new cases per day. The index is converted to real datetimes so
# matplotlib draws a continuous time axis instead of one category per date string.
new_cases_by_date = world_reference.groupby("date")["new_cases"].sum()
new_cases_by_date.index = pd.to_datetime(new_cases_by_date.index)

plt.figure(figsize=(10, 7))
plt.plot(new_cases_by_date, color="Brown")
plt.xlabel("date")
plt.ylabel("new_cases")
plt.title("new cases trend in the world")
# Tick positions are left to matplotlib so they follow the data range; the former
# hard-coded labels included "2020-06-4", which matches no zero-padded date.
plt.xticks(rotation=45, horizontalalignment='right', fontsize='15')
plt.show()

In [None]:
hand_washing = world_reference.groupby(["Country"])["handwashing_facilities"].max().reset_index().sort_values("handwashing_facilities",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=hand_washing['Country'][0:15], y=hand_washing['handwashing_facilities'][0:15],
            text=hand_washing['handwashing_facilities'][0:15],
            textposition='auto',
            marker_color='blue'

        )])
fig.update_layout(
    title='The 15 most handwashing_facilities Countries',
    xaxis_title="Countries",
    yaxis_title="handwashing_facilities",
)
fig.show()

In [None]:
fig = go.Figure(data=[go.Bar(
            x=hand_washing['Country'][-15:], y=hand_washing['handwashing_facilities'][-15:],
            text=hand_washing['handwashing_facilities'][-15:],
            textposition='auto',
            marker_color='red'

        )])
fig.update_layout(
    title='The 15 least handwashing_facilities Countries',
    xaxis_title="Countries",
    yaxis_title="handwashing_facilities",
)
fig.show()

In [None]:
female_smokers = world_reference.groupby(["Country"])["female_smokers"].max().reset_index().sort_values("female_smokers",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=female_smokers['Country'][0:15], y=female_smokers['female_smokers'][0:15],
            text=female_smokers['female_smokers'][0:15],
            textposition='auto',
            marker_color='pink'

        )])
fig.update_layout(
    title='The 15 most female_smokers Countries',
    xaxis_title="Countries",
    yaxis_title="female_smokers",
)
fig.show()

In [None]:
fig = go.Figure(data=[go.Bar(
            x=female_smokers['Country'][-15:], y=female_smokers['female_smokers'][-15:],
            text=female_smokers['female_smokers'][-15:],
            textposition='auto',
            marker_color='blue'

        )])
fig.update_layout(
    title='The 15 least female_smokers Countries',
    xaxis_title="Countries",
    yaxis_title="female_smokers",
)
fig.show()

In [None]:
male_smokers = world_reference.groupby(["Country"])["male_smokers"].max().reset_index().sort_values("male_smokers",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=male_smokers['Country'][0:15], y=male_smokers['male_smokers'][0:15],
            text=male_smokers['male_smokers'][0:15],
            textposition='auto',
            marker_color='gray'

        )])
fig.update_layout(
    title='The 15 most male_smokers Countries',
    xaxis_title="Countries",
    yaxis_title="male_smokers",
)
fig.show()

In [None]:
fig = go.Figure(data=[go.Bar(
            x=male_smokers['Country'][-15:], y=male_smokers['male_smokers'][-15:],
            text=male_smokers['male_smokers'][-15:],
            textposition='auto',
            marker_color='blue'

        )])
fig.update_layout(
    title='The 15 least male_smokers Countries',
    xaxis_title="Countries",
    yaxis_title="male_smokers",
)
fig.show()

In [None]:
diabetes= world_reference.groupby(["Country"])["diabetes_prevalence"].max().reset_index().sort_values("diabetes_prevalence",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=diabetes['Country'][0:15], y=diabetes['diabetes_prevalence'][0:15],
            text=diabetes['diabetes_prevalence'][0:15],
            textposition='auto',
            marker_color='purple'

        )])
fig.update_layout(
    title='The 15 most diabetes_prevalence Countries',
    xaxis_title="Countries",
    yaxis_title="diabetes_prevalences",
)
fig.show()

In [None]:
hospital= world_reference.groupby(["Country"])["hospital_beds_per_thousand"].max().reset_index().sort_values("hospital_beds_per_thousand",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=hospital['Country'][0:15], y=hospital['hospital_beds_per_thousand'][0:15],
            text=hospital['hospital_beds_per_thousand'][0:15],
            textposition='auto',
            marker_color='orange'

        )])
fig.update_layout(
    title='The 15 most hospital_beds_per_thousand Countries',
    xaxis_title="Countries",
    yaxis_title="hospital_beds_per_thousand",
)
fig.show()

In [None]:
poverty= world_reference.groupby(["Country"])["extreme_poverty"].max().reset_index().sort_values("extreme_poverty",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=poverty['Country'][0:15], y=poverty['extreme_poverty'][0:15],
            text=poverty['extreme_poverty'][0:15],
            textposition='auto',
            marker_color='magenta'

        )])
fig.update_layout(
    title='The 15 most extreme_poverty Countries',
    xaxis_title="Countries",
    yaxis_title="extreme_poverty",
)
fig.show()

In [None]:
gdp= world_reference.groupby(["Country"])["gdp_per_capita"].max().reset_index().sort_values("gdp_per_capita",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=gdp['Country'][0:15], y=gdp['gdp_per_capita'][0:15],
            text=gdp['gdp_per_capita'][0:15],
            textposition='auto',
            marker_color='blue'

        )])
fig.update_layout(
    title='The 15 most gdp_per_capita Countries',
    xaxis_title="Countries",
    yaxis_title="gdp_per_capita",
)
fig.show()

In [None]:
aged_70= world_reference.groupby(["Country"])["aged_70_older"].max().reset_index().sort_values("aged_70_older",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=aged_70['Country'][0:15], y=aged_70['aged_70_older'][0:15],
            text=aged_70['aged_70_older'][0:15],
            textposition='auto',
            marker_color='gray'

        )])
fig.update_layout(
    title='The 15 most aged_70_older Countries',
    xaxis_title="Countries",
    yaxis_title="aged_70_older",
)
fig.show()

In [None]:
world_reference.columns

In [None]:
aged_65= world_reference.groupby(["Country"])["aged_65_older"].max().reset_index().sort_values("aged_65_older",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=aged_65['Country'][0:15], y=aged_65['aged_65_older'][0:15],
            text=aged_65['aged_65_older'][0:15],
            textposition='auto',
            marker_color='brown'

        )])
fig.update_layout(
    title='The 15 most aged_65_older Countries',
    xaxis_title="Countries",
    yaxis_title="aged_65_older",
)
fig.show()

In [None]:
middle= world_reference.groupby(["Country"])["median_age"].max().reset_index().sort_values("median_age",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=middle['Country'][0:15], y=middle['median_age'][0:15],
            text=middle['median_age'][0:15],
            textposition='auto',
            marker_color='green'

        )])
fig.update_layout(
    title='The 15 most median_age Countries',
    xaxis_title="Countries",
    yaxis_title="median_age",
)
fig.show()

In [None]:
population_dense= world_reference.groupby(["Country"])["population_density"].max().reset_index().sort_values("population_density",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=population_dense['Country'][0:15], y=population_dense['population_density'][0:15],
            text=population_dense['population_density'][0:15],
            textposition='auto',
            marker_color='yellow'

        )])
fig.update_layout(
    title='The 15 most population_density Countries',
    xaxis_title="Countries",
    yaxis_title="population_density",
)
fig.show()

In [None]:
populations= world_reference.groupby(["Country"])["population"].max().reset_index().sort_values("population",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=populations['Country'][0:15], y=populations['population'][0:15],
            text=populations['population'][0:15],
            textposition='auto',
            marker_color='orange'

        )])
fig.update_layout(
    title='The 15 most populated Countries',
    xaxis_title="Countries",
    yaxis_title="population",
)
fig.show()

In [None]:
per_thousand= world_reference.groupby(["Country"])["total_tests_per_thousand"].max().reset_index().sort_values("total_tests_per_thousand",ascending=False).reset_index(drop=True)


In [None]:
fig = go.Figure(data=[go.Bar(
            x=per_thousand['Country'][0:15], y=per_thousand['total_tests_per_thousand'][0:15],
            text=per_thousand['total_tests_per_thousand'][0:15],
            textposition='auto',
            marker_color='violet'

        )])
fig.update_layout(
    title='The 15 most total_tests_per_thousandCountries',
    xaxis_title="Countries",
    yaxis_title="total_tests_per_thousand",
)
fig.show()

In [None]:
# Rows of the merged table that belong to the most recent date.
latest_date = world_reference['date'].max()
world_circle = world_reference[world_reference['date'] == latest_date]

W = folium.Map(location=[0, 0], tiles='cartodbpositron',
               min_zoom=1, max_zoom=2, zoom_start=1)

# Columns listed in the tooltip, in display order (the column name is the label).
tooltip_columns = ['total_cases', 'population_density', 'extreme_poverty',
                   'diabetes_prevalence', 'aged_70_older',
                   'handwashing_facilities', 'male_smokers', 'female_smokers']
marker_columns = ['Lat', 'Long_', 'Country', 'population'] + tooltip_columns

# Iterate once over just the needed columns, instead of re-indexing the frame
# with .iloc for every single field.
for row in world_circle[marker_columns].itertuples(index=False):
    # <b> is the HTML bold element (<bold> is not), and every tag is closed.
    tooltip = f'<li><b>Country : </b>{row.Country}</li>' + ''.join(
        f'<li><b>{name} : </b>{getattr(row, name)}</li>' for name in tooltip_columns
    )
    folium.Circle(
        location=[row.Lat, row.Long_],
        # fill is an on/off switch; the fill colour has its own option.
        color='crimson', fill=True, fill_color='crimson',
        tooltip=tooltip,
        # Circle radius is the square root of the population.
        radius=int(row.population) ** 0.5,
    ).add_to(W)
W

In [None]:
world_reference.columns

In [None]:
# Annotated correlation heatmap of total cases against population density.
Group = ['total_cases', 'population_density']
fig, ax = plt.subplots(figsize=(10, 10))
group_correlations = world_reference[Group].corr()
sns.heatmap(group_correlations, annot=True, fmt='.2f')
plt.show()

In [None]:
world_data.columns

In [None]:
countries_aggregated.columns

In [None]:
world_data=world_data.rename(columns={'date' : 'Date'})

In [None]:
# Pair each countries_aggregated row with the world_data row of the same country
# on the same day. Joining on Date alone combined every country with every
# world_data row reported that day (a per-day cartesian product).
# NOTE(review): assumes world_data names its countries in a 'location' column and
# that both sources spell most names alike -- confirm; names that differ
# are dropped by the inner join.
world_aggregated = pd.merge(left=countries_aggregated, right=world_data,
                            left_on=['Date', 'Country'], right_on=['Date', 'location'])

In [None]:
world_aggregated.columns

## Correlation plot
-Uses a much wider dataset that includes additional variables
-Several of the interesting variables show little correlation


In [None]:
# Correlate the numeric columns only; text columns make corr() raise on current pandas.
corr = world_aggregated.select_dtypes(include='number').corr()
# Mask the upper triangle (diagonal included), which only mirrors the lower one.
# The builtin bool replaces np.bool, an alias that current NumPy no longer provides.
mask = np.triu(np.ones_like(corr, dtype=bool))
# The target name before ", ax" was missing, which made this line a syntax error.
f, ax = plt.subplots(figsize=(35, 28))
cmap = sns.diverging_palette(220, 10, as_cmap=True)
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=1, center=0,
            square=True, linewidths=0.6, cbar_kws={"shrink": 2}, ax=ax)

In [None]:
# Correlate the numeric columns only; text columns make corr() raise on
# current pandas instead of being silently skipped.
corr = world_aggregated.select_dtypes(include='number').corr()
corr.style.background_gradient(cmap='coolwarm')  # pandas built-in Styler gradient