In [1]:
import pandas as pd

from datetime import datetime

%matplotlib inline

import matplotlib as mpl
import matplotlib.pyplot as plt

import plotly.graph_objects as go

import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

In [2]:
# Default size (width, height in inches) for every matplotlib figure.
mpl.rc('figure', figsize=(16, 9))

# Let pandas print up to 500 rows and 500 columns before truncating.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)

#sns.set(style="darkgrid")

In [24]:
# URL of the Our World in Data (OWID) COVID-19 dataset, served as a CSV file
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

In [25]:
# Download the CSV straight into a dataframe (comma is read_csv's default separator).
covid_data = pd.read_csv(url)

In [26]:
# Distinct values in 'location'; dropna=False keeps the count equal to len(unique()).
n_locations = covid_data['location'].nunique(dropna=False)
print(f"The number of unique countries in the data are {n_locations}")

The number of unique countries in the data are 244


In [27]:
def get_data(url="https://covid.ourworldindata.org/data/owid-covid-data.csv"):
    """
    Reads the COVID-19 CSV data of different countries from the given location.

    INPUT: url - URL or file path of the CSV (defaults to the OWID COVID-19 dataset)
    OUTPUT: Pandas dataframe holding the CSV contents

    Side effect: prints the number of distinct values in the 'location' column.
    Raises KeyError if the CSV has no 'location' column.
    """
    # Read from the location the caller asked for; the argument used to be
    # overwritten by a fixed URL, so any other source was silently ignored.
    covid_data = pd.read_csv(url, sep=",")
    print(f"The number of unique countries in the data are {len(covid_data['location'].unique())}")

    return covid_data
    

In [28]:
def clean_data(df, save_path="E:/uni/Data_Science_E/Data_Science_Enterprise/data/final_cases.csv"):
    """
    Takes in the dataframe and
    - keeps only the date, location, case and death columns
    - fills the NaN values of the numeric columns with zeroes
      [Assumption: NaN means there were no cases that day instead of filling it with mean values]
    - converts the date column to datetime
    - resets the index to 0..n-1

    INPUT: df        - Pandas dataframe containing at least the six kept columns
           save_path - CSV file the cleaned data is written to; pass None to skip
                       saving (the default is the path this notebook has always used)
    OUTPUT: Cleaned dataframe (the input dataframe is not modified)

    Raises KeyError if one of the required columns is missing from df.
    """
    numeric_columns = ['new_cases_smoothed', 'total_cases', 'total_deaths', 'new_deaths_smoothed']
    keep_columns = ['date', 'location'] + numeric_columns

    # Zero-fill only the numeric columns: a blanket fillna(0) would also put 0
    # into 'date' / 'location', and a 0 date parses as 1970-01-01.
    cases = df[keep_columns].fillna(dict.fromkeys(numeric_columns, 0))

    # converting date object into datetime (a missing date becomes NaT)
    cases["date"] = pd.to_datetime(cases["date"])

    # resetting the index
    cases = cases.reset_index(drop=True)

    # saving the dataset, unless the caller opted out
    if save_path is not None:
        cases.to_csv(save_path, index=False)

    return cases
    

### Dataset for total cases

In [29]:
# Keep only the columns the dashboard plots or filters on.
case_columns = [
    'date', 'location',
    'new_cases_smoothed', 'total_cases',
    'total_deaths', 'new_deaths_smoothed',
]
cases = covid_data.loc[:, case_columns]

In [30]:
# Replace every missing value with 0, then list the non-null counts per column.
cases = cases.fillna(value=0)
cases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 203338 entries, 0 to 203337
Data columns (total 6 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   date                 203338 non-null  object 
 1   location             203338 non-null  object 
 2   new_cases_smoothed   203338 non-null  float64
 3   total_cases          203338 non-null  float64
 4   total_deaths         203338 non-null  float64
 5   new_deaths_smoothed  203338 non-null  float64
dtypes: float64(4), object(2)
memory usage: 9.3+ MB


In [31]:
# Parse the date strings into datetime values and check the resulting dtype.
cases = cases.assign(date=pd.to_datetime(cases["date"]))
cases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 203338 entries, 0 to 203337
Data columns (total 6 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   date                 203338 non-null  datetime64[ns]
 1   location             203338 non-null  object        
 2   new_cases_smoothed   203338 non-null  float64       
 3   total_cases          203338 non-null  float64       
 4   total_deaths         203338 non-null  float64       
 5   new_deaths_smoothed  203338 non-null  float64       
dtypes: datetime64[ns](1), float64(4), object(1)
memory usage: 9.3+ MB


In [32]:
# Renumber the rows from 0 (the old index is discarded) and show the last five.
cases = cases.reset_index(drop=True)
cases.tail(n=5)

Unnamed: 0,date,location,new_cases_smoothed,total_cases,total_deaths,new_deaths_smoothed
203333,2022-07-19,Zimbabwe,29.429,256187.0,5568.0,0.429
203334,2022-07-20,Zimbabwe,29.429,256187.0,5568.0,0.429
203335,2022-07-21,Zimbabwe,24.286,256217.0,5570.0,0.571
203336,2022-07-22,Zimbabwe,19.143,256217.0,5570.0,0.571
203337,2022-07-23,Zimbabwe,23.286,256246.0,5572.0,0.857


In [33]:
# Write the prepared frame to disk without the index column; the dashboard
# cell reads it back from this same file.
final_cases_path = "E:/uni/Data_Science_E/Data_Science_Enterprise/data/final_cases.csv"
cases.to_csv(final_cases_path, index=False)

### Dashboard preparation

In [43]:
# Load the prepared dataset that the saving cell wrote to disk.
cases = pd.read_csv("E:/uni/Data_Science_E/Data_Science_Enterprise/data/final_cases.csv")

# Empty placeholder figure, shown until the callback supplies the first plot.
fig = go.Figure()

# One dropdown entry per distinct value of the 'location' column.
country_options = [
    {'label': location, 'value': location}
    for location in cases['location'].unique()
]

# Plottable columns of `cases`, paired with the label shown to the user.
stat_options = [
    {'label': 'Daily New Cases', 'value': 'new_cases_smoothed'},
    {'label': 'Daily New Deaths', 'value': 'new_deaths_smoothed'},
    {'label': 'Total Cases', 'value': 'total_cases'},
    {'label': 'Total Deaths', 'value': 'total_deaths'},
]

app = dash.Dash()

# Page structure, top to bottom: intro text, country selector,
# statistic selector, and the graph updated by the callback.
app.layout = html.Div(children=[

    dcc.Markdown('''
    #  Applied Data Science on COVID-19 data

    Goal of the project is to teach data science by applying a cross industry standard process,
    it covers the full walkthrough of: automated data gathering, data transformations,
    filtering and machine learning to approximating the doubling time, and
    (static) deployment of responsive dashboard.

    '''),

    dcc.Markdown('''
    ## Multi-Select Country for visualization
    '''),

    dcc.Dropdown(
        id='country_drop_down',
        options=country_options,
        value=['Germany', 'Italy'],  # countries selected on first load
        multi=True,
    ),

    dcc.Markdown('''
        ## Select other graphs
        '''),

    dcc.Dropdown(
        id='daily_stats',
        options=stat_options,
        value='new_cases_smoothed',
        multi=False,
    ),

    dcc.Graph(figure=fig, id='main_window_slope'),
])



@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_drop_down', 'value'),
    Input('daily_stats', 'value')])
def update_figure(country_list,show_stats):
    """
    Rebuilds the main graph whenever one of the two dropdowns changes.

    INPUT: country_list - locations chosen in the 'country_drop_down' dropdown
           show_stats   - column of `cases` chosen in the 'daily_stats' dropdown
    OUTPUT: figure dictionary ('data' and 'layout') for the 'main_window_slope' graph,
            holding one trace per selected location
    """
    # A cleared dropdown can hand the callback None (or an empty list); iterating
    # over None or indexing a column named None would raise inside the callback.
    selected_countries = country_list or []
    if show_stats is None:
        # fall back to the statistic that is pre-selected in the layout
        show_stats = 'new_cases_smoothed'

    traces = []
    for each in selected_countries:

        # rows belonging to the current location only
        df_plot=cases[cases['location']==each]

        traces.append(dict(x=df_plot.date,
                                y=df_plot[show_stats],
                                mode='markers+lines',
                                opacity=0.9,
                                name=each
                        )
                )

    return {
            'data': traces,
            'layout': dict (
                width=1280,
                height=720,

                xaxis={'title':'Timeline',
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      }
        )
    }

if __name__ == '__main__':

    # Prefer app.run: newer Dash releases replace run_server with it. Older
    # releases that lack app.run still get run_server. hasattr is checked on
    # "run" only, so run_server is never touched when app.run exists.
    start_server = app.run if hasattr(app, "run") else app.run_server

    # The reloader is switched off so the server starts only once in this kernel.
    start_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
