In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
def readCSV(dataset):
    """
    Load a CSV dataset into a pandas DataFrame.

    Parameters
    ----------
    dataset : str, path object or file-like object
        Location of the CSV data; passed unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``dataset``.
    """
    return pd.read_csv(dataset)

In [None]:
# Load the raw data: readCSV() reads 'coronavirus.csv' (path relative to the
# notebook's working directory) and returns it as a DataFrame.
df = readCSV('coronavirus.csv')

In [None]:
df

In [None]:
# Renaming the columns to shorter names used in the rest of the notebook
df = df.rename(
    columns={
        'Country/Region': 'Country',
        'WHO Region': 'Region',
    }
)

In [None]:
df

In [None]:
#Printing the summary of dataset
df.info()

In [None]:
#Checking the missing values in dataset
df.isna()

In [None]:
#Sum of missing values in the dataset
df.isna().sum()

In [None]:
# Remove the columns with the most missing values
df = df.drop(columns=['Province/State', 'Lat', 'Long'])

In [None]:
#Dropping every row that still has a NaN value in any remaining column
# NOTE(review): dropna() is not limited to the Region column; if only Region
# was meant, dropna(subset=['Region']) would express that - confirm.
df=df.dropna()

In [None]:
df.head(20)

In [None]:
# Renumber the rows after the row drop above. reset_index() without drop=True
# keeps the old index as a new 'index' column, which a later cell removes again.
df= df.reset_index()

In [None]:
# Discard the 'index' column left behind by reset_index()
df = df.drop(columns=['index'])

In [None]:
df

In [None]:
#Finding datatypes of each column
df.dtypes

In [None]:
# Convert the Date column to pandas datetime values
df = df.assign(Date=pd.to_datetime(df['Date']))

In [None]:
df.dtypes

In [None]:
df

In [None]:
# One row per (Country, Date, Region): the remaining columns are summed within
# each group and the grouping keys are kept as ordinary columns.
Country = df.groupby(['Country', 'Date', 'Region'], as_index=False).sum()

In [None]:
Country

In [None]:
#Finding the cumulative sum of all cases until the max date in dataset
# Keep only the rows of the most recent date (one row per country/region pair).
# .copy() makes this an independent frame: a later cell adds a column to it, and
# doing that on a boolean-filtered slice is the chained-assignment case that
# pandas warns about (the warning is silenced at the top of this notebook).
Cumulative_sum_cases = Country[Country['Date'] == Country['Date'].max()].copy()
Cumulative_sum_cases

In [None]:
#Finding top 10 countries with most covid cases
def top_countries(label, n=10, data=None):
    """
    Return the ``n`` rows with the largest values in column ``label``.

    Parameters
    ----------
    label : str
        Name of the column to rank by (sorted in descending order).
    n : int, default 10
        Number of rows to return.
    data : pandas.DataFrame, optional
        Frame to rank. When omitted, the notebook-level
        ``Cumulative_sum_cases`` frame is used, which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        The first ``n`` rows of ``data`` after sorting by ``label`` descending.
    """
    if data is None:
        # Looked up at call time, so the function follows later rebinding of
        # the notebook-level frame.
        data = Cumulative_sum_cases
    return data.sort_values(label, ascending=False).head(n)

In [None]:
def bar_chart(x,y,name,name2):
    """
    Draw a bar chart of ``y`` against ``x``, save it to disk and display it.

    Parameters
    ----------
    x : sequence
        Bar labels (country names in this notebook).
    y : sequence
        Bar heights, aligned with ``x``.
    name : str
        Text inserted into the chart title; also passed to ``savefig`` as the
        output file name.
    name2 : str
        Label of the y-axis.
    """
    fig = plt.figure(figsize=(10,8))
    ax = fig.add_axes([0,0,1,1])
    ax.bar(x, y, color='slateblue')
    ax.set_xlabel("Country", fontsize=14, color='black', fontweight='bold')
    ax.set_ylabel("{casetype}".format(casetype=name2), fontsize=14, fontweight='bold')
    ax.set_title('Top 10 countries with {caseType} '.format(caseType = name),
                 color='crimson', fontsize=12, fontweight='bold')
    # Save BEFORE show(): once show() has run in a notebook the figure is no
    # longer pyplot's current figure, so a later plt.savefig() writes an empty
    # canvas. Saving through `fig` also pins the call to this figure.
    # bbox_inches='tight' is needed because the axes fill the whole figure
    # ([0, 0, 1, 1]), which would otherwise crop the labels and title.
    fig.savefig(name, bbox_inches='tight')
    plt.show()

In [None]:
# Bar chart of the 10 countries with the most confirmed cases
top_confirmed = top_countries('Confirmed')
x = top_confirmed['Country']
y = top_confirmed['Confirmed']
bar_chart(x, y, name='highest covid cases', name2='No. of covid cases')

In [None]:
# Bar chart of the 10 countries with the most deaths, drawn with bar_chart()
top_deaths = top_countries('Deaths')
x = top_deaths['Country']
y = top_deaths['Deaths']
bar_chart(x, y, name='Highest Death cases', name2='No of Death cases')

In [None]:
#Grouping the data by region
# numeric_only=True keeps the text column 'Country' out of the aggregation.
# Older pandas dropped such columns silently; pandas 2.x sums every column by
# default, which for strings means concatenating the country names of each group.
region = df.groupby(['Region', 'Date']).sum(numeric_only=True).reset_index()
region

In [None]:
# Region totals on the most recent date in the data (one row per region)
Regions_cum_sum = region.loc[region['Date'].eq(region['Date'].max())]
Regions_cum_sum

In [None]:
def region_data(x,y,filename='regioncases'):
    """
    Draw a pie chart of one column of ``Regions_cum_sum``, split by region,
    save it to disk and display it.

    Reads the notebook-level ``Regions_cum_sum`` frame at call time.

    Parameters
    ----------
    x : str
        Column of ``Regions_cum_sum`` whose values size the wedges.
    y : str
        Text inserted into the chart title.
    filename : str, default 'regioncases'
        File name passed to ``savefig``. With the default, every call writes to
        the same file; pass a distinct name to keep several charts.
    """
    values = Regions_cum_sum[x]
    labels = Regions_cum_sum['Region']
    # Alternate flush and pulled-out wedges. Built from the data so the list
    # always has one entry per wedge, which pie() requires.
    explode = [0.1 if position % 2 else 0 for position in range(len(values))]
    textprops = {"fontsize":12,"fontweight":'bold'}
    colors = ['seagreen','deepskyblue','teal','tomato','crimson','peru']

    fig, ax = plt.subplots(figsize=(6,6))
    ax.pie(values, labels=labels, explode=explode, shadow=True, startangle=50,
           autopct='%1.1f%%', textprops=textprops, colors=colors)
    ax.set_title("{casetype}  by Region".format(casetype=y),
                 color='darkmagenta', fontsize=14, fontweight='bold')
    # Save BEFORE show(): after show() the figure is no longer pyplot's current
    # figure in a notebook, so a later plt.savefig() writes an empty canvas.
    fig.savefig(filename, bbox_inches='tight')
    plt.show()

In [None]:
#Getting the active cases by region
region_data('Active','Active cases')

In [None]:
region_data('Deaths','Death cases')

In [None]:
#Visualizing the covid trend among different countries
# Per-country, per-date totals, indexed by (Country, Date).
# The case columns are selected with a list (double brackets): selecting several
# columns from a groupby with a bare tuple was deprecated and raises in pandas 2.0+.
df1 = df.groupby(['Country','Date'])[['Confirmed','Active','Deaths','Recovered']].sum()

In [None]:
def covid_trend(a,x,y):
    """
    Plot one case column over time for a single country.

    Parameters
    ----------
    a : pandas.DataFrame
        Frame that is indexed with ``a.loc[x, y]`` (in this notebook: totals
        indexed by Country and Date).
    x : str
        Country to select; also used in the y-label and title.
    y : str
        Case column to plot; also used in the y-label and title.

    Notes
    -----
    ``sns.set`` is called on every invocation and changes the figure size
    setting globally. No figure is created here.
    NOTE(review): the line is presumably drawn on pyplot's current axes, so
    consecutive calls in one cell would overlay - confirm that is intended.
    """
    sns.set(rc={'figure.figsize':(11, 4)})
    selected = a.loc[x,y]
    axes = selected.plot()
    y_label = '{a} cases in {b}'.format(a=y,b=x)
    chart_title = 'Covid Trend of {casetype} VS Confirmed cases in {countryname}'.format(countryname=x,casetype=y)
    axes.set_ylabel(y_label)
    axes.legend()
    axes.set_title(chart_title, color='red', fontsize=14)
    
    

In [None]:
# Confirmed and Deaths trends for Yemen, requested in the same cell.
# NOTE(review): covid_trend() creates no figure of its own, so the second line is
# presumably drawn on the axes of the first and its title / y-label replace the
# ones set by the first call - confirm this overlay is the intended result.
covid_trend(df1,'Yemen','Confirmed')
covid_trend(df1,'Yemen','Deaths')


In [None]:
Cumulative_sum_cases

In [None]:
#Calculating the Mortality rate percentage for each country
# Defined in this notebook as Deaths / Active * 100, rounded to 2 decimals.
# Rows with zero active cases get NaN instead of inf: an infinite rate would
# otherwise sort to the top of the "highest mortality" ranking further down.
# assign() builds a new frame, so no column is written into a filtered slice
# and re-running the cell gives the same result.
# NOTE(review): a case-fatality rate is usually Deaths / Confirmed - confirm
# that Active is the intended denominator.
Cumulative_sum_cases = Cumulative_sum_cases.assign(**{
    "Mortality Rate%": lambda cases: (
        cases['Deaths'] / cases['Active'].where(cases['Active'] != 0) * 100
    ).round(2)
})

In [None]:
Cumulative_sum_cases

In [None]:
# Countries ranked by mortality rate, highest first
Mortality_data = (
    Cumulative_sum_cases
    .sort_values(by='Mortality Rate%', ascending=False)
    .reset_index()
)
# Keep the ten highest and remove the columns not shown in the table
Mortality_data_top = Mortality_data.iloc[:10].drop(columns=['index', 'Recovered', 'Date'])
# Shift the row labels by one so the table is numbered from 1
Mortality_data_top.index += 1
Mortality_data_top

In [None]:
# Mortality rate of the ten countries with the most confirmed cases
Mortality_data_C = (
    Cumulative_sum_cases
    .sort_values(by='Confirmed', ascending=False)
    .reset_index()
    .iloc[:10]
    .drop(columns=['index', 'Recovered', 'Date'])
)
# Shift the row labels by one so the table is numbered from 1
Mortality_data_C.index += 1
Mortality_data_C

In [None]:
#Comparing the mortality rate with covid cases in top 10 countries
def mortality_rate(data,data2):
    """
    Draw a 2x2 grid of line plots comparing two country tables, save it to
    'Mortality' and display it.

    The top row uses ``data`` and the bottom row uses ``data2``; in each row the
    left panel plots the 'Active' column and the right panel plots
    'Mortality Rate%', both against 'Country'.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide 'Country', 'Active' and 'Mortality Rate%' columns.
    data2 : pandas.DataFrame
        Same columns as ``data``.

    Notes
    -----
    NOTE(review): the notebook passes a table ranked by 'Confirmed' as ``data2``,
    while the panel titles and suptitle speak of Active cases - confirm wording.
    """
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(20,14))
    # (axes, frame, column to plot, panel title)
    panels = [
        (ax[0,0], data,  'Active',          "Active cases in countries with high mortality"),
        (ax[0,1], data,  'Mortality Rate%', "countries with high mortality"),
        (ax[1,0], data2, 'Active',          "Countries with highest Active covid cases"),
        (ax[1,1], data2, 'Mortality Rate%', "Mortality rate in countries with more cases"),
    ]
    for axis, frame, column, title in panels:
        axis.plot(frame['Country'], frame[column])
        axis.set_title(title, color='orangered', fontsize=12)
    # y=0.05 places the overall caption near the bottom edge of the figure.
    fig.suptitle("Comparing Mortality rate of countries with top and least Active cases",
                 color='darkgreen', fontsize=14, fontweight='bold', y=0.05)
    # Save BEFORE show(): after show() the figure is no longer pyplot's current
    # figure in a notebook, so a later plt.savefig() writes an empty canvas.
    fig.savefig('Mortality')
    plt.show()

In [None]:
mortality_rate(Mortality_data_top,Mortality_data_C)


In [None]:
#Mortality rate among regions
Regions_cum_sum

In [None]:
# Renumber the rows of the region table. reset_index() without drop=True keeps
# the old index as a new 'index' column, which the next cell removes again.
Regions_cum_sum = Regions_cum_sum.reset_index()

In [None]:
# Discard the 'index' column left behind by reset_index()
Regions_cum_sum = Regions_cum_sum.drop(columns=['index'])
Regions_cum_sum

In [None]:
#Mortality rate by region
# Same definition as the per-country rate: Deaths / Active * 100, rounded to
# 2 decimals. A region with zero active cases gets NaN instead of inf.
# assign() returns a new frame, so re-running the cell gives the same result.
# NOTE(review): a case-fatality rate is usually Deaths / Confirmed - confirm
# that Active is the intended denominator.
Regions_cum_sum = Regions_cum_sum.assign(**{
    "Mortality Rate%": lambda regions: (
        regions['Deaths'] / regions['Active'].where(regions['Active'] != 0) * 100
    ).round(2)
})

In [None]:
Regions_cum_sum

In [None]:
# Pie chart of the per-region mortality rate.
# NOTE(review): the wedges show each region's share of the SUM of the rates, not
# the rates themselves - a bar chart may represent this column better; confirm.
# NOTE(review): region_data() saves to 'regioncases' by default, so this call
# overwrites the file written by the earlier region charts.
region_data('Mortality Rate%','Mortality Rate')