# COVID-19 Cases Analysis

In [None]:
import pandas as pd                                    # Importing the required libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

In [None]:
from google.colab import files

# Ask Colab for a file upload and keep the result in `uploaded`.
# NOTE(review): presumably the file picked here is the one read in the next
# cell — confirm the file name matches.
uploaded = files.upload()                               # Uploading the file

In [None]:
# Load the uploaded dataset into the working DataFrame.
# The file carries a .csv extension, so it is parsed with read_csv; the
# Excel reader that was used here cannot parse plain CSV text.
df = pd.read_csv("data base covid.csv")
df

In [None]:
# Preview the first 20 rows.
df.head(20)

In [None]:
# Preview the last 20 rows.
df.tail(20)

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics for the columns pandas treats as numeric.
df.describe()

In [None]:
# Count of missing values in each column.
df.isna().sum()

In [None]:
# Distinct state/UT labels and how many there are; used to spot misspelled
# or duplicated names before cleaning.
df["State/UnionTerritory"].unique(),df["State/UnionTerritory"].nunique()

In [None]:
# Maps each known misspelled or annotated state label to its canonical name.
name_correction_dict = {
    'Bihar****': 'Bihar',
    'Dadra and Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
    'Madhya Pradesh***': 'Madhya Pradesh',
    'Maharashtra***': 'Maharashtra',
    'Karanataka': 'Karnataka',
}


def state_correction(state):
    """Return the canonical name for *state*.

    Labels listed in ``name_correction_dict`` are replaced by their
    corrected spelling; any other value is returned unchanged.

    Args:
        state: A state/union-territory label taken from the dataset.

    Returns:
        The corrected label, or ``state`` itself when no correction exists.
    """
    try:
        # Look up the argument itself. The earlier code used the literal
        # string "state" as the key, so no label was ever corrected.
        return name_correction_dict[state]
    except (KeyError, TypeError):
        # KeyError: nothing to correct. TypeError: unhashable value;
        # pass it through untouched, as the old catch-all did.
        return state


# Pass every state/UT label through state_correction, then list the
# distinct labels that remain.
df["State/UnionTerritory"] = df["State/UnionTerritory"].map(state_correction)

df["State/UnionTerritory"].unique()

In [None]:
# Parse the raw "Date" values into datetimes ...
df["Date"] = pd.to_datetime(df["Date"])

# ... then store them back as day-month-year text. The previous pattern
# "%d-%m -%Y" had a stray space before the year, which ended up inside every
# date string. Note that "Date" is a text column from here on.
df["Date"] = df["Date"].dt.strftime("%d-%m-%Y")

df["Date"]

In [None]:
# Remove the columns that the rest of the analysis does not use.
df.drop(
    columns=["Time", "ConfirmedIndianNational", "ConfirmedForeignNational"],
    inplace=True,
)

In [None]:
# Quick look at the frame after the columns were dropped.
df.head()

In [None]:
# Numeric columns only. Selecting by include="number" instead of excluding
# "object" keeps datetime, boolean or other non-numeric dtypes out of the
# result.
num_columns = df.select_dtypes(include="number")

num_columns

In [None]:
# Non-numeric (categorical/text) columns only. Excluding just "int" would
# let floating-point columns through, so every numeric dtype is excluded.
str_columns = df.select_dtypes(exclude="number")

str_columns

In [None]:
# Active cases are the confirmed cases that are neither cured nor dead.
df["Active_Cases"] = df["Confirmed"].sub(df["Cured"]).sub(df["Deaths"])

df

In [None]:
# One row per state/UT holding the maximum of each case column, plus the
# "All" margin row that pivot_table appends when margins=True.
statewise = df.pivot_table(
    index="State/UnionTerritory",
    values=["Active_Cases", "Confirmed", "Cured", "Deaths"],
    aggfunc="max",
    margins=True,
)

statewise

In [None]:
# Peak active-case count per state/UT, highest first. Despite its name this
# frame holds every state; the top ten are taken below. (An earlier
# row-level nlargest() result assigned to the same name was overwritten
# immediately and has been removed as dead code.)
df_highest_cases_10 = (
    df.groupby(["State/UnionTerritory"])["Active_Cases"]
    .max()
    .sort_values(ascending=False)
    .reset_index()
)

# The ten states with the highest peak of active cases.
highest_active_statewise = df_highest_cases_10.nlargest(10, ["Active_Cases"])

highest_active_statewise

In [None]:
# Highest death count per state/UT, highest first. Despite its name this
# frame holds every state; the top ten are taken below. (An earlier
# row-level nlargest() result assigned to the same name was overwritten
# immediately and has been removed as dead code.)
df_most_deaths_10 = (
    df.groupby(["State/UnionTerritory"])["Deaths"]
    .max()
    .sort_values(ascending=False)
    .reset_index()
)

# The ten states with the most deaths.
df_deaths_statewise = df_most_deaths_10.nlargest(10, ["Deaths"])

df_deaths_statewise

In [None]:
# Express cured and deceased counts as a percentage of confirmed cases.
statewise["Recovery Rate"] = statewise["Cured"].mul(100).div(statewise["Confirmed"])
statewise["Death Rate"] = statewise["Deaths"].mul(100).div(statewise["Confirmed"])

statewise

In [None]:
# Pairwise correlation between the columns of the statewise summary.
statewise.corr()

In [None]:
# Column totals over every row of the frame.
# NOTE(review): these sums add up all dates for all states. If the columns
# are running totals (the statewise pivot takes their max, which suggests
# so), each case is counted once per reporting day — confirm this is the
# intended figure.
Confirmed = df["Confirmed"].sum()
Cured = df["Cured"].sum()
Death = df["Deaths"].sum()
Active = df["Active_Cases"].sum()

print('Total Confirmed cases =', Confirmed)
print('Total Cured cases =', Cured)
# The Active and Death labels were previously attached to each other's value.
print('Total Active cases =', Active)
print('Total Death cases =', Death)

# Create the figure first so the bar chart is drawn on it; creating it after
# plotting left the chart at the default size and added an empty figure.
fig = plt.figure(figsize=(20, 10))

barplot = sns.barplot(
    x=["Confirmed", "Cured", "Deaths", "Active_cases"],
    y=[Confirmed, Cured, Death, Active],
)
# Show the y ticks as plain integers rather than scientific notation.
barplot.ticklabel_format(axis="y", style="plain")

plt.show()





In [None]:

# Column totals and the labels that go with them, in the same order.
df_values = [df["Confirmed"].sum(), df["Cured"].sum(), df["Deaths"].sum(), df["Active_Cases"].sum()]

df_keys = ["Confirm", "Cure", "Death", "Active"]

# Create the figure before drawing; creating it after plt.pie left the pie
# at the default size and showed a second, empty figure.
fig = plt.figure(figsize=(17, 10))

# The "Death" wedge is pulled out further than the others for emphasis.
plt.pie(df_values, labels=df_keys, explode=(0.02, 0.02, 0.1, 0.02), autopct='%.0f%%')

plt.legend(["Confirmed", "Cured", "Deaths", "Active_Cases"])

plt.show()

In [None]:
fig = plt.figure(figsize=(18, 10))

# Share of active cases among the ten states selected earlier; the explode
# tuple supplies one offset per wedge, so it expects exactly ten states.
(
    highest_active_statewise
    .groupby(["State/UnionTerritory"])["Active_Cases"]
    .sum()
    .plot.pie(
        explode=(0.05, 0.02, 0.03, 0.04, 0.04, 0.05, 0.1, 0.04, 0.09, 0.04),
        autopct='%1.0f%%',
    )
)
plt.title('Top 10 states with most Active cases', size=20)

plt.show()

In [None]:
fig = plt.figure(figsize=(15, 7))

# Bar per state for the first ten rows of the top-active-cases table.
sns.barplot(
    x="State/UnionTerritory",
    y="Active_Cases",
    data=highest_active_statewise.head(10),
)

plt.title("Top 10 states with active cases", size=15)

plt.show()

In [None]:
fig = plt.figure(figsize=(15, 7))

# Share of deaths among the ten states selected earlier; the explode tuple
# supplies one offset per wedge, so it expects exactly ten states.
df_deaths_statewise.groupby(["State/UnionTerritory"]).sum()["Deaths"].plot(
    kind='pie',
    rot=90,
    explode=(0.05, 0.02, 0.03, 0.04, 0.04, 0.05, 0.1, 0.04, 0.09, 0.04),
    autopct='%1.0f%%',
)

# This chart shows deaths; the title previously said "active cases".
plt.title("Top 10 states with most deaths", size=15)

plt.show()



In [None]:
fig = plt.figure(figsize=(15, 7))

# Bar per state for the first ten rows of the top-deaths table.
sns.barplot(
    x="State/UnionTerritory",
    y="Deaths",
    data=df_deaths_statewise.head(10),
)

plt.title("Top 10 states with most deaths", size=15)

plt.show()



In [None]:
fig = plt.figure(figsize=(8, 4))

# Restrict the data to the five states being compared. Work on a copy so the
# date parsing below does not alter df itself.
five_states = df[
    df["State/UnionTerritory"].isin(
        ["Maharashtra", "Karnataka", "Delhi", "Uttar Pradesh", "Tamil Nadu"]
    )
].copy()

# "Date" is stored as day-month-year text at this point. Plotted as text, the
# x-axis is ordered lexicographically (by day of month) rather than in time.
# Parse it back to datetimes for this plot; spaces are stripped first so the
# parse does not depend on exact spacing inside the stored strings.
five_states["Date"] = pd.to_datetime(
    five_states["Date"].astype(str).str.replace(" ", "", regex=False),
    format="%d-%m-%Y",
)

plot = sns.lineplot(
    data=five_states,
    x="Date",
    y="Active_Cases",
    hue="State/UnionTerritory",
    size="State/UnionTerritory",
)

plt.title("5 most affected states", size=20)

plt.show()


In [None]:
fig = plt.figure(figsize=(8, 4))

# Correlate the numeric columns only: df also holds text columns, and
# DataFrame.corr() raises on those in pandas >= 2.0. The result was formerly
# bound to `map`, which shadowed the builtin of the same name.
corr_heatmap = sns.heatmap(df.select_dtypes(include="number").corr())

plt.title("Correlation")

plt.show()

In [None]:
# Deaths as a fraction of confirmed cases, computed row by row.
df["Fatality Ratio"] = df["Deaths"].div(df["Confirmed"])

a4_dims = (15, 8)
fig, ax = plt.subplots(figsize=a4_dims)

# One point per state/UT, drawn on the axes created above.
sns.pointplot(
    x='State/UnionTerritory',
    y='Fatality Ratio',
    data=df,
    color='Green',
    ax=ax,
)
plt.xticks(rotation=90)  # state names are long; turn them to stay legible
plt.title('Fatality ratio of contaminated states', size=20)
plt.show()