In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Location of the case-count CSV inside the Kaggle input directory.
# Naming note: `covid_19` is the path string, `covid19` (below) is the DataFrame.
covid_19 = '/kaggle/input/covid19-in-india/covid_19_india.csv'

# Read the CSV file into a DataFrame
covid19 = pd.read_csv(covid_19)

# Display the first few rows of the DataFrame. A bare last expression uses the
# notebook's rich table rendering; print() would fall back to plain text.
covid19.head()

In [None]:
covid19.head()

In [None]:
covid19.info()

In [None]:
covid19.shape

In [None]:
# Remove columns that the rest of this notebook does not use.
# The result is reassigned (no inplace) and missing labels are ignored so the cell
# can be re-run safely: an in-place drop raises KeyError the second time because
# the columns are already gone.
covid19 = covid19.drop(
    columns=['Sno', 'ConfirmedIndianNational', 'ConfirmedForeignNational'],
    errors='ignore',
)

In [None]:
covid19.head()

In [None]:
covid19['State/UnionTerritory'].unique(),covid19['State/UnionTerritory'].nunique()

In [None]:
"Correcting spelling mistakes or impurities"

# Lookup table: raw 'State/UnionTerritory' value -> name to use instead.
# Names that do not appear as a key are left untouched by state_correction().
state_correction_dict = {
    # entries with trailing asterisks are mapped to the plain state name
    'Bihar****':'Bihar',
    # folded into the combined union-territory name
    'Dadra and Nagar Haveli':'Dadra and Nagar Haveli and Daman and Diu',
    'Madhya Pradesh***':'Madhya Pradesh',
    'Maharashtra***':'Maharashtra',
    # misspelling
    'Karanataka':'Karnataka'
}

In [None]:
def state_correction(state, corrections=None):
    """Return the corrected spelling of a state/union-territory name.

    Parameters
    ----------
    state : object
        Raw value taken from the ``State/UnionTerritory`` column.
    corrections : dict, optional
        Mapping of raw name -> corrected name. When omitted, the notebook-level
        ``state_correction_dict`` is used.

    Returns
    -------
    object
        The mapped name when ``state`` is a key of the mapping, otherwise
        ``state`` itself, unchanged.

    Raises
    ------
    NameError
        If ``corrections`` is omitted and ``state_correction_dict`` has not been
        defined yet (the cell that builds it was not run).
    """
    if corrections is None:
        # Looked up outside the try block on purpose: a missing mapping must fail
        # loudly instead of silently leaving every name uncorrected.
        corrections = state_correction_dict
    try:
        return corrections[state]
    except (KeyError, TypeError):
        # KeyError: the name needs no correction.
        # TypeError: the value is unhashable and therefore cannot be a key.
        return state
    
# Run every value of the column through state_correction() and write the result
# back to the same column.
covid19['State/UnionTerritory'] = covid19['State/UnionTerritory'].apply(state_correction)
# Number of distinct names left after the correction.
covid19['State/UnionTerritory'].nunique()

In [None]:
covid19['State/UnionTerritory'].unique(),covid19['State/UnionTerritory'].nunique()

In [None]:
from datetime import datetime

In [None]:
# Convert the 'Date' column to pandas datetimes, parsing it as YYYY-MM-DD.
covid19['Date']=pd.to_datetime(covid19['Date'],format='%Y-%m-%d')

In [None]:
covid19.head()

In [None]:
# Active cases = confirmed cases minus those that are cured or dead.
covid19['Active_case']=covid19['Confirmed']-(covid19['Cured']+covid19['Deaths'])

In [None]:
covid19.head()

In [None]:
# Per-state summary table: the largest Confirmed, Cured and Deaths value recorded
# for each state/union territory.
# margins is left off on purpose: with aggfunc='max' the extra 'All' row holds the
# maximum of each column over every row, not a national total, and would show up
# as a misleading extra "state" in the rate table built from this frame.
statewise = pd.pivot_table(
    covid19,
    values=['Cured', 'Deaths', 'Confirmed'],
    index='State/UnionTerritory',
    aggfunc='max',
)

In [None]:
statewise

In [None]:
# The ten rows of covid19 with the largest Active_case values.
# NOTE(review): this ranks individual rows, not states, so a state with several
# rows can occupy more than one slot. df_top_10 is reassigned from a per-state
# groupby a couple of cells further down.
df_top_10 = covid19.nlargest(10,['Active_case'])

In [None]:
df_top_10 

In [None]:
# Highest Active_case value recorded per state, ordered from largest to smallest.
df_top_10 = (
    covid19
    .groupby(['State/UnionTerritory'])['Active_case']
    .max()
    .sort_values(ascending=False)
    .reset_index()
)
# Keep the ten states with the highest peaks and display them.
df_top = df_top_10.nlargest(10, ['Active_case'])
df_top

In [None]:
# The ten rows of covid19 with the largest Deaths values (row-level, not per state).
# NOTE(review): df_top_death is reassigned two cells below from the per-state table.
df_top_death=covid19.nlargest(10,['Deaths'])

In [None]:
# Highest Deaths value recorded per state, ordered from largest to smallest.
# This rebinds df_top_10, which held the active-case ranking until now.
df_top_10 = (
    covid19
    .groupby(['State/UnionTerritory'])['Deaths']
    .max()
    .sort_values(ascending=False)
    .reset_index()
)

In [None]:
# Ten states with the largest per-state Deaths value; replaces the row-level
# df_top_death assigned earlier.
df_top_death=df_top_10.nlargest(10,['Deaths'])

In [None]:
df_top_death

In [None]:
# Recovery rate and death rate per state, displayed with a background gradient

In [None]:
# Add two percentage columns relative to confirmed cases (column names keep their
# existing spelling), then order the table by Confirmed, smallest first.
statewise = statewise.assign(
    Recovary_rate=statewise['Cured'] * 100 / statewise['Confirmed'],
    Deathrate=statewise['Deaths'] * 100 / statewise['Confirmed'],
).sort_values(by='Confirmed', ascending=True)
# Render the table with a viridis colour gradient over the cells.
statewise.style.background_gradient(cmap='viridis')


In [None]:
# Largest Active_case value (and latest Date) per state, highest first.
# The two columns are selected before aggregating so max() is not computed for
# every other column of the frame and then thrown away.
top10_active_case = (
    covid19
    .groupby(by='State/UnionTerritory')[['Active_case', 'Date']]
    .max()
    .sort_values(by=['Active_case'], ascending=False)
    .reset_index()
)

In [None]:
# Figure, title and bars are created in a single cell. With the inline backend a
# cell's figure is rendered and closed when the cell finishes, so spreading these
# calls over separate cells produces an empty canvas, a title-only plot and an
# untitled, default-sized bar chart instead of one finished figure.
fig = plt.figure(figsize=(16,9))
plt.title("top 10 state with most  active case ")
ax = sns.barplot(
    data=top10_active_case.iloc[:10],
    y="Active_case",
    x="State/UnionTerritory",
    linewidth=2,
    edgecolor='black',
)
plt.show()

In [None]:
# Top 10 states by active cases

In [None]:
# Largest Active_case value (and latest Date) per state, highest first.
# Columns are selected before max() so the aggregation only touches what is kept.
top_10_active_case = (
    covid19
    .groupby(by='State/UnionTerritory')[['Active_case', 'Date']]
    .max()
    .sort_values(by=['Active_case'], ascending=False)
    .reset_index()
)

# Bar chart of the first ten rows, i.e. the ten states with the highest peak.
fig = plt.figure(figsize=(16,9))
plt.title("top 10 state with most  active case ")
ax = sns.barplot(
    data=top_10_active_case.iloc[:10],
    y="Active_case",
    x="State/UnionTerritory",
    linewidth=2,
    edgecolor='black',
)
plt.xlabel('State')
plt.ylabel("Total Active_case")
plt.show()

In [None]:
# Largest Deaths value (and latest Date) per state, highest first.
# Columns are selected before max() so the aggregation only touches what is kept.
top_10_Deaths_case = (
    covid19
    .groupby(by='State/UnionTerritory')[['Deaths', 'Date']]
    .max()
    .sort_values(by=['Deaths'], ascending=False)
    .reset_index()
)

# Bar chart of the first ten rows, i.e. the ten states with the most deaths.
plt.figure(figsize=(16,9))
ax = sns.barplot(
    data=top_10_Deaths_case.iloc[:10],
    y='Deaths',
    x='State/UnionTerritory',
    linewidth=2,
    edgecolor="black",
)
plt.title("Top 10 State where most death occur")
plt.xlabel("STATE")
plt.ylabel("Total Death")
plt.show()

In [None]:
#Top 5 affected States
# Active_case over time, one line per state, for a hand-picked list of five states.
# seaborn is already imported as sns in the notebook's first cell, so it is not
# re-imported here.
plt.figure(figsize=(20,10))
ax = sns.lineplot(
    data=covid19[covid19['State/UnionTerritory'].isin(['Maharashtra','Kerala','Karnataka','Tamil Nadu','Delhi'])],
    x='Date',
    y='Active_case',
    hue='State/UnionTerritory',
)
ax.set_title("top 5 affected state")
# Ending with plt.show() keeps the Text(...) repr of set_title out of the output,
# matching the other plotting cells.
plt.show()


In [None]:
# Location of the state-wise vaccination CSV inside the Kaggle input directory.
Vaccine_path = '/kaggle/input/covid19-in-india/covid_vaccine_statewise.csv'

# Read the CSV file into a DataFrame
vaccine = pd.read_csv(Vaccine_path)


In [None]:
vaccine.head()

In [None]:
vaccine.shape

In [None]:
vaccine.isnull().sum()

In [None]:
# Rename the 'Updated On' column to 'Vaccine_Date' (modifies vaccine in place).
vaccine.rename(columns={ 'Updated On':'Vaccine_Date'}, inplace=True)

In [None]:
vaccine.head()

In [None]:
vaccine.info()

In [None]:
# Remove columns that the rest of this notebook does not use.
# The result is reassigned (no inplace) and missing labels are ignored so the cell
# can be re-run safely: an in-place drop raises KeyError the second time because
# the columns are already gone.
vaccine = vaccine.drop(
    columns=[
        'Sputnik V (Doses Administered)',
        'AEFI',
        '18-44 Years (Doses Administered)',
        '45-60 Years (Doses Administered)',
        '60+ Years (Doses Administered)',
    ],
    errors='ignore',
)

In [None]:
vaccine.head()

In [None]:
# Male vs female: column totals over every row of the vaccine frame.
# NOTE(review): the sums run over all rows, including the rows with State == 'India'
# that a later cell filters out, and over every date. If these columns are running
# totals per date, the absolute numbers are inflated — confirm before quoting them.
male=vaccine['Male(Individuals Vaccinated)'].sum()
female=vaccine['Female(Individuals Vaccinated)'].sum()
# Show both totals as a tuple.
male,female

In [None]:
import plotly.express as px
from plotly.subplots import make_subplots

In [None]:
# Pie chart of the male/female split computed above.
# The title previously ended in a stray "v"; it is completed here.
px.pie(names=['male','female'],values=[male,female],title='Male and Female Vaccination')

In [None]:
# Keep only rows whose State is not 'India'
# (presumably the nationwide aggregate rows — TODO confirm against the data).
vaccine_df=vaccine[vaccine['State']!='India']

In [None]:
# Ten states with the largest summed 'Total', highest first.
# 'Total' is selected before aggregating so sum() is not also applied to every
# other column of the frame (including non-numeric ones) and then discarded.
# NOTE(review): this adds up every row of a state; if 'Total' is a running figure
# per date, max() would be the appropriate aggregate — confirm against the data.
max_va = (
    vaccine_df
    .groupby(by='State')[['Total']]
    .sum()
    .sort_values(by=['Total'], ascending=False)
    .iloc[:10]
    .reset_index()
)

In [None]:
max_va

In [None]:
# Bar chart of the ten states in max_va.
plt.figure(figsize=(16,9))
plt.title("Top 10 states by total vaccination")
# Columns are passed by name (as in the other bar charts); seaborn takes the axis
# labels from those names. The Axes stays bound to `x`, as before.
x = sns.barplot(data=max_va, y="Total", x="State", linewidth=2, edgecolor='black')
plt.show()