# For each state, each district, and India overall, find the ratio of the total number of Covishield vaccinated persons (either 1 or 2 doses) to the total number of Covaxin vaccinated persons

In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the district-wise vaccination CSV; the path is relative to the working directory.
df = pd.read_csv(r'cowin_vaccine_data_districtwise.csv')

In [3]:
# df[['16/01/2021', '16/01/2021.1', '16/01/2021.2', '16/01/2021.3','16/01/2021.4', '16/01/2021.5', '16/01/2021.6', '16/01/2021.7','16/01/2021.8','16/01/2021.9']]

In [4]:
# Each date is spread over several suffixed columns ('<date>', '<date>.1', ... '<date>.9').
# The '.8' column holds the Covaxin doses and the column right after it holds the
# Covishield doses, so collect them as (covaxin_column, covishield_column) pairs.
list1 = df.columns
dates_unmod = [
    (col, next_col)
    for col, next_col in zip(list1[:-1], list1[1:])
    # endswith() only accepts the real '.8' suffix (a substring test would also accept
    # e.g. '.85'), and pairing through zip() can never index past the last column.
    if str(col).endswith(".8")
]

In [5]:
# The analysis stops at 14/08/2021, which is covered by the first 211 column pairs.
N_DATE_PAIRS = 211
dates_unmod = dates_unmod[:N_DATE_PAIRS]

In [6]:
# dates_unmod

In [7]:
# Long-format frame covering all districts, with the dates as rows instead of columns
# (populated by the reshaping loop in the next cell).
dfnew = pd.DataFrame()

In [8]:
# Reshape the wide table (one pair of dose columns per date) into long format:
# one row per source row per date, with 'covaxin', 'covishield' and 'Date' columns.
frames = []
for covaxin_col, covishield_col in dates_unmod:
    # Identifying columns plus the two dose columns belonging to this date.
    day = df.loc[:, ['State', 'District', 'District_Key']].copy()
    day['covaxin'] = df[covaxin_col]
    day['covishield'] = df[covishield_col]
    # The date text is the Covaxin column name without its '.8' suffix.
    day['Date'] = covaxin_col.replace(".8", "")
    frames.append(day)

# Concatenate once instead of growing dfnew inside the loop (which copies the
# accumulated frame on every iteration), and rebuild dfnew from scratch so that
# re-running this cell does not append duplicate rows.
dfnew = pd.concat(frames) if frames else pd.DataFrame()

In [9]:
dfnew.head()

Unnamed: 0,State,District,District_Key,covaxin,covishield,Date
0,,,,Covaxin (Doses Administered),CoviShield (Doses Administered),16/01/2021
1,Andaman and Nicobar Islands,Nicobars,AN_Nicobars,0,0,16/01/2021
2,Andaman and Nicobar Islands,North and Middle Andaman,AN_North and Middle Andaman,0,0,16/01/2021
3,Andaman and Nicobar Islands,South Andaman,AN_South Andaman,0,23,16/01/2021
4,Andhra Pradesh,Anantapur,AP_Anantapur,0,287,16/01/2021


In [10]:
# Parse the 'Date' strings (day/month/year) into pandas datetime values.
parsed_dates = pd.to_datetime(dfnew['Date'], format='%d/%m/%Y')
dfnew['Date'] = parsed_dates

In [11]:
# Remove the sub-header rows: the first row of the CSV carries the label
# 'Covaxin (Doses Administered)' instead of a count, and the reshape repeats it
# once per date. A boolean mask removes exactly those rows; dropping by index
# label would also remove any other row that shares the label, because dfnew's
# index repeats for every date.
is_header_row = dfnew['covaxin'] == 'Covaxin (Doses Administered)'
finaldf = dfnew[~is_header_row]

In [12]:
finaldf.dtypes

State                   object
District                object
District_Key            object
covaxin                 object
covishield              object
Date            datetime64[ns]
dtype: object

In [13]:
# Cast both dose-count columns to float so they can be summed and divided.
convert_dict = dict.fromkeys(['covaxin', 'covishield'], float)
finaldf = finaldf.astype(convert_dict)

In [14]:
finaldf

Unnamed: 0,State,District,District_Key,covaxin,covishield,Date
1,Andaman and Nicobar Islands,Nicobars,AN_Nicobars,0.0,0.0,2021-01-16
2,Andaman and Nicobar Islands,North and Middle Andaman,AN_North and Middle Andaman,0.0,0.0,2021-01-16
3,Andaman and Nicobar Islands,South Andaman,AN_South Andaman,0.0,23.0,2021-01-16
4,Andhra Pradesh,Anantapur,AP_Anantapur,0.0,287.0,2021-01-16
5,Andhra Pradesh,Chittoor,AP_Chittoor,0.0,424.0,2021-01-16
...,...,...,...,...,...,...
750,West Bengal,Purulia,WB_Purulia,119235.0,667928.0,2021-08-14
751,West Bengal,Birbhum,WB_Birbhum,66816.0,443747.0,2021-08-14
752,West Bengal,South 24 Parganas,WB_South 24 Parganas,155874.0,1608273.0,2021-08-14
753,West Bengal,Uttar Dinajpur,WB_Uttar Dinajpur,115375.0,598638.0,2021-08-14


In [15]:
# Keep an independent copy of the cleaned rows for the overall (India) analysis,
# since finaldf itself is re-aggregated in the following cells.
India = finaldf.copy()

Combine the covaxin/covishield counts of rows that share the same District_Key (per date) using groupby

In [16]:
# Add up the dose counts of rows that share the same district key, date, state and
# district name, then turn the group keys back into ordinary columns.
# The aggregation is named by the string 'sum': passing the Python builtin `sum`
# is deprecated in recent pandas and is slated to change meaning.
finaldf = (
    finaldf
    .groupby(['District_Key', 'Date', 'State', 'District'])
    .aggregate({'covaxin': 'sum', 'covishield': 'sum'})
    .reset_index()
)

In [17]:
finaldf.head()

Unnamed: 0,District_Key,Date,State,District,covaxin,covishield
0,AN_Nicobars,2021-01-16,Andaman and Nicobar Islands,Nicobars,0.0,0.0
1,AN_Nicobars,2021-01-17,Andaman and Nicobar Islands,Nicobars,0.0,0.0
2,AN_Nicobars,2021-01-18,Andaman and Nicobar Islands,Nicobars,0.0,0.0
3,AN_Nicobars,2021-01-19,Andaman and Nicobar Islands,Nicobars,0.0,1.0
4,AN_Nicobars,2021-01-20,Andaman and Nicobar Islands,Nicobars,0.0,1.0


# DISTRICTWISE ANALYSIS

In [18]:
# The counts are cumulative, so only the final row of a group is needed.
def first_last(df1):
    """Return the last row of ``df1`` as a one-row DataFrame.

    The position is wrapped in a list so the result stays a DataFrame
    rather than collapsing to a Series.
    """
    last_position = len(df1) - 1
    return df1.iloc[[last_position]]

In [19]:
# For every district keep only the last row of its group.
district_groups = finaldf.groupby('District_Key')
t = district_groups.apply(first_last)

In [20]:
# Discard the index produced by groupby/apply and renumber the rows from 0.
t = t.reset_index(drop=True)

In [21]:
# Covishield-to-Covaxin ratio per district (a zero Covaxin count yields inf).
t['vaccineratio'] = t['covishield'].div(t['covaxin'])

In [22]:
t.head()

Unnamed: 0,District_Key,Date,State,District,covaxin,covishield,vaccineratio
0,AN_Nicobars,2021-08-14,Andaman and Nicobar Islands,Nicobars,0.0,30191.0,inf
1,AN_North and Middle Andaman,2021-08-14,Andaman and Nicobar Islands,North and Middle Andaman,0.0,97324.0,inf
2,AN_South Andaman,2021-08-14,Andaman and Nicobar Islands,South Andaman,0.0,202481.0,inf
3,AP_Anantapur,2021-08-14,Andhra Pradesh,Anantapur,280843.0,1680877.0,5.985113
4,AP_Chittoor,2021-08-14,Andhra Pradesh,Chittoor,380464.0,1811380.0,4.760976


In [23]:
t.columns

Index(['District_Key', 'Date', 'State', 'District', 'covaxin', 'covishield',
       'vaccineratio'],
      dtype='object')

In [24]:
# Keep only the district key and its ratio, exposing the key as 'districtid'.
# rename() returns a new frame, so its result must be assigned; previously the
# renamed frame was only displayed and the exported CSV kept the 'District_Key'
# header. Renaming before selecting also keeps the cell safe to re-run.
t = t.rename(columns={'District_Key': 'districtid'})[['districtid', 'vaccineratio']]
t

Unnamed: 0,districtid,vaccineratio
0,AN_Nicobars,inf
1,AN_North and Middle Andaman,inf
2,AN_South Andaman,inf
3,AP_Anantapur,5.985113
4,AP_Chittoor,4.760976
...,...,...
724,WB_Purba Bardhaman,9.334141
725,WB_Purba Medinipur,7.296529
726,WB_Purulia,5.601778
727,WB_South 24 Parganas,8.377581


In [25]:
# Order the districts by ascending ratio and renumber the rows.
t = t.sort_values('vaccineratio').reset_index(drop=True)

In [26]:
# Render the ratios as text and spell infinite values as 'NaN'.
ratio_text = t['vaccineratio'].astype('str')
t['vaccineratio'] = ratio_text.str.replace('inf','NaN')

In [27]:
# Write the district-level ratios to CSV, without the row index.
t.to_csv('district-vaccine-type-ratio.csv',index=False)

# STATE ANALYSIS

In [28]:
# The data is cumulative, so the totals are the values on the final analysis date.
LAST_DATE_TEXT = '14/08/2021'
lastdate = pd.to_datetime(LAST_DATE_TEXT, format='%d/%m/%Y')

In [29]:
# Keep only the rows of the final analysis date.
on_last_date = finaldf['Date'] == lastdate
state = finaldf[on_last_date]

In [30]:
# Renumber the filtered rows from 0, discarding the old index.
state = state.reset_index(drop=True)

In [31]:
# Sum the district rows of each state.
# The aggregation is named by the string 'sum': passing the Python builtin `sum`
# is deprecated in recent pandas and is slated to change meaning.
state = state.groupby('State').aggregate({'covaxin': 'sum', 'covishield': 'sum'})

In [32]:
# Turn the 'State' group key back into an ordinary column.
state = state.reset_index()

In [33]:
state

Unnamed: 0,State,covaxin,covishield
0,Andaman and Nicobar Islands,0.0,329996.0
1,Andhra Pradesh,4070226.0,20879561.0
2,Arunachal Pradesh,0.0,900714.0
3,Assam,1732408.0,12138691.0
4,Bihar,3394727.0,26628989.0
5,Chandigarh,4528.0,973402.0
6,Chhattisgarh,1392850.0,10760651.0
7,Dadra and Nagar Haveli and Daman and Diu,47.0,688754.0
8,Delhi,2715588.0,8750718.0
9,Goa,27658.0,1415066.0


In [34]:
# India = state.copy()

In [35]:
# Covishield-to-Covaxin ratio per state (a zero Covaxin total yields inf).
state['vaccineratio'] = state['covishield'].div(state['covaxin'])

In [36]:
state.shape

(36, 4)

In [37]:
state.columns

Index(['State', 'covaxin', 'covishield', 'vaccineratio'], dtype='object')

In [38]:
# Expose the state name as 'stateid' and keep only the id and ratio columns.
state = state.rename(columns={'State':'stateid'})[['stateid','vaccineratio']]

In [39]:
# Order the states by ascending ratio and renumber the rows.
state = state.sort_values('vaccineratio').reset_index(drop=True)

In [40]:
# Render the ratios as text and spell infinite values as 'NaN'.
state_ratio_text = state['vaccineratio'].astype('str')
state['vaccineratio'] = state_ratio_text.str.replace('inf','NaN')

In [41]:
# Lookup from lower-cased state / union-territory name to its two-letter state id.
# Keys must be lower case because the names are lower-cased before the lookup.
name_to_id = {'state unassigned': 'UN','andaman and nicobar islands': 'AN','andhra pradesh': 'AP','arunachal pradesh': 'AR','assam': 'AS','bihar': 'BR','chandigarh': 'CH','chhattisgarh': 'CT','delhi': 'DL',
              'dadra and nagar haveli and daman and diu': 'DN','goa': 'GA','gujarat': 'GJ','himachal pradesh': 'HP','haryana': 'HR','jharkhand': 'JH','jammu and kashmir': 'JK','karnataka': 'KA',
              'kerala': 'KL','ladakh': 'LA','lakshadweep': 'LD','maharashtra': 'MH','meghalaya': 'ML','manipur': 'MN','madhya pradesh': 'MP','mizoram': 'MZ','nagaland': 'NL','odisha': 'OR',
              'punjab': 'PB','puducherry': 'PY','rajasthan': 'RJ','sikkim': 'SK','telangana': 'TG','tamil nadu': 'TN','tripura': 'TR','uttar pradesh': 'UP','uttarakhand': 'UT','west bengal': 'WB'}

In [42]:
# Lower-case the state names, then translate them to ids through name_to_id
# (names missing from the lookup become NaN).
lowered_names = state['stateid'].str.lower()
state['stateid'] = lowered_names
state['stateid'] = lowered_names.map(name_to_id)

In [43]:
# Write the state-level ratios to CSV, without the row index.
state.to_csv('state-vaccine-type-ratio.csv',index=False)

# OVERALL ANALYSIS

In [44]:
# The data is cumulative, so the national totals are the values on the final date.
LAST_DATE_TEXT = '14/08/2021'
lastdate = pd.to_datetime(LAST_DATE_TEXT, format='%d/%m/%Y')

In [45]:
# Keep only the rows of the final analysis date.
india_on_last_date = India['Date'] == lastdate
India = India[india_on_last_date]

In [46]:
# National totals of each vaccine, followed by the Covishield-to-Covaxin ratio.
icov, ics = India['covaxin'].sum(), India['covishield'].sum()
r = ics / icov

In [47]:
icov,ics,r

(67264707.0, 471470878.0, 7.00918652630123)

In [48]:
# One-row summary frame holding the national totals and their ratio.
overall = pd.DataFrame(
    data={
        'overall': ['India'],
        'covaxin': [icov],
        'covishield': [ics],
        'vaccineratio': [r],
    }
)

In [49]:
overall.columns


Index(['overall', 'covaxin', 'covishield', 'vaccineratio'], dtype='object')

In [50]:
# Keep only the label and the ratio for the export.
export_columns = ['overall', 'vaccineratio']
overall = overall[export_columns]

In [51]:
# Sort by ratio, for consistency with the district and state outputs.
overall = overall.sort_values('vaccineratio')

In [52]:
overall

Unnamed: 0,overall,vaccineratio
0,India,7.009187


In [53]:
# Write the overall (India) ratio to CSV, without the row index.
overall.to_csv('overall-vaccine-type-ratio.csv',index=False)