#### Importing libraries

In [1]:
import pandas as pd
import numpy as np
import math
from datetime import datetime, timedelta

#### Importing dataset

In [3]:
# importing filtered dataset that we created in question6
census_data = pd.read_csv("filtered_census_data.csv")

#### Dropping irrelevant columns

In [4]:
census_data.drop(columns=['Total_Male_Population','Total_Female_Population'],inplace=True)
census_data

Unnamed: 0,District,Total_Population,State
0,Kupwara,870354,Jammu and Kashmir
1,Badgam,753745,Jammu and Kashmir
2,Leh(Ladakh),133487,Jammu and Kashmir
3,Kargil,140802,Jammu and Kashmir
4,Punch,476835,Jammu and Kashmir
...,...,...,...
635,Mahe,41816,Puducherry
636,Karaikal,200222,Puducherry
637,Nicobars,36842,Andaman and Nicobar Islands
638,North and Middle Andaman,105597,Andaman and Nicobar Islands


In [5]:
vaccine_data = pd.read_csv("state-vaccinated-count-week.csv")
vaccine_data

Unnamed: 0,state,week,dose1,dose2
0,Andaman and Nicobar Islands,1,0,0
1,Andaman and Nicobar Islands,2,897,0
2,Andaman and Nicobar Islands,3,1140,0
3,Andaman and Nicobar Islands,4,720,0
4,Andaman and Nicobar Islands,5,260,0
...,...,...,...,...
1111,West Bengal,27,835366,1096734
1112,West Bengal,28,946725,734887
1113,West Bengal,29,1350077,486888
1114,West Bengal,30,2121735,388892


In [6]:
# using one of the output file of question no 8
state_vacine_dose_ratio = pd.read_csv("state-vaccinated-dose-ratio.csv")
state_vacine_dose_ratio

Unnamed: 0,state,vaccinateddose1ratio,vaccinateddose2ratio
0,Andaman and Nicobar Islands,0.669024,0.247309
1,Andhra Pradesh,0.377624,0.127726
2,Arunachal Pradesh,0.40749,0.11672
3,Assam,0.330196,0.071971
4,Bihar,0.244371,0.048358
5,Chandigarh,0.694925,0.231514
6,Chhattisgarh,0.28179,0.088583
7,Goa,0.763592,0.22619
8,Gujarat,0.454086,0.138737
9,Haryana,0.375539,0.114609


#### finding no of doses administered in each district in last week

In [7]:
last_week_vaccine_data = vaccine_data[vaccine_data['week']==31]
last_week_vaccine_data

Unnamed: 0,state,week,dose1,dose2
30,Andaman and Nicobar Islands,31,17688,3038
61,Andhra Pradesh,31,1039018,345116
92,Arunachal Pradesh,31,12957,16353
123,Assam,31,1242697,292703
154,Bihar,31,2227472,459602
185,Chandigarh,31,46970,26120
216,Chhattisgarh,31,260467,213065
247,Dadra and Nagar Haveli and Daman and Diu,31,18093,11509
278,Delhi,31,506741,329534
309,Goa,31,25430,34850


#### finding total population in each state

In [8]:
census_state_data = census_data.groupby(['State']).sum().reset_index()
census_state_data.rename(columns={'State':'state'}, inplace=True)
census_state_data

Unnamed: 0,state,Total_Population
0,Andaman and Nicobar Islands,380581
1,Andhra Pradesh,84580777
2,Arunachal Pradesh,1383727
3,Assam,31205576
4,Bihar,104099452
5,Chandigarh,1055450
6,Chhattisgarh,25545198
7,Dadra and Nagar Haveli,343709
8,Daman and Diu,243247
9,Goa,1458545


#### merging vaccination dataset and population dataset

In [9]:
temp = pd.merge(state_vacine_dose_ratio, census_state_data, on=['state'])
merged_df = pd.merge(temp, last_week_vaccine_data, on=['state'])
merged_df.drop(columns=['vaccinateddose2ratio','dose2','week'], inplace=True)
merged_df

Unnamed: 0,state,vaccinateddose1ratio,Total_Population,dose1
0,Andaman and Nicobar Islands,0.669024,380581,17688
1,Andhra Pradesh,0.377624,84580777,1039018
2,Arunachal Pradesh,0.40749,1383727,12957
3,Assam,0.330196,31205576,1242697
4,Bihar,0.244371,104099452,2227472
5,Chandigarh,0.694925,1055450,46970
6,Chhattisgarh,0.28179,25545198,260467
7,Goa,0.763592,1458545,25430
8,Gujarat,0.454086,60439692,2758409
9,Haryana,0.375539,25351462,539785


#### finding no of people who are are not vaccinated even by first dose

In [11]:
merged_df['rem_population'] = (merged_df['Total_Population'] - (merged_df['Total_Population']*merged_df['vaccinateddose1ratio'])).apply(np.floor)

#### finding rate of vaccination

In [10]:
merged_df['rate_of_vaccination'] = (merged_df['dose1']/merged_df['Total_Population'])*100

#### findind date on which complete population of a state is expected to vaccinated at least by first dose

In [12]:
merged_df['date'] = pd.to_datetime('2021-08-14') + pd.to_timedelta(((merged_df['Total_Population']/merged_df['dose1'])*7).apply(np.floor), unit = 'D')

In [13]:
merged_df['rem_population'] = merged_df['rem_population'].astype('int')
merged_df

Unnamed: 0,state,vaccinateddose1ratio,Total_Population,dose1,rate_of_vaccination,rem_population,date
0,Andaman and Nicobar Islands,0.669024,380581,17688,4.647631,125963,2022-01-11
1,Andhra Pradesh,0.377624,84580777,1039018,1.228433,52641022,2023-03-06
2,Arunachal Pradesh,0.40749,1383727,12957,0.936384,819872,2023-08-31
3,Assam,0.330196,31205576,1242697,3.982291,20901620,2022-02-05
4,Bihar,0.244371,104099452,2227472,2.139754,78660525,2022-07-07
5,Chandigarh,0.694925,1055450,46970,4.450234,321991,2022-01-18
6,Chhattisgarh,0.28179,25545198,260467,1.019632,18346811,2023-07-01
7,Goa,0.763592,1458545,25430,1.743518,344812,2022-09-19
8,Gujarat,0.454086,60439692,2758409,4.563903,32994852,2022-01-14
9,Haryana,0.375539,25351462,539785,2.129207,15831002,2022-07-08


#### creating csv file for result

In [14]:
merged_df[['state','rem_population','rate_of_vaccination','date']].to_csv("complete-vaccination.csv", header=['stateid','populationleft','rateofvaccination','date'], index=False)