### Importing libraries that will be useful :-

In [2]:
import pandas as pd 
import numpy as np
from pprint import pprint
import warnings 
warnings.filterwarnings('ignore')
import datetime
from dateutil import relativedelta
import re

# Question 3 solution : 

Reading the COVID data from the `districts.csv` file:

In [7]:
# Load only the four columns the case counts are built from.
covid_data = pd.read_csv(
    'districts.csv',
    usecols=['Date', 'State', 'District', 'Confirmed'],
    low_memory=True,
)

Some district names are the same across states, so let's first create a district ID for every district (state name + district name) and then drop the state and district columns, as they will no longer be required.

In [8]:
# Build the "<State>_<District>" key, then keep only the key, the date and
# the confirmed count (State and District are no longer needed).
covid_data = covid_data.assign(
    District_ID=covid_data["State"] + '_' + covid_data["District"]
)[['District_ID', 'Date', 'Confirmed']]

In [10]:
# Preview the reshaped frame (columns: District_ID, Date, Confirmed).
covid_data

Unnamed: 0,District_ID,Date,Confirmed
0,Andaman and Nicobar Islands_Unknown,2020-04-26,33
1,Andhra Pradesh_Anantapur,2020-04-26,53
2,Andhra Pradesh_Chittoor,2020-04-26,73
3,Andhra Pradesh_East Godavari,2020-04-26,39
4,Andhra Pradesh_Guntur,2020-04-26,214
...,...,...,...
320401,West Bengal_Purba Bardhaman,2021-09-03,40647
320402,West Bengal_Purba Medinipur,2021-09-03,61772
320403,West Bengal_Purulia,2021-09-03,19290
320404,West Bengal_South 24 Parganas,2021-09-03,97821


## Calculating overall cases for every district :-

The data is cumulative, and the question asks for the cases up to 14th August 2021, so we take the rows for 14th August 2021 directly. The confirmed count on that date is the total number of cases up to that date.

In [11]:
# The counts are cumulative, so the rows of the cut-off day already hold the
# totals. Rename to the output schema and label every row as 'overall'.
cases_overall = (
    covid_data.loc[covid_data['Date'] == '2021-08-14']
    .reset_index(drop=True)
    .rename(columns={'Date': 'timeid', 'District_ID': 'districtid', 'Confirmed': 'cases'})
    .assign(timeid='overall')
)

In [12]:
# cases_overall

#### So now saving overall cases calculated at each district as "cases-overall.csv" :-

In [13]:
# Write the overall table to disk; index=False leaves the row index out of the file.
cases_overall.to_csv('cases-overall.csv', index=False)

## Calculating monthly cases for every district :-

In [17]:
# Work on a copy so covid_data itself is not modified by the steps below.
monthly_data = covid_data.copy()
# 'Date' is deliberately not converted with pd.to_datetime: it is
# concatenated as text into the lookup key built from this frame.
monthly_data

Unnamed: 0,District_ID,Date,Confirmed
0,Andaman and Nicobar Islands_Unknown,2020-04-26,33
1,Andhra Pradesh_Anantapur,2020-04-26,53
2,Andhra Pradesh_Chittoor,2020-04-26,73
3,Andhra Pradesh_East Godavari,2020-04-26,39
4,Andhra Pradesh_Guntur,2020-04-26,214
...,...,...,...
320401,West Bengal_Purba Bardhaman,2021-09-03,40647
320402,West Bengal_Purba Medinipur,2021-09-03,61772
320403,West Bengal_Purulia,2021-09-03,19290
320404,West Bengal_South 24 Parganas,2021-09-03,97821


In [15]:
# Turn the frame into a lookup table: the index becomes
# "<District_ID>_<Date>" and the only remaining column is 'Confirmed'.
monthly_data = (
    monthly_data
    .assign(District_ID=monthly_data['District_ID'] + '_' + monthly_data['Date'])
    .drop(columns=["Date"])
    .set_index('District_ID')
)

In [16]:
# Preview the lookup table (index: District_ID with the date appended; column: Confirmed).
monthly_data

Unnamed: 0_level_0,Confirmed
District_ID,Unnamed: 1_level_1
Andaman and Nicobar Islands_Unknown_2020-04-26,33
Andhra Pradesh_Anantapur_2020-04-26,53
Andhra Pradesh_Chittoor_2020-04-26,73
Andhra Pradesh_East Godavari_2020-04-26,39
Andhra Pradesh_Guntur_2020-04-26,214
...,...
West Bengal_Purba Bardhaman_2021-09-03,40647
West Bengal_Purba Medinipur_2021-09-03,61772
West Bengal_Purulia_2021-09-03,19290
West Bengal_South 24 Parganas_2021-09-03,97821


In [34]:
def case_this_day(district_ID, date, data=None):
    """Return the confirmed count stored for a district on a given day.

    Parameters
    ----------
    district_ID : str
        District key; it is joined with the date as ``district_ID + '_' + str(date)``.
    date : str or object
        Day to look up. ``str(date)`` must match the date text used in the
        index of the lookup table.
    data : pandas.DataFrame, optional
        Lookup table indexed by the joined key, with a ``'Confirmed'`` column.
        Defaults to the notebook-level ``monthly_data`` frame.

    Returns
    -------
    The ``'Confirmed'`` value for that key, or ``0`` when the table has no
    row for it.
    """
    if data is None:
        data = monthly_data
    key = district_ID + '_' + str(date)
    try:
        return data.at[key, 'Confirmed']
    except KeyError:
        # Only a missing district/day is treated as "no cases"; any other
        # error (e.g. an undefined lookup table) must surface instead of
        # silently producing zeros.
        return 0

In [35]:
def return_date(date_case):
    """Extract the first ``YYYY-MM-DD`` date found in ``str(date_case)``.

    The matched text is parsed as a calendar date and returned again as a
    ``'YYYY-MM-DD'`` string, so any time-of-day part is dropped.
    """
    found = re.search(r'\d{4}-\d{2}-\d{2}', str(date_case))
    parsed = datetime.datetime.strptime(found.group(), '%Y-%m-%d')
    return str(parsed.date())

In [36]:
# Monthly case counts per district.
# Districts are taken from the overall table; every district gets one row
# per month window between start_date and end_date.
district_ids = cases_overall[['districtid']].copy()
start_date = datetime.datetime.strptime('2020/04/26','%Y/%m/%d')
end_date = datetime.datetime.strptime('2021/08/14','%Y/%m/%d')

# The windows are the same for every district, so compute them once:
# (month number, first day, last day), days formatted as 'YYYY-MM-DD'.
month_windows = []
window_start = start_date
m_no = 1
while window_start < end_date:
    window_end = window_start + relativedelta.relativedelta(months=1) - datetime.timedelta(days=1)
    month_windows.append((m_no, return_date(window_start), return_date(window_end)))
    m_no += 1
    window_start += relativedelta.relativedelta(months=1)

# NOTE(review): each window is computed as cumulative(last day) minus
# cumulative(first day), so cases reported on a window's first day are not
# counted in any window; also the last window ends on 2021-08-25, after
# end_date. Both behaviours are kept unchanged here - confirm they are intended.
#
# Rows are collected in a plain list and turned into a DataFrame once;
# appending to a DataFrame inside the loop is quadratic and DataFrame.append
# no longer exists in pandas 2.x.
monthly_records = [
    {
        'districtid': district,
        'timeid': month_no,
        'cases': case_this_day(district, last_day) - case_this_day(district, first_day),
    }
    for district in district_ids['districtid']
    for month_no, first_day, last_day in month_windows
]
temp_df = pd.DataFrame(monthly_records, columns=['districtid','timeid','cases'])
# temp_df

In [39]:
# Continue on a copy so temp_df (the loop output) is left as computed.
monthly_data2 = temp_df.copy()
monthly_data2

Unnamed: 0,districtid,timeid,cases
0,Andaman and Nicobar Islands_Unknown,1,0
1,Andaman and Nicobar Islands_Unknown,2,25
2,Andaman and Nicobar Islands_Unknown,3,218
3,Andaman and Nicobar Islands_Unknown,4,2627
4,Andaman and Nicobar Islands_Unknown,5,774
...,...,...,...
10523,West Bengal_Uttar Dinajpur,12,2020
10524,West Bengal_Uttar Dinajpur,13,7413
10525,West Bengal_Uttar Dinajpur,14,2368
10526,West Bengal_Uttar Dinajpur,15,380


In [40]:
# Reorder the rows month by month (all districts of month 1, then month 2, ...).
# A stable sort keeps the existing district order inside each month, which is
# what concatenating the per-month groups one after another produced, without
# relying on DataFrame.append (removed in pandas 2.0).
temp_d = monthly_data2.sort_values('timeid', kind='mergesort').reset_index(drop=True)

# Turn the numeric month index into the 'month_<n>' label used in the output file.
temp_d['timeid'] = 'month_' + temp_d['timeid'].astype(str)
# temp_d

In [41]:
# Preview the monthly table after reordering and relabelling timeid as 'month_<n>'.
temp_d

Unnamed: 0,districtid,timeid,cases
0,Andaman and Nicobar Islands_Unknown,month_1,0
1,Andhra Pradesh_Anantapur,month_1,83
2,Andhra Pradesh_Chittoor,month_1,135
3,Andhra Pradesh_East Godavari,month_1,20
4,Andhra Pradesh_Foreign Evacuees,month_1,62
...,...,...,...
10523,West Bengal_Purba Bardhaman,month_16,614
10524,West Bengal_Purba Medinipur,month_16,1253
10525,West Bengal_Purulia,month_16,115
10526,West Bengal_South 24 Parganas,month_16,1623


#### So now saving monthly cases calculated at each district as "cases-month.csv" :-

In [13]:
# Write the monthly table to disk; index=False leaves the row index out of the file.
temp_d.to_csv('cases-month.csv', index=False)

## Calculating weekly cases for every district :-

In [14]:
# Weekly case counts per district: every district in district_ids gets one
# row per 7-day window between start_date and end_date.
start_date = datetime.datetime.strptime('2020/04/26','%Y/%m/%d')
end_date = datetime.datetime.strptime('2021/08/14','%Y/%m/%d')

# The windows are the same for every district, so compute them once:
# (week number, first day, last day), days formatted as 'YYYY-MM-DD'.
week_windows = []
window_start = start_date
w_no = 1
while window_start < end_date:
    window_end = window_start + relativedelta.relativedelta(weeks=1) - datetime.timedelta(days=1)
    week_windows.append((w_no, return_date(window_start), return_date(window_end)))
    w_no += 1
    window_start += relativedelta.relativedelta(weeks=1)

# NOTE(review): each window is computed as cumulative(last day) minus
# cumulative(first day), so cases reported on a window's first day are not
# counted in any window. Kept unchanged here - confirm this is intended.
#
# Rows are collected in a plain list and turned into a DataFrame once;
# appending to a DataFrame inside the loop is quadratic and DataFrame.append
# no longer exists in pandas 2.x.
weekly_records = [
    {
        'districtid': district,
        'timeid': week_no,
        'cases': case_this_day(district, last_day) - case_this_day(district, first_day),
    }
    for district in district_ids['districtid']
    for week_no, first_day, last_day in week_windows
]
weekly_data = pd.DataFrame(weekly_records, columns=['districtid','timeid','cases'])
# weekly_data

In [42]:
# Continue on a copy so weekly_data (the loop output) is left as computed.
weeklydata = weekly_data.copy()
weeklydata

Unnamed: 0,districtid,timeid,cases
0,Andaman and Nicobar Islands_Unknown,1,0
1,Andaman and Nicobar Islands_Unknown,2,0
2,Andaman and Nicobar Islands_Unknown,3,0
3,Andaman and Nicobar Islands_Unknown,4,0
4,Andaman and Nicobar Islands_Unknown,5,0
...,...,...,...
44739,West Bengal_Uttar Dinajpur,64,61
44740,West Bengal_Uttar Dinajpur,65,57
44741,West Bengal_Uttar Dinajpur,66,40
44742,West Bengal_Uttar Dinajpur,67,56


In [43]:
# Reorder the rows week by week (all districts of week 1, then week 2, ...).
# A stable sort keeps the existing district order inside each week, which is
# what concatenating the per-week groups one after another produced, without
# relying on DataFrame.append (removed in pandas 2.0).
temp_d1 = weeklydata.sort_values('timeid', kind='mergesort').reset_index(drop=True)

# Turn the numeric week index into the 'Week_<n>' label used in the output file.
temp_d1['timeid'] = 'Week_' + temp_d1['timeid'].astype(str)
# temp_d1

In [44]:
# Preview the weekly table after reordering and relabelling timeid as 'Week_<n>'.
temp_d1

Unnamed: 0,districtid,timeid,cases
0,Andaman and Nicobar Islands_Unknown,Week_1,0
1,Andhra Pradesh_Anantapur,Week_1,18
2,Andhra Pradesh_Chittoor,Week_1,7
3,Andhra Pradesh_East Godavari,Week_1,6
4,Andhra Pradesh_Foreign Evacuees,Week_1,0
...,...,...,...
44739,West Bengal_Purba Bardhaman,Week_68,96
44740,West Bengal_Purba Medinipur,Week_68,249
44741,West Bengal_Purulia,Week_68,14
44742,West Bengal_South 24 Parganas,Week_68,342


#### So now saving weekly cases calculated at each district as "cases-week.csv" :-

In [17]:
# Write the weekly table to disk; index=False leaves the row index out of the file.
temp_d1.to_csv('cases-week.csv', index=False)

# Q3 completed..:)