<a href="https://colab.research.google.com/github/punit0087/Covid19India.Org-Tracker/blob/master/Covid19IndiaTracker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import numpy as np
import pandas as pd
import requests
from math import sin, cos, sqrt, atan2, radians
import os

In [24]:
#https://drive.google.com/file/d/1FBbr9zubK4VsB6gmNzj4fvE204ngvCeZ/view?usp=sharing
def download_file_from_google_drive(id, destination):
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params = { 'id' : id }, stream = True)
    token = get_confirm_token(response)

    if token:
        params = { 'id' : id, 'confirm' : token }
        response = session.get(URL, params = params, stream = True)

    save_response_content(response, destination)    

def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)

if __name__ == "__main__":
    file_id = '1FBbr9zubK4VsB6gmNzj4fvE204ngvCeZ'
    destination = 'IndiaPostalCodes.csv'

    #If file is not present in current directory, then download file from google_drive.
    files_in_dir = os.listdir()
    if 'IndiaPostalCodes.csv' not in files_in_dir :
        download_file_from_google_drive(file_id, destination)

In [25]:

def get_distance_between_lats_lons(lat1,lon1,lat2,lon2):
# approximate radius of earth in km
        R = 6373

        lat1 = radians(lat1)
        lon1 = radians(lon1)
        lat2 = radians(lat2)
        lon2 = radians(lon2)

        dlon = lon2 - lon1
        dlat = lat2 - lat1

        a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
        c = 2 * atan2(sqrt(a), sqrt(1 - a))

        distance = R * c
        return(distance)

In [26]:
file= 'IndiaPostalCodes.csv'
city_wise_coordinates= pd.read_csv(file)
city_wise_coordinates['City'] = city_wise_coordinates['City'].str.upper()
city_wise_coordinates['District'] = city_wise_coordinates['District'].str.upper()
city_wise_coordinates['State'] = city_wise_coordinates['State'].str.upper()
#city_wise_coordinates

In [27]:
district_wise_pin_states= city_wise_coordinates.groupby('District')['PIN','State'].agg(pd.Series.mode)
district_wise_lat_lng= city_wise_coordinates.groupby('District')['Lat','Lng'].agg(pd.Series.mean)

district_wise_data_geonames= district_wise_pin_states.merge(district_wise_lat_lng,left_on='District', right_on='District', how= 'inner').reset_index()
#district_wise_data_geonames

In [28]:
df= pd.read_json("https://api.covid19india.org/raw_data.json")## data from covid19india.org
df3= []
for row in range(0,df.shape[0]):
    df1= df['raw_data'][row]
    df2=pd.DataFrame(df1.items()).set_index(0)
    df3.append(df2.T)
# Converting all raw data to data frame
appended_data = pd.concat(df3, sort=False)
appended_data.replace(r'^\s*$', np.nan, regex=True, inplace = True)
appended_data.rename(columns={'detectedcity':'City'}, inplace=True)
appended_data.rename(columns={'detecteddistrict':'District'}, inplace=True)
appended_data.rename(columns={'detectedstate':'State'}, inplace=True)

appended_data['City'] = appended_data['City'].str.lower()
appended_data['District'] = appended_data['District'].str.lower()
appended_data['State'] = appended_data['State'].str.lower()
appended_data= appended_data.dropna(thresh=3)


In [29]:

# Grouping the raw data of patients according to Districts
district_wise_counts= appended_data.groupby('District').agg({'patientnumber': 'count'})
district_wise_counts.rename(columns={'patientnumber':'d_patient_counts'}, inplace=True)
district_wise_counts =district_wise_counts.reset_index()
district_wise_counts['District'] = district_wise_counts['District'].str.upper()

In [30]:
# Grouping the all the patients data with district and getting the longitude and Latitude from the ditrict postal data
corona_db_with_latlng= district_wise_counts.merge(district_wise_data_geonames, left_on='District', right_on='District', how= 'inner')
corona_db_with_latlng.rename(columns={'d_patient_counts':'Num_Positive_cases'}, inplace=True)

In [31]:
import warnings
warnings.filterwarnings("ignore")

In [32]:
def get_idx_distance_from_query_locations(q_lat, q_lng, corona_db_with_latlng):
    dist_array=[]
    for index, row in corona_db_with_latlng.iterrows():
        dist= int(get_distance_between_lats_lons(q_lat,q_lng,row['Lat'],row['Lng']))
        dist_array.append(dist)

    minpos = dist_array.index(min(dist_array))
    mindist= dist_array[minpos]
    cases= corona_db_with_latlng.loc[minpos,'Num_Positive_cases']
    location= corona_db_with_latlng.loc[minpos,'District']
    state= corona_db_with_latlng.loc[minpos,'State']
    Lats= corona_db_with_latlng.loc[minpos,'Lat']
    Lngs= corona_db_with_latlng.loc[minpos,'Lng']

    return(mindist, cases, location, state ,Lats , Lngs)

In [41]:
def get_nearest_covid19_stats_pincode(query_info,corona_db_with_latlng):
    # First we check if there is any case reported in same postal are.
    # For this we check the presence pf parsed pinCode in the recived data thorugh api i.e.  corona_db_with_latlng
    # Also we set mindist to 1-2 km ,since a postal area is in general of 2 km.
    if query_info.PIN.iloc[0] in corona_db_with_latlng['PIN'].values:
        mindist= 2
        print('if mein hai hum')
        Lat= corona_db_with_latlng.loc[corona_db_with_latlng.PIN==query_info.PIN.iloc[1], 'Lat'];
        Lng= corona_db_with_latlng.loc[corona_db_with_latlng.PIN==query_info.PIN.iloc[1], 'Lng'];
        mindist= int(get_distance_between_lats_lons(query_info.Lat.iloc[1] ,query_info.Lng.iloc[1], Lat,Lng))
        cases= corona_db_with_latlng.loc[corona_db_with_latlng.PIN==query_info.PIN.iloc[1], 'Num_Positive_cases']
        district= corona_db_with_latlng.loc[corona_db_with_latlng.PIN==query_info.PIN.iloc[1], 'District']
        state= corona_db_with_latlng.loc[corona_db_with_latlng.PIN==query_info.PIN.iloc[1], 'State']
        print("The nearest location with COVID-19 from your PIN is in your own Postal Location with {} number of positive cases".format(cases.values[0]))
        print("Location: {} , {}".format(district.values[0].upper(), state.values[0].upper()))
        return (1 , cases.values[0] , district.values[0].upper() , state.values[0].upper() , Lat , Lng)
    else:
        # This else statement is for if no case in same postal area.
        (mindist, cases, district, state , Lat, Lng) = get_idx_distance_from_query_locations(query_info.Lat.iloc[0] ,query_info.Lng.iloc[0], corona_db_with_latlng)
        print("The nearest location with COVID-19 from your PIN is within {} km with {} number of positive cases".format(mindist, cases))
        print("Location: {} , {}".format(district.upper(), state.upper()))
        return (mindist,cases,district.upper(), state.upper(), Lat , Lng)
    

        
def get_nearest_covid19_stats_lat_lng(query_info,corona_db_with_latlng):
    # Here we try to get data through
    # Sometimes one pin can point to morethan one location.        
    (mindist, cases, district, state , Lat, Lng) = get_idx_distance_from_query_locations(query_info['Lat'] ,query_info['Lng'], corona_db_with_latlng)  
    print("The nearest location with COVID-19 from your GPS location is within {} km with {} number of positive cases".format(mindist, cases))
    print("Location: {} , {}".format(district.upper(), state.upper()))
    return (mindist,cases,district.upper(), state.upper(), Lat , Lng)


In [44]:
query_pincode = input("Enter your location as an Indian PIN  : ")
g = int(query_pincode)
    #print('call_received at script 2')
if g in city_wise_coordinates.PIN.values:
    query_info= city_wise_coordinates[city_wise_coordinates.PIN == int(g)]
        #print('No error in script 2')
    (mindist, cases, district, state , Lat, Lng) = get_nearest_covid19_stats_pincode(query_info,corona_db_with_latlng)
else:
    print('You entered an Invalid PIN')


Enter your location as an Indian PIN  :  283111


The nearest location with COVID-19 from your PIN is within 21 km with 9 number of positive cases
Location: AGRA , UTTAR PRADESH


In [39]:
# To enable nearest search using current location.   
Latitude = 23.631247 # input("Enter you lat")
Longitude = 86.135352 # input("Enter your lng")
query_info = {'Lat':Latitude , 'Lng' : Longitude}
get_nearest_covid19_stats_lat_lng(query_info,corona_db_with_latlng)

The nearest location with COVID-19 from your GPS location is within 88 km with 1 number of positive cases
Location: RANCHI , JHARKHAND


(88, 1, 'RANCHI', 'JHARKHAND', 23.335075124378108, 85.33391343283581)

In [164]:
state_wise_counts= appended_data.groupby(['State' , 'District']).agg({'patientnumber': 'count'})
state_wise_counts.rename(columns={'patientnumber':'d_patient_counts'}, inplace=True)
state_wise_counts =state_wise_counts.reset_index()
state_wise_counts['State'] = state_wise_counts['State'].str.upper()
state_wise_counts['District'] = state_wise_counts['District'].str.upper()

In [165]:
print(appended_data['District'].isna().sum())
print(appended_data['City'].isna().sum())
print(appended_data['State'].isna().sum())

327
1191
0


In [38]:
corona_db_with_latlng['PIN']

0                                283111
1                                796261
2                                305801
3      [688504, 688526, 688561, 690503]
4                                301001
                     ...               
156                              531040
157                              673592
158                              110018
159            [534315, 534444, 534455]
160                              445304
Name: PIN, Length: 161, dtype: object

In [146]:
d=district_wise_data_geonames[district_wise_data_geonames['State']=='ANDHRA PRADESH']

In [166]:
d

Unnamed: 0,District,PIN,State,Lat,Lng
20,ANANTHAPUR,515865,ANDHRA PRADESH,14.392188,77.677661
116,CHITTOOR,517247,ANDHRA PRADESH,13.570778,79.039442
123,CUDDAPAH,"[516175, 516267]",ANDHRA PRADESH,14.46318,78.754036
166,EAST GODAVARI,533352,ANDHRA PRADESH,17.05394,81.864499
212,GUNTUR,522647,ANDHRA PRADESH,16.216747,80.177463
338,KRISHNA,521185,ANDHRA PRADESH,16.607002,80.859357
343,KURNOOL,518502,ANDHRA PRADESH,15.475624,77.973648
431,NELLORE,524002,ANDHRA PRADESH,14.459547,79.761635
465,PEDDAPALLI,505416,ANDHRA PRADESH,18.5387,79.0803
474,PRAKASAM,523201,ANDHRA PRADESH,15.559616,79.622014


In [168]:
query_info= city_wise_coordinates[city_wise_coordinates.PIN == int(516175)]

In [169]:
query_info.head()

Unnamed: 0,Country,PIN,City,State,State_ID,District,District_ID,Community,Lat,Lng,Loc_accuracy
7391,IN,516175,JILLELLA,ANDHRA PRADESH,2,CUDDAPAH,551.0,,14.4599,78.7836,4
7392,IN,516175,GUDIPADU,ANDHRA PRADESH,2,CUDDAPAH,551.0,,14.8172,78.6814,4
7393,IN,516175,BAYANAPALLE,ANDHRA PRADESH,2,CUDDAPAH,551.0,Duvvur,14.5698,78.8097,4
7394,IN,516175,CHINNASINGANAPALLE,ANDHRA PRADESH,2,CUDDAPAH,551.0,,14.842,78.6974,4
7395,IN,516175,ERRAPALLE,ANDHRA PRADESH,2,CUDDAPAH,551.0,Duvvur,14.7413,78.826,4


In [171]:
appended_data['patientnumber'].isna().sum()

0