#### import libraries

In [6]:
#data sources 
# covid-19 cases in india - https://api.covid19india.org/documentation/csv/ 

In [7]:
#json
import json
import os
from datetime import datetime

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

#pip install reverse_geocoder
import reverse_geocoder as rg

#for mode
from scipy import stats


#pip install -U googlemaps
#import googlemaps

In [8]:
#global constants 
# Zone labels, listed from lowest to highest risk. They double as the column
# names of the risk zone matrix, so the string values must not change.
# NOTE: `yello_zone_name` is a misspelt identifier kept because other cells use it.
green_zone_name = 'green'
yello_zone_name = 'yellow'
amber_zone_name = 'amber'
red_zone_name = 'red'

# Maps a district name as returned by the reverse geocoder (key) to the
# spelling it is replaced with before dataset look-ups (value).
corrected_district_dict = {
    'Tirunelveli Kattabo': 'Tirunelveli',
    'Kanniyakumari': 'Kanyakumari',
    'Tiruchchirappalli': 'Tiruchirappalli',
    'Villupuram': 'Viluppuram',
}

In [9]:
#sample risk zone matrix
def create_sample_risk_zone_matrix(should_print):
    """Build a small hand-filled risk zone matrix for trying out the scoring.

    Parameters
    ----------
    should_print : bool
        When equal to True the finished matrix is printed.

    Returns
    -------
    pandas.DataFrame
        Rows 'route1' and 'route2'; one count column per zone plus an empty
        string 'risk_score' column.
    """
    zone_columns = ['red', 'amber', 'yellow', 'green']
    sample_counts = {
        'route1': {'red': 23, 'amber': 5, 'yellow': 1, 'green': 3},
        'route2': {'red': 6, 'amber': 15, 'yellow': 111, 'green': 322},
    }

    matrix = pd.DataFrame(columns=zone_columns + ['risk_score'],
                          index=list(sample_counts))

    # fill the frame cell by cell, one route after the other
    for route_name, counts in sample_counts.items():
        for zone_name, count in counts.items():
            matrix.loc[route_name, zone_name] = count

    # the score is computed later; start with an empty string
    matrix['risk_score'] = ''

    if should_print == True:
        print(matrix)

    return matrix

In [10]:
#empty zone matrix
def create_empty_risk_zone_matrix(route_list, should_print):
    """Create a risk zone matrix with every zone count set to zero.

    Parameters
    ----------
    route_list : iterable
        Route names used as the row index.
    should_print : bool
        When equal to True the new matrix is printed.

    Returns
    -------
    pandas.DataFrame
        One row per route, zone count columns filled with 0 and an empty
        string 'risk_score' column.
    """
    zone_columns = ['red', 'amber', 'yellow', 'green']
    matrix = pd.DataFrame(columns=zone_columns + ['risk_score'], index=route_list)

    # every counter starts at zero
    for zone_name in zone_columns:
        matrix[zone_name] = 0
    matrix['risk_score'] = ''

    if should_print == True:
        print('empty risk zone matrix :\n', matrix)
    return matrix


In [11]:
#function to calculate risk score
def calculate_risk_score(risk_zone_matrix, should_print):
    """Fill the 'risk_score' column of a risk zone matrix.

    The score of a route is the concatenation of its 'red', 'amber' and
    'yellow' counts, in that order. Each count is zero-padded to the width of
    the largest value in its column, so every row gets a score of the same
    length. The 'green' count is not part of the score.

    Parameters
    ----------
    risk_zone_matrix : pandas.DataFrame
        Must contain the columns 'red', 'amber' and 'yellow'. The frame is
        modified in place.
    should_print : bool
        When truthy the updated matrix is printed.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with 'risk_score' overwritten.
    """
    #https://queirozf.com/entries/pandas-dataframe-examples-column-operations
    risk_score = None
    for zone_name in ('red', 'amber', 'yellow'):
        zone_counts = risk_zone_matrix[zone_name]
        # builtin str replaces the np.str alias, which NumPy 1.24 removed
        width = len(str(zone_counts.max()))
        padded = zone_counts.map(lambda value, width=width: str(value).zfill(width))
        risk_score = padded if risk_score is None else risk_score + padded

    risk_zone_matrix['risk_score'] = risk_score

    if should_print:
        print(risk_zone_matrix)

    return risk_zone_matrix

In [12]:
def get_all_alternative_routes_latitude_longitude_dict(routes_json, should_print):
    """Collect the start coordinates of every step of every alternative route.

    Parameters
    ----------
    routes_json : iterable of dict
        One entry per route. Each route has a 'legs' list, each leg a 'steps'
        list, and each step a 'start_location' dict with 'lat' and 'lng' keys.
    should_print : bool
        When truthy, prints the reverse-geocoded result of every coordinate
        and the coordinate list of each route (debugging aid).

    Returns
    -------
    dict
        Maps 'route_<n>' (n starts at 1, in input order) to a list of
        [latitude, longitude] pairs. Both values are strings.
    """
    #latitude and longitude : https://www.latlong.net/Show-Latitude-Longitude.html
    latitude_longitude_dict = dict()
    for num, single_route in enumerate(routes_json, start=1):
        if should_print:
            print('ROUTE : {} -- START\n'.format(num))

        route_coordinates = list()
        # walk every leg (not only the first) so multi-leg routes are covered
        for leg in single_route['legs']:
            for step in leg['steps']:
                # read the keys directly rather than parsing str(dict), which
                # depended on key order and on the exact repr layout
                start_location = step['start_location']
                latitude = str(start_location['lat'])
                longitude = str(start_location['lng'])

                if should_print:
                    print(rg.search((latitude, longitude)))

                route_coordinates.append([latitude, longitude])

        if should_print:
            print('ROUTE : {} -- END\n'.format(num))
            print(route_coordinates)

        latitude_longitude_dict['route_{}'.format(num)] = route_coordinates
    return latitude_longitude_dict

In [13]:
def getindexdefault(self_list, elem, default):
    """Return the position of `elem` in `self_list`, or `default` if absent."""
    try:
        position = self_list.index(elem)
    except ValueError:
        # .index signals "not found" with ValueError
        position = default
    return position

In [14]:
def get_route_districts_dictionary(latitude_longitude_dict, should_print):
    """Translate each route's coordinates into its list of distinct districts.

    Every coordinate pair is reverse-geocoded with `rg.search`; the 'admin2'
    field of the first hit is taken as the district name. Names are kept in
    order of first appearance without duplicates, and names listed in
    `corrected_district_dict` are then replaced by their corrected spelling.

    Parameters
    ----------
    latitude_longitude_dict : dict
        Route name -> list of [latitude, longitude] pairs.
    should_print : bool
        When equal to True, prints each search result and each district list.

    Returns
    -------
    dict
        Route name -> list of district names.
    """
    route_districts_dict = {}
    for route_key, coordinates in latitude_longitude_dict.items():
        route_districts = []
        for coordinate in coordinates:
            matches = rg.search((coordinate[0], coordinate[1]))
            if should_print == True:
                print(matches)

            district = matches[0]['admin2']
            if district not in route_districts:
                route_districts.append(district)

        # swap geocoder spellings for the corrected ones, keeping positions
        for wrong_name, right_name in corrected_district_dict.items():
            position = getindexdefault(route_districts, wrong_name, -1)
            if position != -1:
                route_districts[position] = right_name

        route_districts_dict[route_key] = route_districts
        if should_print == True:
            print('District Name : ', route_districts)

    return route_districts_dict
    

In [15]:
def get_risk_zone_category(current_case_count, max_case_count):
    """Classify a case count relative to a reference maximum.

    The ratio current_case_count / max_case_count is mapped to a zone:

        ratio <= 0.1 -> green
        ratio <= 0.4 -> yellow
        ratio <= 0.8 -> amber
        otherwise    -> red

    Parameters
    ----------
    current_case_count : number
        Case count of the place being rated.
    max_case_count : number
        Reference maximum used to normalise the count.

    Returns
    -------
    str
        One of the module-level zone names.
    """
    if max_case_count <= 0:
        # No usable reference maximum: avoid dividing by zero. Without this
        # guard Python ints raise ZeroDivisionError and NumPy 0/0 yields NaN,
        # which fails every comparison below and would be reported as red.
        return red_zone_name if current_case_count > 0 else green_zone_name

    score = current_case_count / max_case_count
    if score <= 0.1:
        return green_zone_name
    if score <= 0.4:
        return yello_zone_name
    if score <= 0.8:
        return amber_zone_name
    return red_zone_name
        

In [16]:
def get_hot_spot_risk_zone_category(covid_19_dataset, zone_name, risk_count_column_name, max_case_count, neighbours, should_print):
    """Rate a place by the most frequent zone among itself and its neighbours.

    Parameters
    ----------
    covid_19_dataset : pandas.DataFrame
        Indexed by place name; must contain `risk_count_column_name`.
    zone_name : str
        The place being rated.
    risk_count_column_name : str
        Column holding the case count used for rating.
    max_case_count : number
        Reference maximum passed on to `get_risk_zone_category`.
    neighbours : str or None
        Comma-separated neighbour names. Any non-string value (None, or NaN
        from an empty CSV cell) means "no neighbours".
    should_print : bool
        When truthy, prints intermediate values.

    Returns
    -------
    str
        The most frequent zone name. Ties go to the alphabetically smallest
        name, which matches how `scipy.stats.mode` resolved them.

    Raises
    ------
    KeyError
        If the place or one of its neighbours is not in the dataset index.
    """
    place_list = [zone_name]

    if isinstance(neighbours, str):
        # tolerate "A, B" style lists and stray/trailing commas
        place_list.extend(name.strip() for name in neighbours.split(',') if name.strip())

    if should_print:
        print('max case count :', max_case_count)

    zone_category_list = list()
    for place in place_list:
        current_case_count = covid_19_dataset.loc[place, risk_count_column_name]

        risk_category = get_risk_zone_category(current_case_count, max_case_count)
        zone_category_list.append(risk_category)
        if should_print:
            print(place, current_case_count, risk_category)

    # Plain-Python mode: recent SciPy rejects non-numeric input to stats.mode.
    # max() keeps the first maximum, and the candidates are sorted, so a tie
    # resolves to the alphabetically smallest category.
    risk_zone_category = max(sorted(set(zone_category_list)), key=zone_category_list.count)
    if should_print:
        print('zone name : {} | zone category : {}'.format(zone_name, risk_zone_category))
    return risk_zone_category

In [17]:
def compute_risk_for_route_old(route_districts_dict, risk_column_name, max_case_count, neighbour_list, should_print):
    """Score routes without taking neighbouring districts into account.

    Kept for backward compatibility. The logic lives in
    `compute_risk_for_route`; this wrapper calls it with an empty neighbour
    dataset, which makes it rate every district on its own case count.

    Parameters
    ----------
    route_districts_dict : dict
        Route name -> list of district names.
    risk_column_name : str
        Column of the covid dataset used for rating.
    max_case_count : number
        Reference maximum for the zone classification.
    neighbour_list : any
        Unused; accepted only so existing callers keep working.
    should_print : bool
        Enables debug printing.

    Returns
    -------
    pandas.DataFrame
        Risk zone matrix with one row per route.
    """
    return compute_risk_for_route(route_districts_dict, risk_column_name, max_case_count, pd.DataFrame(), should_print)

In [18]:
def compute_risk_for_route(route_districts_dict, risk_column_name, max_case_count, neighbour_dataset, should_print):
    """Count the risk zones crossed by each route and derive its risk score.

    Each district of a route is rated with `get_hot_spot_risk_zone_category`
    and the matching zone counter of the route is incremented. The risk score
    column is filled once all routes have been counted.

    NOTE: case counts are read from the notebook-level `covid_19_dataset`,
    which must be loaded before this function is called.

    Parameters
    ----------
    route_districts_dict : dict
        Route name -> list of district names.
    risk_column_name : str
        Column of the covid dataset used for rating.
    max_case_count : number
        Reference maximum for the zone classification.
    neighbour_dataset : pandas.DataFrame or None
        Indexed by district with a 'neighbour-districts' column. None or an
        empty frame disables neighbour look-ups; a district that is missing
        from the index, or whose cell is not a string, is rated on its own.
    should_print : bool
        Enables debug printing.

    Returns
    -------
    pandas.DataFrame
        Risk zone matrix with one row per route.
    """
    risk_zone_matrix = create_empty_risk_zone_matrix(route_districts_dict.keys(), should_print)
    use_neighbours = neighbour_dataset is not None and not neighbour_dataset.empty

    for route_name, place_list in route_districts_dict.items():
        if should_print:
            print('computing risk for route : {}'.format(route_name))

        for place_name in place_list:
            neighbours_list = None
            if use_neighbours and place_name in neighbour_dataset.index:
                neighbours_list = neighbour_dataset.loc[place_name, 'neighbour-districts']
                if not isinstance(neighbours_list, str):
                    # empty CSV cells are read as NaN, not as a string
                    neighbours_list = None

            risk_category = get_hot_spot_risk_zone_category(covid_19_dataset, place_name, risk_column_name, max_case_count, neighbours_list, should_print)
            risk_zone_matrix.loc[route_name, risk_category] += 1

    # The score depends on column-wide maxima, so it only needs computing once
    # all counts are final; nothing to score when there are no routes.
    if len(route_districts_dict) > 0:
        risk_zone_matrix = calculate_risk_score(risk_zone_matrix, should_print)

    return risk_zone_matrix

In [19]:
def get_direction_from_google_map(origin, destination, api_key=None):
    """Request driving directions, including alternatives, from Google Maps.

    Parameters
    ----------
    origin : str
        Start of the journey.
    destination : str
        End of the journey.
    api_key : str, optional
        Google Maps API key. When omitted, the GOOGLE_MAPS_API_KEY environment
        variable is used. The key is never stored in the notebook.

    Returns
    -------
    The value returned by `googlemaps.Client.directions`.

    Raises
    ------
    ValueError
        If no API key was passed and the environment variable is not set.
    """
    if api_key is None:
        api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise ValueError('Google Maps API key missing: pass api_key or set GOOGLE_MAPS_API_KEY')

    # imported lazily so the rest of the notebook runs without this package
    #pip install -U googlemaps
    import googlemaps

    gmaps = googlemaps.Client(key=api_key)

    # alternatives=True lets the Directions service return more than one
    # route; this is only available for requests without intermediate waypoints.
    directions_result = gmaps.directions(origin,
                                         destination,
                                         mode="driving",
                                         alternatives=True,
                                         departure_time=datetime.now())

    return directions_result

In [20]:
#load the datasets : route details (json), district neighbours (csv), covid-19 cases (csv)

In [21]:
#load route detail from json file
# The parsed content is kept in routes_dataset_json; it is later passed to
# get_all_alternative_routes_latitude_longitude_dict, which iterates it as a
# list of routes (each with 'legs' -> 'steps' -> 'start_location').
#online json viewer : http://jsonviewer.stack.hu/
#online json formatter : https://jsonformatter.curiousconcept.com/
with open("../data/low-risk-covid-19-map-data/data.json",encoding='utf8') as file:    
    routes_dataset_json = json.load(file)

In [22]:
#load neighbour district list dataset
# Indexed by district name; the 'neighbour-districts' column is read by compute_risk_for_route.
# Forward slashes keep the path portable and avoid the invalid '\d' / '\l' escape sequences.
district_neighbour_dataset = pd.read_csv('../data/low-risk-covid-19-map-data/district-neighbours-selected.csv', index_col='current-district')
#district_neighbour_dataset.index

In [23]:
#load the covid-19 dataset
# Indexed by district name so case counts can be looked up with .loc[district, column].
# Forward slashes keep the path portable and avoid the invalid '\d' / '\l' escape sequences.
covid_19_dataset = pd.read_csv('../data/low-risk-covid-19-map-data/district_wise.csv', index_col='District')

#### experiment section

In [24]:
#test risk zone matrix and score calculation
# Build the hand-filled two-route sample matrix and print it (risk_score is still empty here).
temp_risk_zone_matrix = create_sample_risk_zone_matrix(True)


       red amber yellow green risk_score
route1  23     5      1     3           
route2   6    15    111   322           


In [25]:
# Fill the risk_score column of the sample matrix and print the result.
temp_risk_zone_matrix = calculate_risk_score(temp_risk_zone_matrix, True)

       red amber yellow green risk_score
route1  23     5      1     3    2305001
route2   6    15    111   322    0615111


In [26]:
#enable the below section only if required
#directions = get_direction_from_google_map('Kanyakumari', 'Chennai')
#directions

In [27]:
#experiment 1 - no neighbours
# Step 1: collect the start coordinates of every step of each alternative route.
# Step 2: reverse-geocode those coordinates into the list of distinct districts per route.
# Both calls pass should_print=False to keep the debug output off.
routes_latitude_longitude_dict = get_all_alternative_routes_latitude_longitude_dict(routes_dataset_json, False)
route_districts_dict = get_route_districts_dictionary(routes_latitude_longitude_dict, False)
#routes_latitude_longitude_dict

Loading formatted geocoded file...


In [28]:
#first experiment - without considering neighbours
# Reference maximum: the largest value of the chosen column among the Tamil Nadu rows.
risk_column_name = 'Active'
max_case_count = covid_19_dataset.loc[covid_19_dataset['State'] == 'Tamil Nadu', risk_column_name].max()


In [29]:

# An empty neighbour dataset disables neighbour look-ups, so each district is rated on its own case count.
route_and_risk_score = compute_risk_for_route(route_districts_dict, risk_column_name, max_case_count, pd.DataFrame(), False)

In [30]:
# Display the zone counts and risk score of every route for experiment 1.
route_and_risk_score

Unnamed: 0,red,amber,yellow,green,risk_score
route_1,1,0,7,2,107
route_2,1,0,6,6,106
route_3,1,0,6,6,106


In [31]:
#experiment 2 - consider neighbours
#to compute max_case_count , take max case of neighbour states' district
# Neighbour districts may lie in an adjacent state, so the reference maximum is
# taken over Tamil Nadu and the three states listed below.
max_case_count_tamilnadu = np.max(covid_19_dataset[covid_19_dataset['State']=='Tamil Nadu'][risk_column_name])
max_case_count_andra = np.max(covid_19_dataset[covid_19_dataset['State']=='Andhra Pradesh'][risk_column_name])
max_case_count_kerala = np.max(covid_19_dataset[covid_19_dataset['State']=='Kerala'][risk_column_name])
max_case_count_karnataka = np.max(covid_19_dataset[covid_19_dataset['State']=='Karnataka'][risk_column_name])
max_neighbour_case_count = np.max([max_case_count_tamilnadu, max_case_count_andra, max_case_count_kerala, max_case_count_karnataka])
# print the value that is actually passed to compute_risk_for_route below,
# not the Tamil Nadu-only max_case_count left over from experiment 1
print('max_neighbour_case_count : ', max_neighbour_case_count)
compute_risk_for_route(route_districts_dict, risk_column_name, max_neighbour_case_count, district_neighbour_dataset, False)

max_case_count :  12855


Unnamed: 0,red,amber,yellow,green,risk_score
route_1,0,0,2,8,2
route_2,0,0,2,11,2
route_3,0,0,2,11,2
