In [4]:
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from gmplot import gmplot
import googlemaps
import random
import folium
from folium.plugins import HeatMap
import os

In [None]:
# Load the January 2019 extract; the first CSV column becomes the index.
data = pd.read_csv("./data/Jan19_SH.csv", index_col=0)

# Columns that are dropped before the address analysis.
unwanted = [
    'opdhVbp', 'vstId', 'vstDate', 'opdhVpulse',
    'opdhVtemp', 'opdhDhtn', 'opdhDihd', 'opdhDhyper',
    'pntAge', 'opdhVbmi', 'opdhVheight', 'opdhVweight',
    'opdhDdm', 'vstAmount', 'vsrcId',
]
# axis=1 means "drop columns"; the result is bound back to `data`.
data = data.drop(unwanted, axis=1)

# Report missing values per column, then default missing addresses
# to 'Pakistan' and report again.
print('number of empty cells', data.isnull().sum())
data['pntAddress'] = data['pntAddress'].fillna('Pakistan')
print('number of empty cells after fillna', data.isnull().sum(), '\n')

# Column dtypes and overall dimensions of the frame.
print('type of data in the df ', data.dtypes)
print('shape of dataFrame ', data.shape, '\n')

In [None]:
# Geocode a random sample of patient addresses with the Google Maps API and
# store latitude, longitude, formatted name and the raw result in `data`.

# The API key is the first line of api.txt; strip the trailing newline so it
# does not become part of the key.
with open('api.txt') as f:
    myAPIkey = f.readline().strip()
gmaps_key = googlemaps.Client(key=myAPIkey)

# Result columns, left as None for rows that are not geocoded.
# PARTIAL_RESULT is created here but not populated in this cell.
for new_col in ("LAT", "LON", "GMAPS_NAME", "GEOCODE_OBJECT", "PARTIAL_RESULT"):
    data[new_col] = None

# Column positions do not change inside the loop, so look them up once.
address_pos = data.columns.get_loc("pntAddress")
lat_pos = data.columns.get_loc("LAT")
lon_pos = data.columns.get_loc("LON")
name_pos = data.columns.get_loc("GMAPS_NAME")
geocode_pos = data.columns.get_loc("GEOCODE_OBJECT")

# Pick up to 50 random row positions; random.sample raises ValueError when
# asked for more items than exist, so cap the sample at the frame length.
chosen = random.sample(range(len(data)), min(50, len(data)))
for i in chosen:
    # `i` is a row POSITION. Read the address positionally too: the index
    # comes from the CSV and is not guaranteed to be 0..n-1, so a label
    # lookup could hit a different row (or none) than the .iat writes below.
    address = data.iat[i, address_pos]
    geocode_result = gmaps_key.geocode(address + ', Pakistan')
    try:
        best = geocode_result[0]
        lat = best["geometry"]["location"]["lat"]
        lon = best["geometry"]["location"]["lng"]
        gname = best["formatted_address"]
    except (IndexError, KeyError, TypeError):
        # Empty result list, or a result without the expected fields.
        print("couldn't find address", i, address)
        continue
    data.iat[i, lat_pos] = lat
    data.iat[i, lon_pos] = lon
    data.iat[i, name_pos] = gname
    data.iat[i, geocode_pos] = best

# Keep only the coordinates of rows that were geocoded. A single shared mask
# keeps the latitude and longitude arrays aligned pair by pair.
found = (data["LAT"].notnull() & data["LON"].notnull()).values
latList = data["LAT"].values[found]
lonList = data["LON"].values[found]

In [None]:
# Read the raw CSV record by record to find where the csv module fails.
import csv

# newline='' leaves line-ending handling to the csv module, as its
# documentation requires for file objects passed to csv.reader.
with open(r'./data/PtData.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    try:
        for row in reader:
            pass
    except Exception as e:
        # reader.line_num is the number of source lines read so far, i.e. the
        # physical line being parsed when the error occurred.
        # Python 3 exceptions have no `.message` attribute, so format `e`.
        print("Error line %d: %s %s" % (reader.line_num, type(e), e))

In [None]:
# Load the patient table; the first CSV column becomes the index.
pt_data_path = './data/PtData.csv'
data = pd.read_csv(pt_data_path, index_col=0)

In [None]:
# Count the addresses that contain a literal carriage return.
has_carriage_return = data['pntAddress'].str.contains('\r', regex=False)
has_carriage_return.sum()

In [None]:
# The column drop used for the Jan19_SH extract is disabled for this file.

# Remove carriage returns from every string cell.
data = data.replace(r'\r', '', regex=True)

# Report missing values per column, then default missing addresses
# to 'Pakistan' and report again.
print('number of empty cells', data.isnull().sum())
data['pntAddress'] = data['pntAddress'].fillna('Pakistan')
print('number of empty cells after fillna', data.isnull().sum(), '\n')

# Column dtypes and overall dimensions of the frame.
print('type of data in the df ', data.dtypes)
print('shape of dataFrame ', data.shape, '\n')

In [None]:
# Write the cleaned patient table to disk.
clean_pt_data_path = './data/CleanPtData.csv'
data.to_csv(clean_pt_data_path)

In [57]:
# Load the geocoded address table; the first CSV column becomes the index.
data = pd.read_csv('../data/GMapsAddress_1329.csv', index_col=0)

# Pull the coordinate columns out as NumPy arrays.
latList, lonList = data["LAT"].values, data["LON"].values

# Number of rows with no latitude.
print(np.isnan(latList).sum())


132


In [61]:
# Only a cell's last expression is displayed, so the dtypes must be printed
# explicitly to appear in the output.
print(data.dtypes)
# Random sample of 50 rows for a visual check of the geocoding columns.
data.sample(50)

Unnamed: 0_level_0,pntAge,pntGender,pntAddress,pntRegDate,LAT,LON,GMAPS_NAME,GEOCODE_OBJECT,PARTIAL_RESULT
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10057,24.0,F,peshawar,05/01/2018 15:03,34.015137,71.524915,"Peshawar, Khyber Pakhtunkhwa, Pakistan",{'address_components': [{'long_name': 'Peshawa...,
14822,35.0,F,"Bara Gate,Peshawar",11/12/2018 10:54,33.982726,71.524778,"2 Bara Road., Umeedabad, Peshawar, Khyber Pakh...","{'address_components': [{'long_name': '2', 'sh...",
10777,65.0,M,peshawar saddar,26/02/2018 12:58,34.009578,71.560925,"Saddar Rd, Finance Department, Civil Secretari...",{'address_components': [{'long_name': 'Saddar ...,
5076,48.0,M,hayatabad,03/12/2016 12:18,33.974411,71.435873,"Hayatabad, Peshawar, Khyber Pakhtunkhwa, Pakistan",{'address_components': [{'long_name': 'Hayatab...,
5219,0.0,M,HAYATABAD,16/12/2016 09:39,33.974411,71.435873,"Hayatabad, Peshawar, Khyber Pakhtunkhwa, Pakistan",{'address_components': [{'long_name': 'Hayatab...,
10119,57.0,M,dir,10/01/2018 12:11,35.19766,71.874921,"Dir, Khyber Pakhtunkhwa, Pakistan","{'address_components': [{'long_name': 'Dir', '...",
3101,60.0,F,Sufaid Deri Peshawar,23/05/2016 09:59,33.975103,71.492269,"Sufaid Dheri, Peshawar, Khyber Pakhtunkhwa, Pa...",{'address_components': [{'long_name': 'Sufaid ...,True
14404,62.0,M,Parachinar (Naeem Khattak),07/11/2018 12:21,33.901124,70.086041,"Parachinar, Kurram Agency, Federally Administe...",{'address_components': [{'long_name': 'Parachi...,True
12404,50.0,M,malakand dargi kpk,20/06/2018 10:47,34.507495,71.898636,"Dargai, Malakand, Khyber Pakhtunkhwa, Pakistan",{'address_components': [{'long_name': 'Dargai'...,
8214,55.0,F,karak,27/07/2017 09:32,33.110479,71.091375,"Karak, Khyber Pakhtunkhwa, Pakistan","{'address_components': [{'long_name': 'Karak',...",


In [59]:
# Drop coordinates that are missing (NaN) because the address could not be
# found or was never searched for. One shared mask is applied to both arrays
# so each latitude stays paired with its own longitude even if only one of
# the two values is missing for a row.
valid = np.isfinite(latList) & np.isfinite(lonList)
latList = latList[valid]
lonList = lonList[valid]



In [60]:
# Count of NaN latitudes still present, followed by the latitudes themselves.
remaining_nan = np.isnan(latList).sum()
print(remaining_nan)
print(latList)

0
[ 33.9220536  34.0151366  34.0151366 ...,  35.2227114  33.93911    32.98648  ]


In [35]:
# Print every row whose latitude is NaN, then the number of such rows.
counter = 0
for _, record in data.iterrows():
    lat = record['LAT']
    # NaN is the only float that does not equal itself; skip real values.
    if lat == lat:
        continue
    print(record)
    counter += 1
print(counter)

pntAge                                  46
pntGender                                F
pntAddress        Baharat Phase 3 Peshawar
pntRegDate                13/10/2015 10:52
LAT                                    NaN
LON                                    NaN
GMAPS_NAME                             NaN
GEOCODE_OBJECT                         NaN
PARTIAL_RESULT                         NaN
Name: 627, dtype: object
pntAge                          43
pntGender                        M
pntAddress                Phase 5 
pntRegDate        22/10/2015 10:26
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Name: 668, dtype: object
pntAge                          55
pntGender                        F
pntAddress                 Phase 1
pntRegDate        26/10/2015 09:25
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEO

pntAge                          27
pntGender                        M
pntAddress                 Phase 1
pntRegDate        28/03/2016 14:51
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Name: 2140, dtype: object
pntAge                          30
pntGender                        M
pntAddress                 phase 1
pntRegDate        11/04/2016 08:43
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Name: 2397, dtype: object
pntAge                          34
pntGender                        M
pntAddress                 PHASE 5
pntRegDate        16/04/2016 10:10
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Nam

pntAge                          27
pntGender                        M
pntAddress                 PHASE 1
pntRegDate        10/09/2016 11:33
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Name: 4098, dtype: object
pntAge                          38
pntGender                        M
pntAddress                 phase 1
pntRegDate        16/09/2016 11:12
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Name: 4105, dtype: object
pntAge                          56
pntGender                        M
pntAddress                 phase 1
pntRegDate        19/09/2016 10:17
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Nam

Name: 5282, dtype: object
pntAge                          50
pntGender                        F
pntAddress                 Phase 1
pntRegDate        24/12/2016 08:37
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Name: 5333, dtype: object
pntAge                          45
pntGender                        F
pntAddress                 PHASE 1
pntRegDate        24/12/2016 12:37
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Name: 5346, dtype: object
pntAge                          10
pntGender                        M
pntAddress                    CITY
pntRegDate        24/12/2016 13:47
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESU

pntAge                          45
pntGender                        M
pntAddress                    town
pntRegDate        20/09/2017 10:42
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Name: 8871, dtype: object
pntAge                          57
pntGender                        M
pntAddress                   tall 
pntRegDate        25/09/2017 14:30
LAT                            NaN
LON                            NaN
GMAPS_NAME                     NaN
GEOCODE_OBJECT                 NaN
PARTIAL_RESULT                 NaN
Name: 8952, dtype: object
pntAge                                                  33
pntGender                                                M
pntAddress        labour colomy phase 5 hayatabad peshawar
pntRegDate                                29/12/2017 12:00
LAT                                                    NaN
LON                    

In [None]:
# Build a folium heat map of the geocoded addresses on a map of Peshawar
# and save it as an HTML file.
from datetime import datetime  # used for the file-name suffix below

hmap = folium.Map(location=[33.99, 71.52], zoom_start=12)

# One (lat, lon) pair per point; latList and lonList must be the same length
# and aligned element by element.
heat = HeatMap(
    list(zip(latList, lonList)),
    min_opacity=0.2,
    max_val=100,
    radius=20,
    blur=15,
    max_zoom=10,
)
hmap.add_child(heat)

# Suffix the file name with the current time (HHMM).
timestamp = datetime.now().strftime('%H%M')
hmap.save('../data/' + 'initial_folium' + timestamp + '.html')