In [1]:
#!/usr/bin/env python3 

In [10]:
# !pip install pandas --user 
# !pip install geopy 
# !pip install folium 
# !pip install geopandas
# !pip install xlrd==1.1.0

<h3> Getting The Geolocation Of NIN Locations In Nigeria </h3> 

In [1]:
# Importing the necessary modules 
import folium
import numpy as np 
import pandas as pd 
import geopandas as gpd 
from geopy.geocoders import Nominatim 
from folium.plugins import HeatMap, MarkerCluster 

In [2]:
# Creating the geo-locator object 
geolocator = Nominatim(user_agent="nin-eda") 

In [3]:
# The path to the dataset 
dataPath = "cleanData.xls"; 

# Loading the data into memory 
df = pd.read_excel(dataPath); 

In [4]:
# Removing the unnames column 
df.pop(df.columns[0]); 

# Creating a list to hold the values for lat, long and it's co-ordinates 
latitude = list()
longitude = list()
CoOrdinates = list() 
location = list() 

# Removing the first row 
df = df.drop(df.index[[0, 0]])
 
# looping through the CoOrdinates column and clean the data
for ordinates in df["CoOrdinates"].values:
    latValue = str(ordinates.replace(' ', '').replace(')', '').replace('(', '').split(",")[0][:5])
    longValue = str(ordinates.replace(' ', '').replace(')', '').replace('(', '').split(",")[1][:5]) 
    
    # Append the clean data to the latValue and longValue list 
    latitude.append(str(latValue))
    longitude.append(str(longValue)) 
    
    # Concat the values for latValue and longValue 
    ordinatesC = latValue + "," + longValue;
    CoOrdinates.append(ordinatesC)
    
# 
for ActualLocation in df["Location"].values:
    ActualLocation = str(ActualLocation.replace(',', '')) 
    
    # Append 
    location.append(ActualLocation) 
    
# Creating a new column with the respective names 
df["latitude"] = pd.DataFrame(latitude)
df["longitude"] = pd.DataFrame(longitude) 
df["CoOrdinates"] = pd.DataFrame(CoOrdinates)
df["Location"] = pd.DataFrame(location) 

In [5]:
# Trying to get the co-ordinates again for the correct values 
location_array = df["Location"].values
latarr = []
longarr = []


# 
for locations in location_array: 
    co_ordinates = geolocator.geocode(locations)
    lat = co_ordinates.latitude; 
    long = co_ordinates.longitude; 
    
    # 
    latarr.append(lat) 
    longarr.append(long) 

In [8]:
df["latitude"] = pd.DataFrame(latarr)
df["longitude"] = pd.DataFrame(longarr) 

In [9]:
# Checking for null values, and if any drop the row with NaN values 
df.isnull().any()

Head Office    False
Location        True
CoOrdinates     True
latitude        True
longitude       True
dtype: bool

In [10]:
df = df.dropna() 

In [11]:
# Drop duplicates values found in the latitude and longitude columns 
df = df.drop_duplicates(subset=['latitude', 'longitude']) 

In [12]:
# Viewing the head of the dataframe
df.head() 

Unnamed: 0,Head Office,Location,CoOrdinates,latitude,longitude
1,Allen Avenue 1,Lagos Lagos Island 100242 Nigeria,"6.455,3.394",9.064331,7.489297
3,Maitama 1,Abuja Municipal Area Council Federal Capital T...,"9.064,7.489",10.46214,7.223942
6,Zuba,Abuja Municipal Area Council Federal Capital T...,"9.064,7.489",4.841603,6.860409
7,Bwari Main,Rivers Nigeria,"4.841,6.860",8.215125,3.56429
8,Port Harcourt,Oyo Nigeria,"8.215,3.564",6.455057,3.394179


In [25]:
# Describing the dataframe after cleaning 
df.describe() 

Unnamed: 0,latitude,longitude
count,30.0,30.0
mean,11.707235,8.703276
std,12.818334,13.854771
min,4.762979,-0.509389
25%,6.2775,5.098246
50%,7.701207,6.767675
75%,9.480115,7.509584
max,51.624583,80.676


In [14]:
# Showing a brief description of the latitude column 
df["latitude"].describe() 

count    30.000000
mean     11.707235
std      12.818334
min       4.762979
25%       6.277500
50%       7.701207
75%       9.480115
max      51.624583
Name: latitude, dtype: float64

In [15]:
# Diplaying a brief description of the longitude column 
df["longitude"].describe() 

count    30.000000
mean      8.703276
std      13.854771
min      -0.509389
25%       5.098246
50%       6.767675
75%       7.509584
max      80.676000
Name: longitude, dtype: float64

In [13]:
# Getting the columns for the loaded dataframe 
columns = df.columns; 

# Displaying the shape of the loaded dataframe 
print(f"Data shape: {df.shape}")
print(columns)

Data shape: (30, 5)
Index(['Head Office', 'Location', 'CoOrdinates', 'latitude', 'longitude'], dtype='object')


In [14]:
# Displaying the description of the head office 
df["Head Office"].describe() 

count                 30
unique                30
top       Allen Avenue 1
freq                   1
Name: Head Office, dtype: object

In [18]:
# Brief description for the location address 
df["Location"].describe() 

count                                    30
unique                                   22
top       Lagos,Lagos Island,100242,Nigeria
freq                                      4
Name: Location, dtype: object

<h3> Geospital Analysis </h3> 

In [19]:
# Getting the country location 
location = "Nigeria" 

# Extract the co-ordinates for nigeria 
co_ordinates = geolocator.geocode(location) 

# Getting the longitude and latitude for Nigeria 
lat = co_ordinates.latitude; 
long = co_ordinates.longitude; 

# Displaying the co-ordinates 
co_ordinates.point

Point(9.6000359, 7.9999721, 0.0)

In [20]:
# mainMap = folium.Map([9.0643, 7.4892974], titles='Stamen Toner', zoom_start=5) 
# folium.Marker([lat, long], 
#                         radius=6,    
#                         ).add_to(mainMap)

# mainMap

In [15]:
# Creating a map for locating NIN locations 
mainMap = folium.Map([lat, long], titles='Stamen Toner', zoom_start=5) 

# 
for index, row in df.iterrows():
    folium.Marker([row['longitude'], row['latitude']], 
                        radius=6, 
                        popup=row['Head Office'],  
                        fill_color="red", 
                        ).add_to(mainMap)

# Showing the locations 
mainMap

In [17]:
df.to_csv("ORIGINAL.csv")
df.to_excel("ORIGINAL.xls")