In [None]:
# Import some libraries
import numpy as np  # useful for many scientific computing in Python
import pandas as pd # primary data structure library

In [None]:
# Note:
#
# The country column was added by hand to the data file generated by the
# previous notebook. With only 26 unique countries, that was manageable.
#
# My own area code was also added by hand, because the chat logs listed
# only my name and no phone number for me.

# Load the data file and preview the first rows
df = pd.read_csv("chat_area_codes_2.csv")
df.head()

Unnamed: 0,Code,Country
0,223,Mali
1,233,Ghana
2,31,Netherlands
3,32,Belgium
4,33,France


In [None]:
# Show the dtype pandas inferred for each column of df
df.dtypes

Code        int64
Country    object
dtype: object

In [None]:
# Load the world geo data (country -> latitude/longitude lookup table)
df_codes = pd.read_csv("world_country_and_usa_states_latitude_and_longitude_values.csv")
# Preview the frame that was just loaded
df_codes.head()

Unnamed: 0,Code,Country
0,223,Mali
1,233,Ghana
2,31,Netherlands
3,32,Belgium
4,33,France


In [None]:
# Load the US geo data from us-area-code-geo.csv and preview the first rows
df_area_codes = pd.read_csv("us-area-code-geo.csv")
df_area_codes.head()

Unnamed: 0,area_code,latitude,longitude
0,201,40.83885,-74.045678
1,202,38.89511,-77.03637
2,203,41.291798,-73.122453
3,205,33.427671,-86.886473
4,206,47.564027,-122.348976


In [None]:
# Load the Canada geo data from ca-area-code-geo.csv and preview the first rows
df_ca_area_codes = pd.read_csv("ca-area-code-geo.csv")
df_ca_area_codes.head()

Unnamed: 0,area_code,latitude,longitude
0,204,51.203034,-98.729935
1,226,43.233831,-81.230922
2,236,50.432725,-121.515369
3,249,46.041823,-80.003286
4,250,50.837259,-121.84152


In [None]:
# Function to return latitude/longitude based on the country and/or area code of the row
def getLatitudeLongitude(row, us_codes=None, ca_codes=None, world_codes=None):
  """Look up the latitude/longitude for one row of the chat area-code table.

  Parameters
  ----------
  row : mapping-like (e.g. pandas.Series)
      Must provide 'Country' and 'Code'.
  us_codes : pandas.DataFrame, optional
      Table with 'area_code', 'latitude' and 'longitude' columns, searched
      when the country is 'United States'. Defaults to the notebook-level
      ``df_area_codes``.
  ca_codes : pandas.DataFrame, optional
      Table with the same columns, searched when the code is not found in
      ``us_codes``. Defaults to the notebook-level ``df_ca_area_codes``.
  world_codes : pandas.DataFrame, optional
      Table with 'country', 'latitude' and 'longitude' columns, searched for
      every other country. Defaults to the notebook-level ``df_codes``.

  Returns
  -------
  pandas.Series
      Two values, ``[latitude, longitude]``. Both are NaN when the lookup
      does not yield exactly one matching row, so that
      ``DataFrame.apply(..., axis=1)`` always receives a Series and missing
      matches show up as nulls instead of breaking the column assignment.
  """
  coord_cols = ['latitude', 'longitude']

  # If the country is United States, search the US dataset for geo data
  if row['Country'] == 'United States':
    us = df_area_codes if us_codes is None else us_codes
    match = us.loc[us['area_code'] == row['Code'], coord_cols]
    # Nothing found: the area code probably belongs to Canada,
    # so search that dataset instead
    if len(match) == 0:
      ca = df_ca_area_codes if ca_codes is None else ca_codes
      match = ca.loc[ca['area_code'] == row['Code'], coord_cols]
  else:
    # Not US/Canada, so search the world dataset by country name
    world = df_codes if world_codes is None else world_codes
    match = world.loc[world['country'] == row['Country'], coord_cols]

  # Exactly one hit is the only unambiguous answer
  if len(match) == 1:
    return pd.Series([match.iloc[0, 0], match.iloc[0, 1]])

  # No match (or an ambiguous one): return NaNs rather than None so the
  # result of apply() keeps a consistent two-column shape
  return pd.Series([np.nan, np.nan])

In [None]:
# Test the function on the first row of df and display what it returns
ret = getLatitudeLongitude(df.iloc[0])
ret

0    17.570692
1    -3.996166
dtype: float64

In [None]:
# Work on a copy so the originally loaded frame stays untouched
df2 = df.copy(deep=True)

# Run the lookup once per row and store the two results as new columns
df2[['latitude', 'longitude']] = df2.apply(getLatitudeLongitude, axis='columns')
df2.head(n=5)

Unnamed: 0,Code,Country,latitude,longitude
0,223,Mali,17.570692,-3.996166
1,233,Ghana,7.946527,-1.023194
2,31,Netherlands,52.132633,5.291266
3,32,Belgium,50.503887,4.469936
4,33,France,46.227638,2.213749


In [None]:
# Show every row that has at least one missing value (expected: none)
df2.loc[df2.isna().any(axis='columns')]

Unnamed: 0,Code,Country,latitude,longitude


In [None]:
# Save the data, including the new latitude/longitude columns, to a CSV file
# (index=False leaves the DataFrame index out of the file)
df2.to_csv("chat_geo_data.csv", index=False)