In [5]:
# module to install : changes lat long to Country
#run once

!pip install reverse_geocoder



In [2]:
#Dependencies and Set Up
import requests
import json
import pandas as pd
import reverse_geocoder as rg
import matplotlib.pyplot as plt

from pprint import pprint
from datetime import datetime as dt

#define the output folder to save csv data and png to
output_folder = "../output_data/"

### Note: Gathered Earthquake information by start/end dates and alert level, through api
API: https://earthquake.usgs.gov/fdsnws/event/1/query?

Start: Jan 1st 2010 to Dec 31st 2020

1. Green:
2. Yellow:
3. Orange:
4. Red:

In [3]:
#Build a parameter and url in order to request data from API
paramss = {"format": "geojson", 
           "starttime": "2010-01-01", 
           "endtime": "2020-12-31"}

url = r"https://earthquake.usgs.gov/fdsnws/event/1/query?"

#Create list for the different alert levels, and an empty data list to append data in it
alert_levels=['green','yellow','orange','red']
data_list = []

def callapi_earthquake(level):
    #Function to get the json data from earthquake.usgs.gov
    paramss['alertlevel'] = level
    print(paramss)
    try:
        response = requests.get(url, params = paramss)
        print(response.status_code)
        if response.status_code == 200:
            data = json.loads(response.text)
            #pprint(data)

    except requests.exceptions.RequestException as e:
        print(e)
    finally:
        return data

In [4]:
def getJsonData():
    for level in alert_levels:
        data = callapi_earthquake(level)
        if data:
            data_list.append(data)
    
    #save to .json file
    with open('data.json', 'w') as outfile:
        json.dump(data_list, outfile)

getJsonData()  
print('JSON file created.')

{'format': 'geojson', 'starttime': '2010-01-01', 'endtime': '2020-12-31', 'alertlevel': 'green'}
200
{'format': 'geojson', 'starttime': '2010-01-01', 'endtime': '2020-12-31', 'alertlevel': 'yellow'}
200
{'format': 'geojson', 'starttime': '2010-01-01', 'endtime': '2020-12-31', 'alertlevel': 'orange'}
200
{'format': 'geojson', 'starttime': '2010-01-01', 'endtime': '2020-12-31', 'alertlevel': 'red'}
200
JSON file created.


In [5]:
#Opening JSON file
f = open('data.json',)
 
#Returns JSON object 
data = json.load(f)

# Closing file
f.close()




In [7]:
#Create a list to append values and data required for analysis
earthquake_subdata= []

# Iterating through the json 

for feature in data[0]['features']:
    try:
        #setup a dictionary to hold data for a dataframe
        dEarthquake = {}
        dEarthquake['id']= feature['id']
        dEarthquake['Latitude']= feature['geometry']['coordinates'][0]
        dEarthquake['Longitude']= feature['geometry']['coordinates'][1]
        dEarthquake['Depth']= feature['geometry']['coordinates'][2]
        #created tuple for finding country
        dEarthquake['Lat_Lng'] = (dEarthquake['Latitude'],dEarthquake['Longitude'])
        dEarthquake['Alert']= feature['properties']['alert']
        dEarthquake['Place']= feature['properties']['place']
        #return date as a readable format
        dEarthquake['Date']= dt.fromtimestamp(feature['properties']['time']/1000)
        dEarthquake['Magnitude']= feature['properties']['mag']
        dEarthquake['Tsunami']= feature['properties']['tsunami']
        dEarthquake['Type']= feature['properties']['type']

        earthquake_subdata.append(dEarthquake)
    
    except:
        print(f"Could not gather some information from id: {feature['id']}")




#completed load
print('Completed retrieval of data')

Completed retrieval of data


In [8]:
# put the data to a DataFrame
earthquake_data= pd.DataFrame(earthquake_subdata)
earthquake_data.head()

Unnamed: 0,id,Latitude,Longitude,Depth,Lat_Lng,Alert,Place,Date,Magnitude,Tsunami,Type
0,us6000d454,-21.1005,-0.7603,10.0,"(-21.1005, -0.7603)",green,central Mid-Atlantic Ridge,2020-12-29 18:34:57.647,5.7,0,earthquake
1,us6000d3xn,-126.9951,43.389,10.0,"(-126.9951, 43.389)",green,"212 km W of Bandon, Oregon",2020-12-29 02:10:29.079,5.7,1,earthquake
2,us6000d3x8,154.6521,-6.3985,23.63,"(154.6521, -6.3985)",green,"92 km W of Panguna, Papua New Guinea",2020-12-29 01:23:29.769,5.4,0,earthquake
3,us6000d3ln,124.5061,-0.1917,51.77,"(124.5061, -0.1917)",green,"170 km S of Tomohon, Indonesia",2020-12-28 02:31:11.352,5.1,0,earthquake
4,us6000d3kx,56.78,14.2272,10.0,"(56.78, 14.2272)",green,Owen Fracture Zone region,2020-12-28 00:46:46.797,5.4,0,earthquake


In [9]:
# Check the length of data
len(earthquake_data)

5854

In [10]:
# Check if the data is clean
earthquake_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5854 entries, 0 to 5853
Data columns (total 11 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   id         5854 non-null   object        
 1   Latitude   5854 non-null   float64       
 2   Longitude  5854 non-null   float64       
 3   Depth      5854 non-null   float64       
 4   Lat_Lng    5854 non-null   object        
 5   Alert      5854 non-null   object        
 6   Place      5853 non-null   object        
 7   Date       5854 non-null   datetime64[ns]
 8   Magnitude  5852 non-null   float64       
 9   Tsunami    5854 non-null   int64         
 10  Type       5854 non-null   object        
dtypes: datetime64[ns](1), float64(4), int64(1), object(5)
memory usage: 503.2+ KB


In [11]:
# Data does not include Country information, get this information from reverse_geocoder
def reverseGeocode(coordinates):
    result = rg.search(coordinates)
    # result is a list containing ordered dictionary.
    return(result)

In [12]:
# TEST: Look at data recieved from reverse_geocoder
country_test = reverseGeocode(earthquake_data['Lat_Lng'][0])
country_test

Loading formatted geocoded file...


[OrderedDict([('lat', '-15.93872'),
              ('lon', '-5.71675'),
              ('name', 'Jamestown'),
              ('admin1', 'Saint Helena'),
              ('admin2', ''),
              ('cc', 'SH')])]

In [13]:
# TEST: getting information into a dictionary
dict_country_info = {}
dict_country_info['City'] = country_test[0]['name']
dict_country_info['Country'] = country_test[0]['admin1']
dict_country_info['Country_Code'] = country_test[0]['cc']

dict_country_info

{'City': 'Jamestown', 'Country': 'Saint Helena', 'Country_Code': 'SH'}

In [16]:
# Gather City,Country, Country_Code information for all lat_lng
country_info_list = []
for index,row in earthquake_data.iterrows():
    dict_country_info = {}
    #match on row id 
    dict_country_info['id'] = row['id']   
    #get country info
    country_info = reverseGeocode(row['Lat_Lng']) 
    if country_info:
        try:
            #add country info to dictionary
            dict_country_info['city'] = country_info[0]['name']
            dict_country_info['country'] = country_info[0]['admin1']
            dict_country_info['country_code'] = country_info[0]['cc']
            #append dictionary to list
            country_info_list.append(dict_country_info)
        except:
            print(f"Error country information from id: {row['id']} ")

# Note: Can take 10 minutes
print('Done getting country data...')

Done getting country data...


In [17]:
# Put the country information into a DataFrame
country_info_df = pd.DataFrame(country_info_list)
country_info_df.head()

Unnamed: 0,id,city,country,country_code
0,us6000d454,Jamestown,Saint Helena,SH
1,us6000d3xn,Port-aux-Francais,Kerguelen,TF
2,us6000d3x8,Longyearbyen,Svalbard,SJ
3,us6000d3ln,Longyearbyen,Svalbard,SJ
4,us6000d3kx,Rydaholm,Joenkoeping,SE


In [18]:
#check length and check info for shape and nulls
country_info_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5854 entries, 0 to 5853
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            5854 non-null   object
 1   city          5854 non-null   object
 2   country       5854 non-null   object
 3   country_code  5854 non-null   object
dtypes: object(4)
memory usage: 183.1+ KB


In [19]:
#remove anything nulls:  All good 

In [20]:
# merge the two dataframes together
earthquake_country = earthquake_data.merge(country_info_df, how='left', on='id')
earthquake_country.head()

Unnamed: 0,id,Latitude,Longitude,Depth,Lat_Lng,Alert,Place,Date,Magnitude,Tsunami,Type,city,country,country_code
0,us6000d454,-21.1005,-0.7603,10.0,"(-21.1005, -0.7603)",green,central Mid-Atlantic Ridge,2020-12-29 18:34:57.647,5.7,0,earthquake,Jamestown,Saint Helena,SH
1,us6000d3xn,-126.9951,43.389,10.0,"(-126.9951, 43.389)",green,"212 km W of Bandon, Oregon",2020-12-29 02:10:29.079,5.7,1,earthquake,Port-aux-Francais,Kerguelen,TF
2,us6000d3x8,154.6521,-6.3985,23.63,"(154.6521, -6.3985)",green,"92 km W of Panguna, Papua New Guinea",2020-12-29 01:23:29.769,5.4,0,earthquake,Longyearbyen,Svalbard,SJ
3,us6000d3ln,124.5061,-0.1917,51.77,"(124.5061, -0.1917)",green,"170 km S of Tomohon, Indonesia",2020-12-28 02:31:11.352,5.1,0,earthquake,Longyearbyen,Svalbard,SJ
4,us6000d3kx,56.78,14.2272,10.0,"(56.78, 14.2272)",green,Owen Fracture Zone region,2020-12-28 00:46:46.797,5.4,0,earthquake,Rydaholm,Joenkoeping,SE


In [21]:
# Check for any nulls 
earthquake_country.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5854 entries, 0 to 5853
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   id            5854 non-null   object        
 1   Latitude      5854 non-null   float64       
 2   Longitude     5854 non-null   float64       
 3   Depth         5854 non-null   float64       
 4   Lat_Lng       5854 non-null   object        
 5   Alert         5854 non-null   object        
 6   Place         5853 non-null   object        
 7   Date          5854 non-null   datetime64[ns]
 8   Magnitude     5852 non-null   float64       
 9   Tsunami       5854 non-null   int64         
 10  Type          5854 non-null   object        
 11  city          5854 non-null   object        
 12  country       5854 non-null   object        
 13  country_code  5854 non-null   object        
dtypes: datetime64[ns](1), float64(4), int64(1), object(8)
memory usage: 686.0+ KB


In [22]:
# save cleaned data to csv file  
earthquake_country.to_csv(output_folder + "EarthquakeData.csv", encoding="utf-8", index = True)

In [25]:
# Load the clean information into a DataFrame for remainder of project
earthquake_data = pd.read_csv(output_folder + "EarthquakeData.csv", usecols=range(1,15))
earthquake_data.head()

Unnamed: 0,id,Latitude,Longitude,Depth,Lat_Lng,Alert,Place,Date,Magnitude,Tsunami,Type,city,country,country_code
0,us6000d454,-21.1005,-0.7603,10.0,"(-21.1005, -0.7603)",green,central Mid-Atlantic Ridge,2020-12-29 18:34:57.647,5.7,0,earthquake,Jamestown,Saint Helena,SH
1,us6000d3xn,-126.9951,43.389,10.0,"(-126.9951, 43.389)",green,"212 km W of Bandon, Oregon",2020-12-29 02:10:29.079,5.7,1,earthquake,Port-aux-Francais,Kerguelen,TF
2,us6000d3x8,154.6521,-6.3985,23.63,"(154.6521, -6.3985)",green,"92 km W of Panguna, Papua New Guinea",2020-12-29 01:23:29.769,5.4,0,earthquake,Longyearbyen,Svalbard,SJ
3,us6000d3ln,124.5061,-0.1917,51.77,"(124.5061, -0.1917)",green,"170 km S of Tomohon, Indonesia",2020-12-28 02:31:11.352,5.1,0,earthquake,Longyearbyen,Svalbard,SJ
4,us6000d3kx,56.78,14.2272,10.0,"(56.78, 14.2272)",green,Owen Fracture Zone region,2020-12-28 00:46:46.797,5.4,0,earthquake,Rydaholm,Joenkoeping,SE
