In [1]:
import requests         #connecting with website
import json             #encoding/decoding JSON data
import pandas as pd     #building dataframe

### Accessing data

In [2]:
#SODA (Socrata Open Data API) endpoint for this dataset - gives programmatic access, including the ability to filter, query, and aggregate the data
#NOTE(review): the extraction further down ends up with exactly 1000 records - presumably the API pages its results; confirm whether that is the full dataset
url = "https://data.nasa.gov/resource/gh4g-9sfh.json"

In [3]:
#request the dataset from the SODA API; the timeout (seconds) stops the notebook from hanging forever on a stalled connection
response = requests.get(url, timeout=30)

#raise immediately on a 4xx/5xx status so later cells never parse an error payload as if it were data
response.raise_for_status()

#display the response; Response [200] - successful connection
response

<Response [200]>

In [4]:
#decode the response body as JSON and keep the result in the variable metrtdata
metrtdata = response.json()

#check the data type of the decoded payload
type(metrtdata)

list

### Investigating Data

Assignment: Extract the following features from the data to create a dataframe (columns should be in the order listed below):

    ID, Year, Fall, Name, Name Type, Mass, Latitude, Longitude

In [5]:
#look at the first item in metrtdata to see which fields a record carries
metrtdata[0]

{'name': 'Aachen',
 'id': '1',
 'nametype': 'Valid',
 'recclass': 'L5',
 'mass': '21',
 'fall': 'Fell',
 'year': '1880-01-01T00:00:00.000',
 'reclat': '50.775000',
 'reclong': '6.083330',
 'geolocation': {'latitude': '50.775', 'longitude': '6.08333'}}

In [6]:
#look at the item at index 5 (the sixth record) in metrtdata to confirm it has the same structure as the first
metrtdata[5]

{'name': 'Adhi Kot',
 'id': '379',
 'nametype': 'Valid',
 'recclass': 'EH4',
 'mass': '4239',
 'fall': 'Fell',
 'year': '1919-01-01T00:00:00.000',
 'reclat': '32.100000',
 'reclong': '71.800000',
 'geolocation': {'latitude': '32.1', 'longitude': '71.8'}}

ID, Name, Name Type, Mass, etc. can all be accessed directly in this first (top-level) layer of each record.

In [7]:
#list the top-level keys of a record; latitude and longitude also sit one layer down, in the dict stored under the key geolocation
metrtdata[0].keys()

dict_keys(['name', 'id', 'nametype', 'recclass', 'mass', 'fall', 'year', 'reclat', 'reclong', 'geolocation'])

In [8]:
#inspect the nested dict stored under the key geolocation
metrtdata[0]['geolocation']

{'latitude': '50.775', 'longitude': '6.08333'}

In [10]:
#looking closer - the top-level reclat/reclong hold the same coordinates as geolocation (reclat = latitude, reclong = longitude), just written with more decimal places
metrtdata[0]['reclat']

'50.775000'

### Collecting Data

In [11]:
#start every feature off as its own separate empty list, to be filled one record at a time
idls, yrls, falls, namels = [], [], [], []
name_typels, massls, latls, longls = [], [], [], []

In [12]:
def cleanText(text):
    """Strip the midnight time suffix from a timestamp string.

    Removes every literal occurrence of 'T00:00:00.000' from text, so
    '1880-01-01T00:00:00.000' becomes '1880-01-01'. Text that does not
    contain the suffix is returned unchanged.

    Raises TypeError (from the regex substitution) if text is not a string.
    """
    import re

    #.compile -> what pattern to look for
    #re.escape keeps the dots literal; left unescaped, each '.' would match ANY character, not just a period
    clean = re.compile(re.escape('T00:00:00.000'))

    #.sub (substitute) -> replace every match with an empty string
    return clean.sub('', text)

In [13]:
#re-check which keys a record offers before extracting values
metrtdata[0].keys()

dict_keys(['name', 'id', 'nametype', 'recclass', 'mass', 'fall', 'year', 'reclat', 'reclong', 'geolocation'])

In [14]:
#extract values for the meteorite dataset

#empty the target lists first so that re-running this cell rebuilds them instead of appending a second copy of every record
for featurels in (idls, yrls, falls, namels, name_typels, massls, latls, longls):
    featurels.clear()

for mite in metrtdata:

    #.get returns None when a key is missing, so no try/except is needed;
    #unlike a bare except, genuine errors (e.g. a record that is not a dict) are raised instead of silently becoming None
    idls.append(mite.get('id'))

    #only strip the time suffix from an actual string; a missing or non-string year is recorded as None
    yr = mite.get('year')
    yrls.append(cleanText(yr) if isinstance(yr, str) else None)

    falls.append(mite.get('fall'))
    namels.append(mite.get('name'))
    name_typels.append(mite.get('nametype'))
    massls.append(mite.get('mass'))

    #reclat/reclong are the top-level latitude/longitude fields of a record
    latls.append(mite.get('reclat'))
    longls.append(mite.get('reclong'))
    

In [15]:
#verify lists filled correctly (remove #'s to view)

#idls
#yrls
#falls
#namels
#name_typels
#massls
#latls
#longls

In [16]:
#verify lists are all the same length - one length per line, in column order

print(*map(len, (idls, yrls, falls, namels, name_typels, massls, latls, longls)), sep='\n')

1000
1000
1000
1000
1000
1000
1000
1000


### Build Dataframe

In [17]:
#column headers, listed in the same order as the lists handed to zip below
fields = ['ID', 'Year', 'Fall', 'Name', 'Name Type', 'Mass', 'Latitude', 'Longitude']

#pair the lists up index by index so that every tuple holds one meteorite's values
met_info = [row for row in zip(idls, yrls, falls, namels, name_typels, massls, latls, longls)]

#assemble the meteorite dataframe from the rows and headers
df_met = pd.DataFrame(data=met_info, columns=fields)

df_met

Unnamed: 0,ID,Year,Fall,Name,Name Type,Mass,Latitude,Longitude
0,1,1880-01-01,Fell,Aachen,Valid,21,50.775000,6.083330
1,2,1951-01-01,Fell,Aarhus,Valid,720,56.183330,10.233330
2,6,1952-01-01,Fell,Abee,Valid,107000,54.216670,-113.000000
3,10,1976-01-01,Fell,Acapulco,Valid,1914,16.883330,-99.900000
4,370,1902-01-01,Fell,Achiras,Valid,780,-33.166670,-64.950000
...,...,...,...,...,...,...,...,...
995,24009,1934-01-01,Fell,Tirupati,Valid,230,13.633330,79.416670
996,54823,2011-01-01,Fell,Tissint,Valid,7000,29.481950,-7.611230
997,24011,1869-01-01,Fell,Tjabe,Valid,20000,-7.083330,111.533330
998,24012,1922-01-01,Fell,Tjerebon,Valid,16500,-6.666670,106.583330
