# API
Here, I'll gather data from an API

# Step 1

Import the data, load into a data frame

In [1]:
# imports
import requests
import pandas as pd

In [2]:
# requests and read JSON
r = requests.get('https://api.nasa.gov/neo/rest/v1/feed?detailed=true&api_key=DEMO_KEY')
json = r.json()

In [3]:
# here I loop through the JSON, and extract column names and orbital data to my data frame
NEO = json['near_earth_objects']
first_time = True
for date in NEO: # loop through each day
    sats = NEO[date]
    for sat in sats: # loop through each object
        row = []
        columns = ['name']
        sat_name = sat['name']
        row.append(sat_name)
        orbital_data = sat['orbital_data']
        if first_time: # create the data frame
            for column in orbital_data:
                columns.append(column)
            df = pd.DataFrame(columns=columns[:-2])
            first_time = False
        for column in orbital_data: # create row of data
            row.append(orbital_data[column])
        row = row[:-2]
        df.loc[len(df.index)] = row 

In [4]:
df.head()

Unnamed: 0,name,orbit_id,orbit_determination_date,first_observation_date,last_observation_date,data_arc_in_days,observations_used,orbit_uncertainty,minimum_orbit_intersection,jupiter_tisserand_invariant,...,semi_major_axis,inclination,ascending_node_longitude,orbital_period,perihelion_distance,perihelion_argument,aphelion_distance,perihelion_time,mean_anomaly,mean_motion
0,503871 (2000 SL),81,2023-03-29 06:36:52,2000-05-04,2023-03-28,8363,320,0,0.1246,4.162,...,1.540283568601054,38.35226293577747,207.73368465917,698.2315207878494,0.9324002107085844,70.1980304057716,2.148166926493524,2460116.1610278846,300.3665277221089,0.5155882959763749
1,(2011 EH17),9,2021-04-15 04:52:51,2011-03-06,2021-03-07,3654,63,3,0.00730455,7.682,...,0.730849011197336,3.000433805917445,333.1049025630576,228.212759525363,0.2485366217656095,35.95064504113311,1.213161400629063,2459900.0317060174,158.4862560221435,1.577475338139411
2,(2011 SH189),16,2021-04-15 05:24:33,2011-09-11,2011-12-02,82,63,4,0.106832,5.212,...,1.223574775504696,5.867995692891024,182.7226027500625,494.3609973563272,1.094679597955246,217.4615976459775,1.352469953054146,2459822.5331518953,129.5977345712941,0.728212787669651
3,(2015 VL64),6,2021-04-15 15:13:25,2015-11-07,2015-11-13,6,29,5,0.0023298,6.524,...,0.9090949676991688,0.7069711460919803,51.69155038506252,316.6010183703575,0.6467751993097297,225.1549173308097,1.171414736088608,2460096.876437495,250.4124892683682,1.137077833334303
4,(2018 SB3),9,2021-04-15 20:42:07,2018-09-30,2018-11-02,33,45,6,0.00938858,5.471,...,1.143541234479607,2.571300070806959,339.57263008,446.6590838233711,0.930823879823715,313.744343053062,1.356258589135499,2460113.1907355017,269.1730891638002,0.8059838320502175


# Step 2

I will turn the column "name" into the row index

In [5]:
# first strip out the parentheses
df['name'] = df['name'].apply(lambda name: name.replace('(', ''))
df['name'] = df['name'].apply(lambda name: name.replace(')', ''))

# set index
df.set_index('name', inplace=True)

# Step 3

The numerical values have too many decimal places...I'm going to round them

In [6]:
# function to round to 2 decimals
def round_2_places(number):
    number = float(number)
    return round(number, 2)

In [7]:
# round to 2 decimals
for column in df:
    try :
        df[column] = df[column].apply(round_2_places)
    except ValueError:
        pass

# Step 4 -  Rename columns

Add units to the columns (where known)

In [8]:
rename_mapper = {'semi_major_axis': 'semi_major_axis (AU)', 'inclination': 'inclination (deg)',
                'ascending_node_longitude': 'ascending_node_longitude (deg)', 'orbital_period':
                'orbital_period (days)', 'perihelion_distance': 'perihelion_distance (AU)',
                'aphelion_distance': 'aphelion_distance (AU)', 'mean_anomaly': 'mean_anomaly (deg)',
                'mean_motion': 'mean_motion (revs/year)'}

df.rename(columns=rename_mapper, inplace=True)

This is subject to change (maybe later).

# Step 5 - Drop some columns

Going to drop some columns that I think are redundant, or don't really know the meaning of.

In [9]:
drops = ['orbit_id', 'jupiter_tisserand_invariant']
df.drop(columns=drops, inplace=True)

In [10]:
df.head()

Unnamed: 0_level_0,orbit_determination_date,first_observation_date,last_observation_date,data_arc_in_days,observations_used,orbit_uncertainty,minimum_orbit_intersection,epoch_osculation,eccentricity,semi_major_axis (AU),inclination (deg),ascending_node_longitude (deg),orbital_period (days),perihelion_distance (AU),perihelion_argument,aphelion_distance (AU),perihelion_time,mean_anomaly (deg),mean_motion (revs/year)
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
503871 2000 SL,2023-03-29 06:36:52,2000-05-04,2023-03-28,8363.0,320.0,0.0,0.12,2460000.5,0.39,1.54,38.35,207.73,698.23,0.93,70.2,2.15,2460116.16,300.37,0.52
2011 EH17,2021-04-15 04:52:51,2011-03-06,2021-03-07,3654.0,63.0,3.0,0.01,2460000.5,0.66,0.73,3.0,333.1,228.21,0.25,35.95,1.21,2459900.03,158.49,1.58
2011 SH189,2021-04-15 05:24:33,2011-09-11,2011-12-02,82.0,63.0,4.0,0.11,2460000.5,0.11,1.22,5.87,182.72,494.36,1.09,217.46,1.35,2459822.53,129.6,0.73
2015 VL64,2021-04-15 15:13:25,2015-11-07,2015-11-13,6.0,29.0,5.0,0.0,2460000.5,0.29,0.91,0.71,51.69,316.6,0.65,225.15,1.17,2460096.88,250.41,1.14
2018 SB3,2021-04-15 20:42:07,2018-09-30,2018-11-02,33.0,45.0,6.0,0.01,2460000.5,0.19,1.14,2.57,339.57,446.66,0.93,313.74,1.36,2460113.19,269.17,0.81


Data set clean!