In [8]:
import os
import json
import requests
from dotenv import load_dotenv
import pandas as pd

In [9]:
# Load environment variables through python-dotenv (by default from a .env
# file) so that the API key can afterwards be read with os.getenv.
load_dotenv()

True

In [10]:
# Root of the NASA API and the personal key taken from the environment.
# NOTE: if "nasa_api_key" is not set, api_key is None and the URL below ends
# with the literal text "api_key=None".
BASE_URL = 'https://api.nasa.gov'
api_key = os.environ.get("nasa_api_key")

# URL of the first page of the NEO browse endpoint.
initial_url = '{}/neo/rest/v1/neo/browse?api_key={}'.format(BASE_URL, api_key)

In [22]:
# Connectivity check against the first page of the browse endpoint.
# A timeout keeps the cell from hanging forever on a stalled connection, and a
# non-200 status is reported instead of being silently ignored.
response = requests.get(initial_url, timeout=30)
if response.status_code == 200:
    print("200, OK")
else:
    print(f"Error: {response.status_code}")

200, OK


In [12]:
def get_neos_data(limit=400, timeout=30):
    """Download near-Earth-object records by paging through the browse endpoint.

    Starts at the module-level ``initial_url`` and follows the ``links.next``
    URL of each response until there is no next page, ``limit`` pages have
    been requested, or a request fails. Records collected before a failure
    are still returned, so the result may be partial.

    Args:
        limit: Maximum number of pages to request (default 400).
        timeout: Seconds to wait for each HTTP response (default 30).

    Returns:
        list: The ``near_earth_objects`` entries of every successfully
        fetched page, concatenated in the order received.
    """
    url = initial_url
    neos_data = []

    for _ in range(limit):
        try:
            # Without a timeout a stalled connection would block the notebook forever.
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # Only the exception type is printed: the full message can contain
            # the request URL, which carries the API key.
            print(f"Error: request failed ({type(exc).__name__})")
            break

        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            break

        try:
            data = response.json()
        except ValueError:
            # json.JSONDecodeError and the decoding errors raised by requests
            # are all ValueError subclasses.
            print("Error decoding JSON response")
            break

        if not isinstance(data, dict):
            print("Error: unexpected response format")
            break

        # Tolerate pages that lack either key instead of raising KeyError and
        # losing everything downloaded so far.
        neos_data.extend(data.get('near_earth_objects') or [])
        url = (data.get('links') or {}).get('next')
        if not url:
            break

    return neos_data

In [13]:
def clean_neos_data(neos_raw):
    """Reduce raw NEO records to the fields kept for analysis.

    Args:
        neos_raw: Iterable of dicts, one per near-Earth object.

    Returns:
        list: One dict per input record with the keys ``id``, ``name``,
        ``is_potentially_hazardous_asteroid``, ``estimated_diameter_km_max``
        and ``approach_data``. ``approach_data`` is a list with one dict per
        entry of the record's ``close_approach_data``. Missing scalar fields
        become ``None``; a missing velocity or miss distance becomes ``0.0``.
    """
    def _slim_approach(entry):
        # Velocity and distance are converted with float(); 0.0 is the fallback.
        velocity = entry.get('relative_velocity', {})
        distance = entry.get('miss_distance', {})
        return {
            'close_approach_date': entry.get('close_approach_date'),
            'relative_velocity_kph': float(velocity.get('kilometers_per_hour', 0.0)),
            'miss_distance_km': float(distance.get('kilometers', 0.0)),
            'orbiting_body': entry.get('orbiting_body'),
        }

    records = []
    for raw in neos_raw:
        diameter_km = raw.get('estimated_diameter', {}).get('kilometers', {})
        records.append({
            'id': raw.get('id'),
            'name': raw.get('name'),
            'is_potentially_hazardous_asteroid': raw.get('is_potentially_hazardous_asteroid'),
            'estimated_diameter_km_max': diameter_km.get('estimated_diameter_max'),
            'approach_data': [
                _slim_approach(entry)
                for entry in raw.get('close_approach_data', [])
            ],
        })
    return records

In [14]:
# Download the raw NEO records (network-bound: get_neos_data pages through the
# API, up to 400 requests), then reduce them to the fields used below.
neos_raw = get_neos_data()
neos_cleaned = clean_neos_data(neos_raw)

In [15]:
# One row per asteroid; the 'approach_data' column still holds each asteroid's
# nested list of close-approach dicts.
asteroids_df = pd.DataFrame(neos_cleaned)
asteroids_df.head()

Unnamed: 0,id,name,is_potentially_hazardous_asteroid,estimated_diameter_km_max,approach_data
0,2000433,433 Eros (A898 PA),False,49.208483,"[{'close_approach_date': '1900-12-27', 'relati..."
1,2000719,719 Albert (A911 TB),False,4.529393,"[{'close_approach_date': '1909-08-21', 'relati..."
2,2000887,887 Alinda (A918 AA),False,10.328565,"[{'close_approach_date': '1974-01-04', 'relati..."
3,2001036,1036 Ganymed (A924 UB),False,86.704169,"[{'close_approach_date': '1910-02-25', 'relati..."
4,2001221,1221 Amor (1932 EA1),False,1.995446,"[{'close_approach_date': '1908-03-14', 'relati..."


In [16]:
# Persist the asteroid-level table. The output folder is created first so the
# cell also runs on a fresh checkout where ./datasets does not exist yet.
# NOTE: the nested 'approach_data' column is written as its string representation.
os.makedirs('./datasets', exist_ok=True)
asteroids_df.to_csv('./datasets/asteroid.csv')

In [19]:
# Flatten the nested approaches: one row per (asteroid, close approach).
# explode() yields a non-dict placeholder for asteroids without approaches,
# so only rows whose cell is a dict are kept before renumbering the index.
df_exploded = (
    asteroids_df
    .explode('approach_data')
    .loc[lambda frame: frame['approach_data'].apply(lambda cell: isinstance(cell, dict))]
    .reset_index(drop=True)
)

# Expand each approach dict into columns; rows line up positionally with
# df_exploded, so the asteroid identifiers can be attached directly.
approach_df = pd.DataFrame(df_exploded['approach_data'].tolist())
approach_df["asteroid_id"] = df_exploded["id"]
approach_df["asteroid_name"] = df_exploded["name"]
approach_df.head()

Unnamed: 0,close_approach_date,relative_velocity_kph,miss_distance_km,orbiting_body,asteroid_id,asteroid_name
0,1900-12-27,20083.029075,47112730.0,Earth,2000433,433 Eros (A898 PA)
1,1907-11-05,15820.167199,70533230.0,Earth,2000433,433 Eros (A898 PA)
2,1917-04-20,17340.422466,74687810.0,Earth,2000433,433 Eros (A898 PA)
3,1924-03-05,16545.797588,53823290.0,Earth,2000433,433 Eros (A898 PA)
4,1931-01-30,21314.946723,26040970.0,Earth,2000433,433 Eros (A898 PA)


In [20]:
# Persist the per-approach table. The output folder is created first so the
# cell also runs on a fresh checkout where ./datasets does not exist yet.
os.makedirs('./datasets', exist_ok=True)
approach_df.to_csv('./datasets/approach.csv')