In [None]:
# fetch NASA data
# `data` is the raw payload; it is uploaded to S3 unchanged and then flattened
# by the cells below.
# NOTE(review): presumably this performs a network request to a NASA NEO API;
# the endpoint, date range and credentials are not visible here — confirm.
from src.client.api_client import fetch_neo_data
data = fetch_neo_data()

In [None]:
# store raw data in S3
# The payload is saved exactly as fetched, before any transformation is applied.
# NOTE(review): bucket, object key and credentials are not passed here;
# presumably AWSClient resolves them internally — confirm.
from src.db.aws_client import AWSClient
aws_client = AWSClient()
aws_client.save_data_to_s3(data)

In [None]:
# transform and clean data for storage
# `flat_data` is the input of the close-approach split in the next cell.
# NOTE(review): the structure returned by extract_neo is not visible from this
# notebook — check src.transform.flatten_neo before relying on its shape.
from src.transform.flatten_neo import extract_neo
flat_data = extract_neo(data)

In [None]:
# create a separate list for close approaches
from src.transform.clean_approaches import get_new_neo_approaches

# The call returns a pair: `neo` is later indexed with the "neo" key and
# `approaches` is handed directly to pd.DataFrame.
neo, approaches = get_new_neo_approaches(flat_data)

In [None]:
# store both neo data and approaches data in pandas dataframes
import pandas as pd
# Only the entry under the "neo" key of `neo` is tabulated.
# NOTE(review): presumably that entry is a list of per-object records — confirm.
neo_df = pd.DataFrame(neo["neo"])
approaches_df = pd.DataFrame(approaches)

In [None]:
# preview the first rows of the NEO table as a sanity check
neo_df.head()

In [None]:
# preview the first rows of the close-approach table as a sanity check
approaches_df.head()

In [None]:
# standardize NEO names to a format: "YYYY identifier"
import re

regex = r"\d{4} [\w\d]+"
# Compiled once so the pattern is not re-parsed for every row.
NEO_NAME_PATTERN = re.compile(regex)


def standardize_neo_name(name):
  """Return the first "YYYY identifier" substring found in ``name``.

  Args:
    name: A raw NEO name. Non-string values (e.g. None or NaN from missing
      data) are accepted and passed through untouched instead of raising.

  Returns:
    The first substring matching four digits, a space and a run of word
    characters; ``name`` itself when there is no such substring or when
    ``name`` is not a string.
  """
  if not isinstance(name, str):
    return name
  # A single search per name; the match object is reused for the result.
  match = NEO_NAME_PATTERN.search(name)
  return match.group() if match is not None else name


neo_df["clean_neo_name"] = neo_df["neo_name"].apply(standardize_neo_name)

# Assign first, then pop + insert to move the column to position 2. This keeps
# the cell re-runnable: a bare insert would fail once the column already exists.
neo_df.insert(2, "clean_neo_name", neo_df.pop("clean_neo_name"))
neo_df.head()

In [None]:
# store both datasets in Postgres
from src.db.sql_client import SQLClient
sql_client = SQLClient()

# try/finally guarantees close() runs even when one of the store calls raises,
# so a failed write does not leave the client open in the long-lived kernel.
# NOTE(review): whether the two writes share a transaction is decided inside
# SQLClient and is not visible here — confirm if atomicity matters.
try:
  sql_client.store_neo_data(neo_df)
  sql_client.store_approach_data(approaches_df)
finally:
  sql_client.close()