In [2]:
import requests
import pandas as pd
import pymongo
from config import api_key

# API Calls & DataFrame Creation

In [21]:
#SpaceX endpoint that lists launches which have not flown yet
url = "https://api.spacexdata.com/v4/launches/upcoming"
#download the upcoming launches and decode the JSON payload
results = requests.get(url).json()
#keep one small record per launch: its id, local launch timestamp and launch pad id
space = [
    {
        "flight_id": launch["id"],
        "launch_date": launch["date_local"],
        "launch_pad": launch["launchpad"],
    }
    for launch in results
]
#turn the list of records into a dataframe and preview it
space_df = pd.DataFrame(space)
space_df.head(5)

Unnamed: 0,flight_id,launch_date,launch_pad
0,60428aafc041c16716f73cd7,2021-03-24T04:58:00-04:00,5e9e4501f509094ba4566f84
1,60428ac4c041c16716f73cd8,2021-02-28T19:00:00-05:00,5e9e4501f509094ba4566f84
2,5fe3af58b3467846b324215f,2021-04-22T06:11:00-04:00,5e9e4502f509094188566f88
3,5fe3af43b3467846b324215e,2021-04-30T17:00:00-07:00,5e9e4502f509092b78566f87
4,600f9b6d8f798e2a4d5f979f,2021-05-31T20:00:00-04:00,5e9e4501f509094ba4566f84


In [20]:
#SpaceX endpoint that lists the launch pads
url_b = "https://api.spacexdata.com/v4/launchpads"
#download the launch pad records and decode the JSON payload
results_b = requests.get(url_b).json()
#keep one record per pad: full name, pad id and coordinates
pads = [
    {
        "Name": pad["full_name"],
        "launch_pad": pad["id"],
        "lat": pad["latitude"],
        "long": pad["longitude"],
    }
    for pad in results_b
]
#turn the list of records into a dataframe and preview it
pads_df = pd.DataFrame(pads)
pads_df.head(5)

Unnamed: 0,Name,launch_pad,lat,long
0,Vandenberg Air Force Base Space Launch Complex 3W,5e9e4501f5090910d4566f83,34.64409,-120.593144
1,Cape Canaveral Space Force Station Space Launc...,5e9e4501f509094ba4566f84,28.561857,-80.577366
2,SpaceX South Texas Launch Site,5e9e4502f5090927f8566f85,25.997264,-97.156085
3,Kwajalein Atoll Omelek Island,5e9e4502f5090995de566f86,9.047721,167.743129
4,Vandenberg Air Force Base Space Launch Complex 4E,5e9e4502f509092b78566f87,34.632093,-120.610829


In [19]:
#NOTE: this cell reads launch_location, which is built in the "Merges" section
#further down; on a fresh kernel that cell has to be run before this one.

#list that collects one record per location per forecast day
weather = []
#query parameters shared by every weather request (lat/lon are filled in per location)
params = {
    "key": api_key
}
#base URL for the weather API's daily forecast (HTTPS so the API key is not sent in clear text)
weath_url="https://api.weatherbit.io/v2.0/forecast/daily"

#several launches share a launch pad, so request each lat/long pair only once
#instead of once per launch (avoids redundant API calls and duplicate rows)
coordinates = launch_location[["lat", "long"]].drop_duplicates()

#retrieve the forecast for every distinct launch location
for index, row in coordinates.iterrows():
    #add this location's latitude and longitude to the request parameters
    params["lat"]= row["lat"]
    params["lon"]= row["long"]
    #call the weather API
    address= requests.get(weath_url, params=params)
    #fail with a clear HTTP error instead of an obscure KeyError on "data" below
    address.raise_for_status()
    #decode the JSON response
    address_j=address.json()
    #one record per forecast day, tagged with the coordinates echoed back by the API
    for result in address_j["data"]:
        post= {"lat": address_j["lat"],
               "long":address_j["lon"],
               "Valid_date":result["valid_date"],
               "Ave_Temp": result["temp"],
               "Max_Temp":result["max_temp"],
               "Min_Temp": result["min_temp"],
               "H_Level_Cloud":result["clouds_hi"],
               "Wind_Gust_Spd":result["wind_gust_spd"],
               "weather_condition":result["weather"]["description"]}
        weather.append(post)
#convert the list of records to a dataframe and preview it
weather_df=pd.DataFrame(weather)
weather_df.head(5)

Unnamed: 0,lat,long,Valid_date,Ave_Temp,Max_Temp,Min_Temp,H_Level_Cloud,Wind_Gust_Spd,weather_condition
0,28.56,-80.58,2021-03-20,17.5,19.2,16.2,0,18.796875,Light rain
1,28.56,-80.58,2021-03-21,15.7,20.4,12.4,0,13.898438,Scattered clouds
2,28.56,-80.58,2021-03-22,14.1,16.8,12.2,0,5.796875,Scattered clouds
3,28.56,-80.58,2021-03-23,21.7,25.8,18.2,32,9.734375,Scattered clouds
4,28.56,-80.58,2021-03-24,23.0,27.7,19.3,96,12.609375,Overcast clouds


# Merges

In [18]:
#join each upcoming launch to its launch pad record (name and lat/long) via the shared pad id
launch_location = pd.merge(space_df, pads_df, on="launch_pad")
#break the timestamp at the 'T' into a date part and a time part
date_and_time = launch_location['launch_date'].str.split('T', expand=True)
launch_location[['launch_date', 'launch_time']] = date_and_time
#trim the coordinates to two decimal places so they can be used as merge keys later
launch_location = launch_location.round({'lat': 2, 'long': 2})
launch_location.head(5)

Unnamed: 0,flight_id,launch_date,launch_pad,Name,lat,long,launch_time
0,60428aafc041c16716f73cd7,2021-03-24,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,28.56,-80.58,04:58:00-04:00
1,60428ac4c041c16716f73cd8,2021-02-28,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,28.56,-80.58,19:00:00-05:00
2,600f9b6d8f798e2a4d5f979f,2021-05-31,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,28.56,-80.58,20:00:00-04:00
3,5fe3afc1b3467846b3242164,2021-05-31,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,28.56,-80.58,20:00:00-04:00
4,5fe3af6db3467846b3242160,2021-03-31,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,28.56,-80.58,20:00:00-04:00


In [17]:
#attach launch details to every weather record that shares the same lat/long,
#discard exact duplicate rows, then renumber the index from zero
launch_location_weather = (
    weather_df
    .merge(launch_location, how="left", on=["lat", "long"])
    .drop_duplicates()
    .reset_index(drop=True)
)
launch_location_weather.head(5)

Unnamed: 0,lat,long,Valid_date,Ave_Temp,Max_Temp,Min_Temp,H_Level_Cloud,Wind_Gust_Spd,weather_condition,flight_id,launch_date,launch_pad,Name,launch_time
0,28.56,-80.58,2021-03-20,17.5,19.2,16.2,0,18.796875,Light rain,60428aafc041c16716f73cd7,2021-03-24,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,04:58:00-04:00
1,28.56,-80.58,2021-03-20,17.5,19.2,16.2,0,18.796875,Light rain,60428ac4c041c16716f73cd8,2021-02-28,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,19:00:00-05:00
2,28.56,-80.58,2021-03-20,17.5,19.2,16.2,0,18.796875,Light rain,600f9b6d8f798e2a4d5f979f,2021-05-31,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,20:00:00-04:00
3,28.56,-80.58,2021-03-20,17.5,19.2,16.2,0,18.796875,Light rain,5fe3afc1b3467846b3242164,2021-05-31,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,20:00:00-04:00
4,28.56,-80.58,2021-03-20,17.5,19.2,16.2,0,18.796875,Light rain,5fe3af6db3467846b3242160,2021-03-31,5e9e4501f509094ba4566f84,Cape Canaveral Space Force Station Space Launc...,20:00:00-04:00


# Export to MongoDB

In [13]:
# Connect to the local MongoDB server (27017 is MongoDB's default port)
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Select the 'spacex_launchDB' database
db = client["spacex_launchDB"]

# Select the 'launches' collection inside that database
launches = db["launches"]

In [14]:
#store every row of the final dataframe in MongoDB as its own document
#NOTE: no _id is supplied, so re-running this cell inserts the same rows again
for index, row in launch_location_weather.iterrows():
    #map the dataframe columns onto the field names used in the collection
    launch_data = {
        "flight_id" : row['flight_id'],
        "launch_date" : row['launch_date'],
        "launchpad_id" : row['launch_pad'],
        "launchpad_name" : row['Name'],
        "launch_time" : row['launch_time'],
        "weather_date" : row['Valid_date'],
        "lat" : row['lat'],
        "long" : row['long'],
        "weather_condition" : row['weather_condition'],
        "avg_temp_C" : row['Ave_Temp'],
        "max_temp_C" : row['Max_Temp'],
        "min_temp_C" : row['Min_Temp'],
        "high_level_clouds" : row['H_Level_Cloud'],
        "wind_gust_spd_MPS" : row['Wind_Gust_Spd'],
    }
    #import into MongoDB (must sit at the loop-body indent so it runs once per row)
    launches.insert_one(launch_data)
    