In [11]:
import requests
import pandas as pd
import numpy as np
import datetime

# Set pandas options for better display
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)



# Fetch SpaceX launch data
spacex_url = "https://api.spacexdata.com/v4/launches/past"
response = requests.get(spacex_url)
response.raise_for_status()  # Raise an error for failed requests

# Flatten JSON data into a pandas DataFrame
data = pd.json_normalize(response.json())

# Display first 5 rows
print(data.head(5))



# Initialize global variables
BoosterVersion = []
PayloadMass = []
Orbit = []
LaunchSite = []
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []
Block = []
ReusedCount = []
Serial = []
Longitude = []
Latitude = []



def getBoosterVersion(data):
    for rocket_id in data['rocket']:
        response = requests.get(f"https://api.spacexdata.com/v4/rockets/{rocket_id}").json()
        BoosterVersion.append(response['name'])

        
        
def getLaunchSite(data):
    for launchpad_id in data['launchpad']:
        response = requests.get(f"https://api.spacexdata.com/v4/launchpads/{launchpad_id}").json()
        LaunchSite.append(response['name'])
        Longitude.append(response['longitude'])
        Latitude.append(response['latitude'])
def getPayloadData(data):
    for payload_id_list in data['payloads']:
        payload_id = payload_id_list[0]  # Assuming one payload per launch
        response = requests.get(f"https://api.spacexdata.com/v4/payloads/{payload_id}").json()
        PayloadMass.append(response.get('mass_kg', np.nan))
        Orbit.append(response['orbit'])
def getCoreData(data):
    for cores_list in data['cores']:
        core = cores_list[0]  # Assuming one core per launch
        if core['core']:
            response = requests.get(f"https://api.spacexdata.com/v4/cores/{core['core']}").json()
            Block.append(response.get('block', None))
            ReusedCount.append(response.get('reuse_count', None))
            Serial.append(response.get('serial', None))
        else:
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        Outcome.append(f"{core['landing_success']} {core['landing_type']}")
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])
# Apply helper functions
getBoosterVersion(data)
getLaunchSite(data)
getPayloadData(data)
getCoreData(data)
launch_dict = {
    'FlightNumber': list(range(1, len(data) + 1)),
    'Date': data['date_utc'],
    'BoosterVersion': BoosterVersion,
    'PayloadMass': PayloadMass,
    'Orbit': Orbit,
    'LaunchSite': LaunchSite,
    'Outcome': Outcome,
    'Flights': Flights,
    'GridFins': GridFins,
    'Reused': Reused,
    'Legs': Legs,
    'LandingPad': LandingPad,
    'Block': Block,
    'ReusedCount': ReusedCount,
    'Serial': Serial,
    'Longitude': Longitude,
    'Latitude': Latitude
}

# Create DataFrame
launch_df = pd.DataFrame(launch_dict)

# Display summary
print(launch_df.head())
# Filter DataFrame for Falcon 9 launches
data_falcon9 = launch_df[launch_df['BoosterVersion'] == 'Falcon 9']

# Reset FlightNumber
data_falcon9['FlightNumber'] = list(range(1, len(data_falcon9) + 1))

# Check for missing values
print(data_falcon9.isnull().sum())
# Replace NaN values in PayloadMass with the mean
payload_mass_mean = data_falcon9['PayloadMass'].mean()
data_falcon9['PayloadMass'].fillna(payload_mass_mean, inplace=True)

# Verify changes
print(data_falcon9['PayloadMass'].isnull().sum())
# Export the cleaned DataFrame to CSV
data_falcon9.to_csv('dataset_part_1.csv', index=False)
print("Data exported to 'dataset_part_1.csv'.")
        

       static_fire_date_utc  static_fire_date_unix    net  window  \
0  2006-03-17T00:00:00.000Z           1.142554e+09  False     0.0   
1                      None                    NaN  False     0.0   
2                      None                    NaN  False     0.0   
3  2008-09-20T00:00:00.000Z           1.221869e+09  False     0.0   
4                      None                    NaN  False     0.0   

                     rocket success  \
0  5e9d0d95eda69955f709d1eb   False   
1  5e9d0d95eda69955f709d1eb   False   
2  5e9d0d95eda69955f709d1eb   False   
3  5e9d0d95eda69955f709d1eb    True   
4  5e9d0d95eda69955f709d1eb    True   

                                                                                                            failures  \
0                                                [{'time': 33, 'altitude': None, 'reason': 'merlin engine failure'}]   
1            [{'time': 301, 'altitude': 289, 'reason': 'harmonic oscillation leading to premature engine shutd

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
