# **SpaceX Falcon 9 First Stage Landing Prediction**


## Assignment: Exploring and Preparing Data


## Import Libraries

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px


## Load DataFrame 

In [None]:
# Read the course-hosted CSV (dataset_part_2.csv) into `df` and preview the first rows.
dataset_url = (
    "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/"
    "IBM-DS0321EN-SkillsNetwork/datasets/dataset_part_2.csv"
)
df = pd.read_csv(dataset_url)
df.head()

## Data Visualization With Matplotlib And Seaborn Libraries

In [None]:
# Categorical plot of payload mass against flight number, coloured by the `Class` column.
sns.catplot(data=df, x="FlightNumber", y="PayloadMass", hue="Class", aspect=5)

# Label the axes that catplot just drew on.
axis = plt.gca()
axis.set_xlabel("Flight Number", fontsize=20)
axis.set_ylabel("Payload Mass (kg)", fontsize=20)
plt.show()

In [None]:
# Categorical plot of launch site against flight number, coloured by the `Class` column.
label_style = {"fontsize": 20}
sns.catplot(data=df, x="FlightNumber", y="LaunchSite", hue="Class", aspect=5)
plt.xlabel("Flight Number", **label_style)
plt.ylabel("Launch Site", **label_style)
plt.show()

In [None]:
# Categorical plot of payload mass per launch site, coloured by the `Class` column.
sns.catplot(data=df, x="LaunchSite", y="PayloadMass", hue="Class", aspect=2)

# Label the axes that catplot just drew on.
axis = plt.gca()
axis.set_xlabel("Launch Site", fontsize=20)
axis.set_ylabel("Payload Mass", fontsize=20)
plt.show()

In [None]:
# Mean of `Class` per orbit, highest first; the chart labels this value as the success rate.
success_rates = (
    df.groupby('Orbit')['Class']
    .mean()
    .sort_values(ascending=False)
)

plt.figure(figsize=(12, 7))
plot = sns.barplot(x=success_rates.index, y=success_rates.values, palette='viridis')
plot.set_title('Success Rate of Each Orbit Type', fontsize=16)
plot.set_xlabel('Orbit Type', fontsize=14)
plot.set_ylabel('Success Rate', fontsize=14)

# Write each bar's height as a percentage, centred 10 points above the top of the bar.
for bar in plot.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    plot.annotate(
        f"{bar_height * 100:.2f}%",
        (bar_centre, bar_height),
        xytext=(0, 10),
        textcoords='offset points',
        ha='center', va='center', fontsize=11, color='black',
    )

# The figure is written to bar_chart.png and then displayed.
plt.tight_layout()
plt.savefig("bar_chart.png")
plt.show()



In [None]:
# Scatter of orbit against flight number, coloured by the `Class` column.
plt.figure(figsize=(12, 8))
axis = sns.scatterplot(data=df, x="FlightNumber", y="Orbit", hue="Class", palette="viridis")

axis.set_title('Scatter Plot of Flight number vs Orbit')
axis.set_xlabel("Flight Number")
axis.set_ylabel("Orbit")
axis.legend(title="Launch Success")
# Thin dashed grid on both major and minor ticks.
axis.grid(True, which="both", linestyle="--", linewidth=0.5)
plt.show()

In [None]:
# Scatter of orbit against payload mass, coloured by the `Class` column.
grid_style = {"which": "both", "linestyle": "--", "linewidth": 0.5}

plt.figure(figsize=(12, 8))
sns.scatterplot(x="PayloadMass", y="Orbit", hue="Class", palette="viridis", data=df)

plt.title('Scatter Plot of Payload Mass vs Orbit')
plt.xlabel("Payload Mass")
plt.ylabel("Orbit")
plt.legend(title="Launch Success")
plt.grid(True, **grid_style)
plt.show()

In [None]:
def Extract_year(dates):
    """Return the year component of each date string in *dates*.

    Each element is split on "-" and the first field is kept, so the years
    are returned as strings (e.g. "2010-06-04" -> "2010").

    A fresh list is built on every call, so repeated calls never accumulate
    results from earlier calls in shared module-level state.

    Args:
        dates: Iterable of strings whose first "-"-separated field is the year.

    Returns:
        A new list of year strings, one per element of *dates*, in input order.
    """
    return [date.split("-")[0] for date in dates]

# Remove an existing "Year" column, if any, then rebuild it from the "Date" column.
df.drop(columns=['Year'], errors='ignore', inplace=True)
df["Year"] = Extract_year(df["Date"])


In [None]:
# Mean of `Class` per "Year" value; the chart labels this as the average success rate.
success_rate_by_year = df.groupby('Year')['Class'].mean()

plt.figure(figsize=(12, 6))
axis = sns.lineplot(data=success_rate_by_year)

axis.set_title("Launch Success Rate by Year")
axis.set_xlabel("Year")
axis.set_ylabel("Average Success Rate")
# Thin dashed grid on both major and minor ticks.
axis.grid(True, which="both", linestyle="--", linewidth=0.5)

plt.tight_layout()
plt.show()


## Feature Engineering

In [None]:
# Columns carried forward into the feature table.
feature_columns = [
    'FlightNumber',
    'PayloadMass',
    'Orbit',
    'LaunchSite',
    'Flights',
    'GridFins',
    'Reused',
    'Legs',
    'LandingPad',
    'Block',
    'ReusedCount',
    'Serial',
]
features = df[feature_columns]
features.head()

In [None]:
# One-hot encode the listed columns; every other column in `features` is passed through unchanged.
categorical_columns = ['Orbit', 'LaunchSite', 'LandingPad', 'Serial']
features_one_hot = pd.get_dummies(data=features, columns=categorical_columns)
features_one_hot.head()


In [None]:
# Cast every column of the encoded table to 64-bit floats.
features_one_hot = features_one_hot.astype(np.float64)

In [None]:
# Write the encoded feature table to dataset_part3.csv, without the index column.
features_one_hot.to_csv(path_or_buf="dataset_part3.csv", index=False)

In [None]:
# Print pandas' summary of the encoded feature table (`DataFrame.info`).
features_one_hot.info()