# **SpaceX Falcon 9 First Stage Landing Prediction**


![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/Images/landing_1.gif)


Several examples of an unsuccessful landing are shown here:


![](https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/api/Images/crash.gif)


Most unsuccessful landings are planned. SpaceX performs a controlled landing in the ocean.


## Import Libraries And Define Functions

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

import requests
import io 

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [None]:
def plot_confusion_matrix(y, y_predict):
    """Draw the confusion matrix of ``y`` versus ``y_predict`` as an annotated heatmap.

    Args:
        y: True class labels.
        y_predict: Predicted class labels, in the same order as ``y``.

    The matrix is drawn on the axes returned by ``plt.subplot()``; nothing
    is returned.
    """
    from sklearn.metrics import confusion_matrix

    cm = confusion_matrix(y, y_predict)
    ax = plt.subplot()
    sns.heatmap(cm, annot=True, ax=ax)
    ax.set_xlabel("Predicted Labels")
    ax.set_ylabel("True Labels")
    ax.set_title("Confusion Matrix")

    # Both axes describe the same two classes, so they share one label list.
    # NOTE(review): assumes exactly two classes with the "did not land" class
    # sorting first - confirm against the label encoding.
    class_names = ["did not land", "landed"]
    ax.xaxis.set_ticklabels(class_names)
    ax.yaxis.set_ticklabels(class_names)


## Load The DataFrame

In [None]:

URL1 = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/dataset_part_2.csv"
# A timeout keeps the cell from waiting forever on a stalled connection.
response = requests.get(URL1, timeout=30)
# Raise on a 4xx/5xx answer instead of parsing an error page as CSV.
response.raise_for_status()
data = pd.read_csv(io.StringIO(response.text))

data.head()

In [None]:
URL2 = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/dataset_part_3.csv"
# A timeout keeps the cell from waiting forever on a stalled connection.
response = requests.get(URL2, timeout=30)
# Raise on a 4xx/5xx answer instead of parsing an error page as CSV.
response.raise_for_status()
X = pd.read_csv(io.StringIO(response.text))

X.head()

In [None]:
# Target vector: the "Class" column of `data`, converted to a NumPy array.
Y = data.loc[:, "Class"].to_numpy()

In [None]:
# Standardize the features; X is rebound to the scaled result.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# further down, so the test rows influence the scaling statistics.
transform = preprocessing.StandardScaler()
X = transform.fit(X).transform(X)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

## Apply Machine Learning Models

### **Logistic Regression Model**

In [None]:
# Candidate settings: three values of C with the L2 penalty and the lbfgs solver.
parameters = {
    "C": [0.01, 0.1, 1],
    "penalty": ["l2"],
    "solver": ["lbfgs"],
}

logreg = LogisticRegression(max_iter=10000)
logreg_cv = GridSearchCV(estimator=logreg, param_grid=parameters, cv=10)

# Last expression of the cell: run the 10-fold search and show the fitted object.
logreg_cv.fit(X_train, Y_train)

In [None]:
# Report the selected parameter combination and its cross-validated score.
print(
    "tuned hpyerparameters :(best parameters) ",
    logreg_cv.best_params_,
)
print(
    "accuracy :",
    logreg_cv.best_score_,
)

In [None]:
# Score the tuned search object on the held-out test split.
accuracy = logreg_cv.score(
    X_test,
    Y_test,
)
print(f"Accuracy on the test data: {accuracy:.2f}")


In [None]:
# Confusion matrix of the logistic-regression predictions on the test split.
yhat = logreg_cv.predict(X_test)
plot_confusion_matrix(y=Y_test, y_predict=yhat)

### **SVC Model**

In [None]:
# Candidate SVC settings. Each kernel appears exactly once: a repeated entry
# only makes the grid search fit and score the same candidates a second time.
parameters = {
    "kernel": ("linear", "rbf", "poly", "sigmoid"),
    "C": np.logspace(-3, 3, 5),
    "gamma": np.logspace(-3, 3, 5),
}
svm = SVC()

# Exhaustive search over the grid with 10-fold cross-validation.
svm_cv = GridSearchCV(svm, parameters, cv=10)

svm_cv.fit(X_train, Y_train)
print("tuned hpyerparameters :(best parameters) ", svm_cv.best_params_)
print("accuracy :", svm_cv.best_score_)

# Score the tuned search object on the held-out test split.
accuracy = svm_cv.score(X_test, Y_test)
print(f"Accuracy on the test data: {accuracy:.2f}")

In [None]:
# Confusion matrix of the SVC predictions on the test split.
yhat = svm_cv.predict(X_test)
plot_confusion_matrix(y=Y_test, y_predict=yhat)

### **Decision Tree Classifier Model**

In [None]:
# Candidate decision-tree settings.
# "auto" is no longer accepted for max_features (removed in scikit-learn 1.3);
# for classifiers it was an alias of "sqrt", so "sqrt" alone covers the same
# search space without failing fits.
parameters = {
    "criterion": ["gini", "entropy"],
    "splitter": ["best", "random"],
    "max_depth": [2 * n for n in range(1, 10)],
    "max_features": ["sqrt"],
    "min_samples_leaf": [1, 2, 4],
    "min_samples_split": [2, 5, 10],
}

tree = DecisionTreeClassifier()

# Exhaustive search over the grid with 10-fold cross-validation.
tree_cv = GridSearchCV(tree, parameters, cv=10)
tree_cv.fit(X_train, Y_train)

print("tuned hpyerparameters :(best parameters) ", tree_cv.best_params_)
print("accuracy :", tree_cv.best_score_)

# Score the tuned search object on the held-out test split.
accuracy = tree_cv.score(X_test, Y_test)
print(f"Accuracy on the test data: {accuracy:.2f}")

In [None]:
# Confusion matrix of the decision-tree predictions on the test split.
yhat = tree_cv.predict(X_test)
plot_confusion_matrix(y=Y_test, y_predict=yhat)

### **KNN Model**

In [None]:
# Candidate KNN settings: 1 to 10 neighbours, four search algorithms, and
# two values of the Minkowski power parameter p.
parameters = {
    "n_neighbors": list(range(1, 11)),
    "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
    "p": [1, 2],
}

KNN = KNeighborsClassifier()

# Exhaustive search over the grid with 10-fold cross-validation.
knn_cv = GridSearchCV(estimator=KNN, param_grid=parameters, cv=10)
knn_cv.fit(X_train, Y_train)

print("tuned hpyerparameters :(best parameters) ", knn_cv.best_params_)
print("accuracy :", knn_cv.best_score_)

# Score the tuned search object on the held-out test split.
accuracy = knn_cv.score(X_test, Y_test)
print(f"Accuracy on the test data: {accuracy:.2f}")



In [None]:
# Confusion matrix of the KNN predictions on the test split.
yhat = knn_cv.predict(X_test)
plot_confusion_matrix(y=Y_test, y_predict=yhat)

### **Best Model**


In [None]:
# Requires the four fitted search objects from the cells above.
methods = [
    "Logistic Regression",
    "Support Vector Machine",
    "Decision Tree",
    "K-Nearest Neighbors",
]

# Test-set score of each search object, in the same order as `methods`.
scores = [
    search.score(X_test, Y_test)
    for search in (logreg_cv, svm_cv, tree_cv, knn_cv)
]

# Pair each name with its score and keep the highest; a tie goes to the
# earliest entry in the list.
best_method, best_score = max(zip(methods, scores), key=lambda pair: pair[1])

print(f"The best method is {best_method} with a test accuracy of {best_score:.2f}")


In [None]:

# Bar chart comparing the test-set score of every model.
sns.set_style("whitegrid")
plt.figure(figsize=(10, 6))

palette = sns.color_palette("viridis", len(methods))
sns.barplot(x=methods, y=scores, palette=palette)

plt.title('Classification Models Accuracy Comparison', fontsize=18)
plt.ylabel('Accuracy', fontsize=14)
plt.xlabel('Classification Models', fontsize=14)
plt.xticks(rotation=45, fontsize=12)
plt.yticks(fontsize=12)

# Write each score just above its bar. The loop must walk `scores` (one value
# per model); `accuracy` holds a single number and cannot be enumerated.
for i, v in enumerate(scores):
    plt.text(i, v + 0.01, f"{v:.2f}", ha='center', va='bottom', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()
