# IBM Data Science Capstone Project
## Lab 7: Machine Learning Prediction lab
### Project Title: SpaceX Falcon 9 First Stage Landing Prediction
Objective: Build and evaluate multiple ML models to predict if the Falcon 9 first stage will land successfully.

In [None]:
# ------------------------------------------
# INSTALL LIBRARIES (LOCAL JUPYTER ONLY)
# ------------------------------------------
# If you are running locally, uncomment the line below:
# !pip install numpy pandas seaborn matplotlib scikit-learn

# If running on IBM Skills Network Labs, you may need:
# import piplite
# await piplite.install(['numpy', 'pandas', 'seaborn', 'matplotlib', 'scikit-learn'])


## IMPORT LIBRARIES

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

## FUNCTION TO PLOT CONFUSION MATRIX

In [None]:
def plot_confusion_matrix(y_true, y_pred, labels=(0, 1),
                          display_labels=('Did not land', 'Landed')):
    """Draw an annotated confusion-matrix heatmap for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with ``y_true``.
    labels : sequence, default (0, 1)
        Class values in the order they should index the matrix rows and
        columns. Passing them explicitly keeps the matrix shape fixed even
        when one class is absent from ``y_true``/``y_pred`` (e.g. a small
        test split). At least one of them must occur in ``y_true``, otherwise
        ``confusion_matrix`` raises ``ValueError``.
    display_labels : sequence of str, default ('Did not land', 'Landed')
        Tick text shown for each entry of ``labels``, in the same order.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    from sklearn.metrics import confusion_matrix

    cm = confusion_matrix(y_true, y_pred, labels=list(labels))

    # Explicit figure/axes so the plot never lands on a leftover current axes.
    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='d', ax=ax, cmap="Blues")

    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    ax.set_title('Confusion Matrix')
    ax.xaxis.set_ticklabels(list(display_labels))
    ax.yaxis.set_ticklabels(list(display_labels))
    plt.show()

## LOAD DATA

In [None]:
from js import fetch
import io

# Load dataset_part_2.csv
URL1 = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/dataset_part_2.csv"
resp1 = await fetch(URL1)
text1 = io.BytesIO((await resp1.arrayBuffer()).to_py())
data = pd.read_csv(text1)

# Load dataset_part_3.csv
URL2 = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/dataset_part_3.csv"
resp2 = await fetch(URL2)
text2 = io.BytesIO((await resp2.arrayBuffer()).to_py())
X = pd.read_csv(text2)

data.head(), X.head()


## TASK 1 – Create target variable Y

In [None]:
# Target vector: the 'Class' column of `data`, converted to a NumPy array.
Y = data["Class"].to_numpy()

# Peek at the first ten labels.
Y[:10]


## TASK 2 – Standardize X

In [None]:
# Standardize the feature matrix; X is rebound to the scaled array.
# NOTE(review): the scaler is fit on every row before the train/test split
# below, so test-set statistics influence the scaling.
transform = preprocessing.StandardScaler()
X = transform.fit(X).transform(X)

## TASK 3 – Train/test split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

# Size of the held-out label vector.
Y_test.shape



## TASK 4 – Logistic Regression

In [None]:
# Grid for logistic regression: three regularization strengths, L2 penalty,
# lbfgs solver.
lr = LogisticRegression()
parameters = {
    "C": [0.01, 0.1, 1],
    "penalty": ["l2"],
    "solver": ["lbfgs"],
}

# 10-fold cross-validated search over the grid, fit on the training split.
logreg_cv = GridSearchCV(estimator=lr, param_grid=parameters, cv=10)
logreg_cv.fit(X_train, Y_train)

print("Tuned hyperparameters (best parameters):", logreg_cv.best_params_)
print("Accuracy:", logreg_cv.best_score_)



## TASK 5 – Logistic Regression test accuracy

In [None]:
# Score the tuned logistic regression on the held-out split, then plot its
# confusion matrix.
yhat = logreg_cv.predict(X_test)
print("Test Accuracy:", logreg_cv.score(X_test, Y_test))
plot_confusion_matrix(Y_test, yhat)


## TASK 6 – Support Vector Machine

In [None]:
# Grid for the SVM: four kernels crossed with five log-spaced values
# (1e-3 .. 1e3) for each of C and gamma.
svm = SVC()
parameters = {
    'kernel': ('linear', 'rbf', 'poly', 'sigmoid'),
    'C': np.logspace(start=-3, stop=3, num=5),
    'gamma': np.logspace(start=-3, stop=3, num=5),
}

# 10-fold cross-validated search over the grid, fit on the training split.
svm_cv = GridSearchCV(estimator=svm, param_grid=parameters, cv=10)
svm_cv.fit(X_train, Y_train)

print("Tuned hyperparameters (best parameters):", svm_cv.best_params_)
print("Accuracy:", svm_cv.best_score_)


## TASK 7 – SVM test accuracy

In [None]:
# Score the tuned SVM on the held-out split, then plot its confusion matrix.
yhat = svm_cv.predict(X_test)
print("Test Accuracy:", svm_cv.score(X_test, Y_test))
plot_confusion_matrix(Y_test, yhat)


## TASK 8 – Decision Tree

In [None]:
# Grid for the decision tree.
# 'auto' was dropped from max_features: for classifiers it was an alias of
# 'sqrt', was deprecated in scikit-learn 1.1 and removed in 1.3, where it makes
# every candidate using it fail to fit. 'log2' keeps a second, valid option.
parameters = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [2*n for n in range(1,10)],
    'max_features': ['sqrt', 'log2'],
    'min_samples_leaf': [1, 2, 4],
    'min_samples_split': [2, 5, 10]
}

# Seeded so the random splitter and feature subsampling give the same
# best_params_ / best_score_ on every run (same seed as the train/test split).
tree = DecisionTreeClassifier(random_state=2)
tree_cv = GridSearchCV(tree, parameters, cv=10)
tree_cv.fit(X_train, Y_train)

print("Tuned hyperparameters (best parameters):", tree_cv.best_params_)
print("Accuracy:", tree_cv.best_score_)



## TASK 9 – Decision Tree test accuracy


In [None]:
# Score the tuned decision tree on the held-out split, then plot its
# confusion matrix.
yhat = tree_cv.predict(X_test)
print("Test Accuracy:", tree_cv.score(X_test, Y_test))
plot_confusion_matrix(Y_test, yhat)


## TASK 10 – K Nearest Neighbors

In [None]:
# Grid for k-nearest neighbours: k from 1 to 10, four neighbour-search
# algorithms, and Minkowski power p of 1 or 2.
KNN = KNeighborsClassifier()
parameters = {
    'n_neighbors': list(range(1, 11)),
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'p': [1, 2],
}

# 10-fold cross-validated search over the grid, fit on the training split.
knn_cv = GridSearchCV(estimator=KNN, param_grid=parameters, cv=10)
knn_cv.fit(X_train, Y_train)

print("Tuned hyperparameters (best parameters):", knn_cv.best_params_)
print("Accuracy:", knn_cv.best_score_)


## TASK 11 – KNN test accuracy

In [None]:
# Score the tuned KNN model on the held-out split, then plot its confusion matrix.
yhat = knn_cv.predict(X_test)
print("Test Accuracy:", knn_cv.score(X_test, Y_test))
plot_confusion_matrix(Y_test, yhat)


## TASK 12 – Best Model

In [None]:
# Held-out accuracy of each tuned model, keyed by display name.
models = {
    "Logistic Regression": logreg_cv.score(X_test, Y_test),
    "SVM": svm_cv.score(X_test, Y_test),
    "Decision Tree": tree_cv.score(X_test, Y_test),
    "KNN": knn_cv.score(X_test, Y_test)
}

# max() returns the first key with the highest score, so on a tie the winner
# is decided by dict order alone; the tie is reported explicitly below.
best_model = max(models, key=models.get)
print("Best Model:", best_model, "with accuracy:", models[best_model])

tied_models = [
    name for name, score in models.items()
    if name != best_model and np.isclose(score, models[best_model])
]
if tied_models:
    print("Note: tied on test accuracy with:", ", ".join(tied_models))



## Lab 7 Complete: Machine Learning Prediction Lab
Md. Anwar Hossain