In [77]:
import os

import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [78]:
# Load the cleaned dataset. Forward slashes avoid the invalid "\d" / "\e"
# escape sequences of the backslash form (which trigger a SyntaxWarning) and
# resolve on Windows as well as POSIX systems.
df = pd.read_csv('../data/exoplanents_cleaned_new.csv')

  df = pd.read_csv('..\data\exoplanents_cleaned_new.csv')


In [79]:
# Peek at two randomly drawn rows (no seed is set, so the rows differ between runs).
df.sample(n=2)

Unnamed: 0,koi_disposition,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_time0bk,koi_impact,koi_duration,koi_depth,koi_prad,koi_teq,koi_insol,koi_model_snr,koi_steff,koi_slogg,koi_srad
7755,FALSE POSITIVE,0.083,1,0,0,0,264.63654,190.9169,1.247,42.77,431.8,42.55,376,4.75,28.2,6564,4.198,1.461
4631,FALSE POSITIVE,0.0,0,1,0,0,0.838597,141.537817,1.274,1.68668,2482.1,44.67,2278,6353.25,303.7,6366,4.352,1.236


In [80]:
# Columns used as model inputs, in the order they are fed to the scaler and
# the classifiers (the same order is later persisted next to the model).
# NOTE(review): koi_score and the koi_fpflag_* columns presumably come from the
# same vetting process as koi_disposition — confirm they do not leak the label.
feature_cols = [
    'koi_score',
    'koi_fpflag_nt',
    'koi_fpflag_ss',
    'koi_fpflag_co',
    'koi_fpflag_ec',
    'koi_period',
    'koi_time0bk',
    'koi_impact',
    'koi_duration',
    'koi_depth',
    'koi_prad',
    'koi_teq',
    'koi_insol',
    'koi_model_snr',
    'koi_steff',
    'koi_slogg',
    'koi_srad',
]

In [81]:
# Feature matrix: every row of df, restricted to the selected input columns.
X = df.loc[:, feature_cols]

In [82]:
# Binary target: 1 where the disposition is exactly 'CONFIRMED', 0 for every
# other value. A vectorized comparison replaces the per-row Python lambda;
# the explicit int64 cast keeps the integer dtype independent of the platform.
y = df['koi_disposition'].eq('CONFIRMED').astype('int64')

In [83]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# classes turn out to be imbalanced.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

In [84]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both splits so no test information enters the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [85]:
# Candidate classifiers, keyed by the label used in the results table.
# Every estimator that accepts a random_state is seeded with the same value as
# the train/test split, so a Restart & Run All reproduces the same metrics.
models = {
    "KNN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    # probability=True turns on probability estimates; seed it as well.
    "SVM": SVC(probability=True, random_state=42),
    "RF": RandomForestClassifier(n_estimators=100, random_state=42),
}


In [86]:
# Accumulator for one metrics record per evaluated model.
results = list()

In [87]:
# Start from an empty list inside this cell so that re-running it replaces the
# metrics instead of appending a second copy of every row.
results = []
for name, model in models.items():
    # Fit on the scaled training split, then score on the held-out split.
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    # One record per model, with both metrics rounded to four decimals.
    results.append({
        'Model': name,
        'Accuracy': round(acc, 4),
        'F1 Score': round(f1, 4),
    })


In [88]:
# Display the per-model accuracy / F1 records gathered by the evaluation loop.
results

[{'Model': 'KNN', 'Accuracy': 0.8897, 'F1 Score': 0.8295},
 {'Model': 'Naive Bayes', 'Accuracy': 0.9122, 'F1 Score': 0.8713},
 {'Model': 'Decision Tree', 'Accuracy': 0.9356, 'F1 Score': 0.8928},
 {'Model': 'SVM', 'Accuracy': 0.9005, 'F1 Score': 0.8568},
 {'Model': 'RF', 'Accuracy': 0.9532, 'F1 Score': 0.9231}]

In [89]:
# Persist the artifacts needed to score new data: the fitted random forest,
# the scaler fitted on the training split, and the ordered feature-column list.
# Create the output directory first so the dumps do not fail with
# FileNotFoundError when '../models' does not exist yet (e.g. a fresh checkout).
os.makedirs('../models', exist_ok=True)
joblib.dump(models['RF'], '../models/Random_forest_exo.pkl')
joblib.dump(scaler, '../models/scaler.pkl')
joblib.dump(X.columns.tolist(), '../models/columns.pkl')

['../models/columns.pkl']