# Model Training Notebook for Startup Bankruptcy Prediction

In [None]:
# Model Training for Startup Bankruptcy Prediction

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE

# Load data
df = pd.read_csv("data.csv")
X = df.drop("Bankrupt?", axis=1)
y = df["Bankrupt?"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# SMOTE to handle imbalance
sm = SMOTE(random_state=42)
X_train, y_train = sm.fit_resample(X_train, y_train)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [3]:
# Train and evaluate models

models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "SVC": SVC(probability=True)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:,1] if hasattr(model, "predict_proba") else None

    print(f"\n{name}")
    print(classification_report(y_test, y_pred))
    if y_prob is not None:
        print("ROC AUC:", roc_auc_score(y_test, y_prob))


Logistic Regression
              precision    recall  f1-score   support

           0       0.99      0.89      0.94      1313
           1       0.19      0.69      0.30        51

    accuracy                           0.88      1364
   macro avg       0.59      0.79      0.62      1364
weighted avg       0.96      0.88      0.91      1364

ROC AUC: 0.8900138882666546

Random Forest
              precision    recall  f1-score   support

           0       0.98      0.96      0.97      1313
           1       0.36      0.57      0.44        51

    accuracy                           0.95      1364
   macro avg       0.67      0.76      0.71      1364
weighted avg       0.96      0.95      0.95      1364

ROC AUC: 0.9266161910308678

SVC
              precision    recall  f1-score   support

           0       0.99      0.92      0.95      1313
           1       0.24      0.65      0.35        51

    accuracy                           0.91      1364
   macro avg       0.61      0.