In [1]:
!pip install -q kaggle
!pip install -q scikit-learn pandas matplotlib seaborn

import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier


In [2]:
# Colab-only helper: opens a browser file picker and stores the chosen file(s)
# in the current working directory.
from google.colab import files
files.upload()  # Upload kaggle.json (your Kaggle API token) manually here

# Setup kaggle directory: copy the uploaded token into ~/.kaggle, where the
# kaggle CLI is expected to look for it, and restrict it to owner read/write
# (mode 600) because it holds credentials.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Example: Download Titanic dataset (replace with your dataset name)
!kaggle competitions download -c titanic

# Unzip into the working directory; -o overwrites existing files without
# prompting, so this cell can be re-run safely.
!unzip -o titanic.zip


Saving kaggle.json to kaggle.json
Downloading titanic.zip to /content
  0% 0.00/34.1k [00:00<?, ?B/s]
100% 34.1k/34.1k [00:00<00:00, 67.2MB/s]
Archive:  titanic.zip
  inflating: gender_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


**Load & Preprocess Data**

In [3]:
# Load dataset. The raw frame keeps its own name so it is never overwritten by
# the preprocessing below.
raw_df = pd.read_csv("train.csv")  # Adjust as per your dataset

# Basic EDA
print(raw_df.head())
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" line).
raw_df.info()

# Example preprocessing for Titanic dataset:
#   1. keep only the target and the four features used by the models,
#   2. drop rows with a missing value in any of those columns,
#   3. encode Sex numerically (male -> 0, female -> 1).
# Built as one chain so `df` is a fresh frame and no column is assigned on a
# slice of another frame.
df = (
    raw_df[["Survived", "Pclass", "Sex", "Age", "Fare"]]
    .dropna()
    .assign(Sex=lambda frame: frame["Sex"].map({"male": 0, "female": 1}))
)

# .map() turns any label missing from the mapping into NaN; fail loudly here
# instead of letting NaNs flow into scaling and training.
assert df["Sex"].notna().all(), "unexpected value in 'Sex' column"

# Features & target
X = df.drop("Survived", axis=1)
y = df["Survived"]

# Train-test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features. The scaler is fitted on the training split only and
# then applied to the test split, so no test-set statistics leak into training.
# Note: X_train / X_test become NumPy arrays from here on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  
<c

**Train & Evaluate Multiple Models**

In [4]:
# Baseline classifiers, all with default hyperparameters. The tree-based
# models are randomized internally, so they get a fixed random_state; without
# it their scores change on every run and the comparison is not reproducible.
models = {
    "Logistic Regression": LogisticRegression(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42)
}

# Fit each model on the standardized training split and report metrics on the
# held-out test split (precision/recall/F1 are for the positive class, 1).
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"\nModel: {name}")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall:", recall_score(y_test, y_pred))
    print("F1-score:", f1_score(y_test, y_pred))



Model: Logistic Regression
Accuracy: 0.7412587412587412
Precision: 0.6666666666666666
Recall: 0.6785714285714286
F1-score: 0.672566371681416

Model: SVM
Accuracy: 0.7412587412587412
Precision: 0.6727272727272727
Recall: 0.6607142857142857
F1-score: 0.6666666666666666

Model: Random Forest
Accuracy: 0.7692307692307693
Precision: 0.7017543859649122
Recall: 0.7142857142857143
F1-score: 0.7079646017699115

Model: Decision Tree
Accuracy: 0.7062937062937062
Precision: 0.625
Recall: 0.625
F1-score: 0.625


**Hyperparameter Tuning**

**GridSearchCV for RandomForest**

In [5]:
# Exhaustive grid: 3 * 3 * 2 = 18 parameter combinations.
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5],
}

# The forest is seeded so that the cross-validated scores, and therefore the
# selected parameters and the report below, are the same on every run.
# Each combination is scored by 5-fold cross-validated F1 on the training split.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring='f1',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
# best_estimator_ is the winning configuration refitted on the full training split.
best_rf = grid_search.best_estimator_

y_pred = best_rf.predict(X_test)
print("\nAfter GridSearchCV:")
print(classification_report(y_test, y_pred))


Best Parameters: {'max_depth': 5, 'min_samples_split': 5, 'n_estimators': 50}

After GridSearchCV:
              precision    recall  f1-score   support

           0       0.81      0.85      0.83        87
           1       0.75      0.70      0.72        56

    accuracy                           0.79       143
   macro avg       0.78      0.77      0.78       143
weighted avg       0.79      0.79      0.79       143



**RandomizedSearchCV for SVM**

In [6]:
from scipy.stats import uniform

# Search space for the randomized search. C is sampled from a continuous
# uniform distribution (scipy's uniform(loc, scale) spans [loc, loc + scale]);
# kernel and gamma are picked from the listed options.
param_dist = dict(
    C=uniform(loc=0.1, scale=10),
    kernel=['linear', 'rbf', 'poly'],
    gamma=['scale', 'auto'],
)

# Try 10 sampled configurations, each scored by 5-fold cross-validated F1 on
# the training split; random_state fixes which configurations are sampled.
random_search = RandomizedSearchCV(
    estimator=SVC(),
    param_distributions=param_dist,
    n_iter=10,
    scoring='f1',
    cv=5,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# The winning configuration, refitted on the whole training split.
best_svm = random_search.best_estimator_
print("Best Parameters:", random_search.best_params_)

# Evaluate the tuned SVM on the held-out test split.
y_pred = best_svm.predict(X_test)
print("\nAfter RandomizedSearchCV:")
print(classification_report(y_test, y_pred))


Best Parameters: {'C': np.float64(5.347746602583891), 'gamma': 'scale', 'kernel': 'rbf'}

After RandomizedSearchCV:
              precision    recall  f1-score   support

           0       0.80      0.80      0.80        87
           1       0.69      0.68      0.68        56

    accuracy                           0.76       143
   macro avg       0.74      0.74      0.74       143
weighted avg       0.75      0.76      0.75       143



**Export the Best Model**

In [7]:
import joblib

# Save the best model from GridSearchCV
joblib.dump(best_rf, 'best_random_forest.pkl')

# best_rf was trained on StandardScaler-transformed features, so the fitted
# scaler has to be shipped with it: new data must go through
# scaler.transform(...) before best_rf.predict(...), otherwise the model
# receives inputs on a different scale than it was trained on.
joblib.dump(scaler, 'scaler.pkl')

# Download from Colab
from google.colab import files
files.download('best_random_forest.pkl')
files.download('scaler.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>