<a href="https://colab.research.google.com/github/asritha7125/WarrantyML/blob/4/phase_4_ML_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only step: open the browser upload dialog so the dataset can be
# supplied from the local machine. The call's return value is kept in
# `uploaded`; later cells read the file by name ("warranty_final.csv")
# from the working directory.
from google.colab import files
uploaded = files.upload()


Saving warranty_final.csv to warranty_final.csv


In [None]:
# Load the uploaded CSV into `df`.
# NOTE(review): the next cell imports pandas and reads the same file again,
# so this cell is redundant when the notebook is run top to bottom.
import pandas as pd
df = pd.read_csv("warranty_final.csv")


In [None]:
# 1. Upload CSV from local machine (done by the files.upload() cell at the top of the notebook)
# 2. Imports and setup
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, roc_curve

# 3. Load uploaded file
file_path = "warranty_final.csv"
df = pd.read_csv(file_path)

# 4. Define target variable
# Fail fast when the source column is absent; otherwise derive the integer
# label Will_Claim by casting Claim_Under_Warranty (True -> 1, False -> 0,
# assuming the column is boolean or already 0/1 -- TODO confirm).
if 'Claim_Under_Warranty' not in df.columns:
    raise KeyError("'Claim_Under_Warranty' column not found in warranty_final.csv")
df['Will_Claim'] = df['Claim_Under_Warranty'].astype(int)

# 5. Select features and labels
# Will_Claim is a direct cast of Claim_Under_Warranty, so the source column
# must be removed from the feature matrix together with the label itself.
# Leaving it in lets the models read the answer straight from the input
# (target leakage), which produces meaningless near-perfect scores.
# NOTE(review): any other column that is only known after a claim has been
# filed would leak in the same way -- confirm against the dataset schema.
X = df.drop(columns=['Will_Claim', 'Claim_Under_Warranty'])
y = df['Will_Claim']

# 6. Encode categorical features
# Remember which columns are categorical and which are numeric *before* any
# encoding, because later steps (imputation, scaling) rely on num_cols.
cat_cols = list(X.select_dtypes(include=['object', 'category']).columns)
num_cols = list(X.select_dtypes(include=[np.number]).columns)

# Categorical columns with at most two distinct values are replaced by
# integer codes. One encoder object is refit for every such column.
le = LabelEncoder()
two_level_cols = [c for c in cat_cols if X[c].nunique() <= 2]
for col in two_level_cols:
    X[col] = le.fit_transform(X[col])

# Categorical columns that (after the step above) still have more than two
# distinct values are expanded into dummy columns, dropping the first level.
# The list is deliberately computed after label encoding has been applied.
multi_level_cols = [c for c in cat_cols if X[c].nunique() > 2]
X = pd.get_dummies(X, columns=multi_level_cols, drop_first=True)

# 7. Train-test split
# Split BEFORE computing any statistics so that nothing derived from the
# held-out rows can influence the training data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Work on independent copies: the split frames are selections from X, and
# assigning into them directly may raise pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# 8. Handle missing values
# Numeric gaps are filled with the column means of the TRAINING rows only;
# the same training means are reused for the test rows. Whatever is still
# missing afterwards (non-numeric columns, all-NaN columns) becomes 0.
# Note: X itself is left untouched; only the split frames are imputed.
if num_cols:
    train_means = X_train[num_cols].mean()
    X_train[num_cols] = X_train[num_cols].fillna(train_means)
    X_test[num_cols] = X_test[num_cols].fillna(train_means)
X_train = X_train.fillna(0)
X_test = X_test.fillna(0)

# 9. Feature scaling
# The scaler is fitted on the training rows and then applied unchanged to
# the test rows. Skipped when there are no numeric columns, since fitting a
# scaler on zero features raises an error.
scaler = StandardScaler()
if num_cols:
    X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
    X_test[num_cols] = scaler.transform(X_test[num_cols])

# 10. Train models
# Both estimators are fitted on the same training frame; fit() returns the
# estimator itself, so construction and fitting are chained.

# 10.1 Logistic Regression (iteration cap raised to 1000)
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# 10.2 Random Forest (100 trees, fixed seed)
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# 11. Evaluate models
# Every fitted model is scored on the held-out test frame. Hard predictions
# feed accuracy / precision / recall / F1; the predicted probability of the
# positive class (column 1 of predict_proba) feeds the AUC.
models = {'Logistic Regression': lr, 'Random Forest': rf}
results = []
for name, model in models.items():
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    row = {'Model': name}
    row['Accuracy'] = accuracy_score(y_test, y_pred)
    row['Precision'] = precision_score(y_test, y_pred)
    row['Recall'] = recall_score(y_test, y_pred)
    row['F1 Score'] = f1_score(y_test, y_pred)
    row['AUC'] = roc_auc_score(y_test, y_proba)
    results.append(row)

# Display evaluation table (one row per model)
results_df = pd.DataFrame(results)
print(results_df)

# 12. Save predictions from Random Forest
# Start from the test features and append, in this order, the true label,
# the forest's predicted class, and its positive-class probability. assign()
# returns a new frame, so X_test itself is not modified.
predictions = X_test.assign(
    Actual=y_test.values,
    Predicted_RF=rf.predict(X_test),
    Prob_RF=rf.predict_proba(X_test)[:, 1],
)
# Written without the index column.
predictions.to_csv('predictions.csv', index=False)
print("Predictions saved to predictions.csv")


                 Model  Accuracy  Precision  Recall  F1 Score  AUC
0  Logistic Regression      1.00        1.0    1.00  1.000000  1.0
1        Random Forest      0.99        1.0    0.98  0.989899  1.0
Predictions saved to predictions.csv


In [None]:
# Colab-only step: send the predictions.csv written by the previous cell
# from the runtime to the user's browser as a download.
from google.colab import files
files.download('predictions.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>