<a href="https://colab.research.google.com/github/fjadidi2001/Insurance/blob/main/ImproveNov4Paper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler

# Weighted GradientBoosting + MLP ensemble for claim prediction.
#
# Pipeline order matters here: the data is partitioned into train / validation /
# test BEFORE any fitted preprocessing (scaling) or resampling (SMOTE) happens,
# so that the validation and test rows never influence what the models see.

# Load the data and derive the binary target: a policy counts as a claim only
# when it has at least one claim AND the claimed amount exceeds 1000.
df = pd.read_csv('/content/drive/My Drive/Insurance/telematics_syn.csv')
df['ClaimYN'] = ((df['NB_Claim'] >= 1) & (df['AMT_Claim'] > 1000)).astype(int)
# The raw claim columns define the target, so they must not remain as features.
df = df.drop(['NB_Claim', 'AMT_Claim'], axis=1)
df = pd.get_dummies(df, drop_first=True)
X = df.drop('ClaimYN', axis=1)
y = df['ClaimYN']

# Hold out a test set first (20% of all rows), then carve a validation set out
# of the remainder (25% of 80% = 20% of all rows). Stratifying keeps the class
# ratio of the target the same in every partition.
X_dev_raw, X_test_raw, y_dev, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
X_train_raw, X_val_raw, y_train_raw, y_val = train_test_split(
    X_dev_raw, y_dev, test_size=0.25, stratify=y_dev, random_state=42
)

# Fit the scaler on the training rows only; validation and test rows are merely
# transformed with the training statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test = scaler.transform(X_test_raw)
# Full feature matrix scaled with the train-fitted scaler. Not used for fitting
# below; kept so later cells that reference `X_scaled` continue to work.
X_scaled = scaler.transform(X)

# Oversample the minority class in the TRAINING partition only. Validation and
# test keep their original class distribution and contain no synthetic rows.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train_raw)
X_train, y_train = X_resampled, y_resampled

# Train models
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)
nn_model = MLPClassifier(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', max_iter=1000, random_state=42)
gb_model.fit(X_train, y_train)
nn_model.fit(X_train, y_train)

# Derive ensemble weights from validation accuracy at a 0.5 threshold; the two
# weights are normalised so they sum to 1.
gb_val_proba = gb_model.predict_proba(X_val)[:, 1]
nn_val_proba = nn_model.predict_proba(X_val)[:, 1]
gb_val_acc = accuracy_score(y_val, (gb_val_proba > 0.5).astype(int))
nn_val_acc = accuracy_score(y_val, (nn_val_proba > 0.5).astype(int))
total_val_acc = gb_val_acc + nn_val_acc
weight_gb = gb_val_acc / total_val_acc
weight_nn = nn_val_acc / total_val_acc

# Weighted ensemble on the held-out test set (rows never used for fitting the
# scaler, SMOTE, the models, or the ensemble weights).
gb_test_proba = gb_model.predict_proba(X_test)[:, 1]
nn_test_proba = nn_model.predict_proba(X_test)[:, 1]
ensemble_proba = (weight_gb * gb_test_proba + weight_nn * nn_test_proba)
ensemble_pred = (ensemble_proba > 0.5).astype(int)

# Evaluate
print("\nWeighted Ensemble Performance:")
print(f"Accuracy: {accuracy_score(y_test, ensemble_pred):.4f}")
print(f"AUC-ROC: {roc_auc_score(y_test, ensemble_proba):.4f}")