In [1]:
import pickle

import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [2]:
# Load the claims dataset into a DataFrame.
# NOTE(review): this is a hard-coded absolute path, so the notebook only runs
# where that exact location exists — consider making it configurable.
file_path = "/content/synthetic_property_insurance_claims_dataset.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Per-column count of missing entries in the raw frame. The result is only
# stored here; nothing in this cell displays it.
missing_values = df.isna().sum()

In [4]:
# Separate the target column from the feature columns.
# NOTE(review): the column is spelled "claim_statues" — presumably this matches
# the CSV header exactly; confirm before renaming anything.
y = df["claim_statues"]
X = df.drop(columns=["claim_statues"])

In [5]:
# Feature columns, grouped by the preprocessing each group receives.
numerical_features = [
    "Claim_Amount",
    "Building_Age",
    "Repair_Estimate",
    "Policy_Coverage_Amount",
    "Amount_Paid",
]
categorical_features = [
    "Property_Type",
    "Property_Location",
    "Incident_Type",
    "Severity",
    "Building_Material",
    "Fraudulent",
]

# Column-wise preprocessing: standardize the numeric columns and one-hot encode
# the categorical ones. handle_unknown="ignore" lets the encoder transform
# categories it did not see while fitting instead of raising. Columns that
# appear in neither list are dropped (ColumnTransformer's default remainder).
numeric_step = ("num", StandardScaler(), numerical_features)
categorical_step = (
    "cat",
    OneHotEncoder(handle_unknown="ignore"),
    categorical_features,
)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Random-forest pipeline: preprocessing followed by a seeded 100-tree classifier.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
model_pipeline_rf = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", rf_classifier),
    ]
)

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [7]:
# Fit the random-forest pipeline on the training split, predict the held-out
# rows, and record the fraction of predictions that match the true labels.
# Pipeline.fit returns the pipeline itself, so predict can be chained onto it.
y_pred_rf = model_pipeline_rf.fit(X_train, y_train).predict(X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)

In [8]:
# Gradient-boosting pipeline using the same preprocessing configuration as the
# random-forest pipeline. The preprocessor is cloned so each pipeline owns its
# own transformer: Pipeline.fit fits its steps in place, so sharing a single
# ColumnTransformer instance would let fitting one pipeline overwrite the
# fitted state that the other pipeline relies on.
model_pipeline_gb = Pipeline(steps=[
    ("preprocessor", clone(preprocessor)),
    ("classifier", GradientBoostingClassifier(n_estimators=100, random_state=42))
])

In [9]:
# Fit the gradient-boosting pipeline on the training split, predict the
# held-out rows, and record the fraction of predictions that match the labels.
# Pipeline.fit returns the pipeline itself, so predict can be chained onto it.
y_pred_gb = model_pipeline_gb.fit(X_train, y_train).predict(X_test)
accuracy_gb = accuracy_score(y_true=y_test, y_pred=y_pred_gb)

In [10]:
# Report the held-out accuracy of each model, one line per model.
accuracy_report = (
    ("Random Forest Accuracy:", accuracy_rf),
    ("Gradient Boosting Accuracy:", accuracy_gb),
)
for label, value in accuracy_report:
    print(label, value)

Random Forest Accuracy: 0.4982
Gradient Boosting Accuracy: 0.5086


In [12]:
# Serialize the gradient-boosting pipeline (preprocessing + classifier) to disk
# so it can be reloaded without refitting. `pickle` is imported in the
# notebook's import cell rather than here, keeping all dependencies in one place.
# NOTE(review): only unpickle this file from a trusted source — loading a
# pickle can execute arbitrary code.
with open("2nd-Gradient-Boosting.pkl", "wb") as model_file:
    pickle.dump(model_pipeline_gb, model_file)
