In [1]:
import pandas as pd  # External library for data manipulation
from sklearn.model_selection import train_test_split  # External library for ML model preparation
from sklearn.preprocessing import LabelEncoder  # External library for data encoding
from sklearn.ensemble import RandomForestClassifier  # External library for ML model
from sklearn.metrics import classification_report, accuracy_score  # External library for model evaluation
from joblib import dump, load  # External library for saving and loading models
import os  # Standard library for OS interaction
import json  # Standard library for JSON manipulation

# Input locations, kept in a tuple so they cannot be modified by accident:
# index 0 is the training CSV, index 1 is the JSON record scored later on.
file_paths = ('insurance_claims.csv', 'policy_data.txt')  # Feature -> Tuple (Storing immutable file paths) #change filename here

# Read the claims table, discard rows whose police report status is the '?'
# placeholder, and remove the incident date column.
df = (
    pd.read_csv(file_paths[0])  # Feature -> Data management using pandas
    .loc[lambda claims: claims['police_report_available'] != '?']
    .drop(columns='incident_date')
)

# Every object-dtype column is treated as categorical and gets its own encoder.
categorical_columns = [
    column_name
    for column_name, column_dtype in df.dtypes.items()
    if column_dtype == 'object'
]  # Feature -> List comprehension (Creating a list of categorical columns)
label_encoders = {}  # Feature -> Dictionary (column name -> fitted LabelEncoder)

# Fit each encoder on the lower-cased values and overwrite the column with the
# resulting integer codes.
for column_name in categorical_columns:
    encoder = LabelEncoder()
    lowered_values = df[column_name].str.lower()
    df[column_name] = encoder.fit_transform(lowered_values)
    label_encoders[column_name] = encoder

unique_categories = set(categorical_columns)  # Feature -> Set (Ensuring unique categories)

# Persist the fitted encoders, one file per column. exist_ok=True lets this
# cell be re-run after the directory has already been created.
os.makedirs('label_encoders', exist_ok=True)
for column_name, encoder in label_encoders.items():
    dump(encoder, f'label_encoders/{column_name}_encoder.joblib')  # Feature -> Data management using joblib (Saving the label encoders for future use)


In [2]:
from sklearn.model_selection import train_test_split  # External library for ML model preparation
from sklearn.ensemble import RandomForestClassifier  # External library for ML model
from sklearn.metrics import accuracy_score, classification_report  # External library for model evaluation
from joblib import dump  # External library for saving models

# Separate the label from the predictors. The label is converted with str(),
# so the classifier's classes (and its predictions) are strings.
y = df['fraud_reported'].apply(str)
X = df.drop(columns=['fraud_reported'])  # Features

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(  # Feature -> Functions (Splitting dataset into training and test sets)
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Inheritance: Subclassing RandomForestClassifier
class CustomRandomForestClassifier(RandomForestClassifier):  # Feature -> Inheritance (Creating a subclass of RandomForestClassifier)
    """Random forest classifier subclass that adds no behavior of its own.

    The constructor is inherited from ``RandomForestClassifier`` on purpose.
    scikit-learn discovers an estimator's hyperparameters by inspecting the
    explicit keyword arguments of ``__init__``; a ``**kwargs``-only override
    hides all of them, which makes ``get_params()`` return an empty dict,
    makes ``clone()`` silently fall back to default settings, and leaves the
    repr without any of the configured values.

    All keyword arguments accepted by ``RandomForestClassifier`` (for example
    ``n_estimators`` and ``random_state``) are accepted unchanged.
    """

# Initialize the CustomRandomForestClassifier with 100 trees and a fixed seed
custom_random_forest_classifier = CustomRandomForestClassifier(n_estimators=100, random_state=42)

# Fit the model on the training data
custom_random_forest_classifier.fit(X_train, y_train)  # Feature -> Functions (Fitting the model on the training data)

# Make predictions on the test data
y_pred = custom_random_forest_classifier.predict(X_test)  # Feature -> Functions (Making predictions with the trained model)

# Evaluate the model.
# The report text is stored under its own name: assigning it to
# `classification_report` would replace the imported sklearn function with a
# string, and re-running this cell would then fail with
# "TypeError: 'str' object is not callable".
accuracy = accuracy_score(y_test, y_pred)  # Feature -> Functions (Calculating accuracy of the model)
report_text = classification_report(y_test, y_pred)  # Feature -> Functions (Generating a classification report)

# Print the accuracy and classification report
print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report_text}')

# Persist the fitted model so later cells can reload it from disk
model_filename = 'custom_random_forest_classifier.joblib'
dump(custom_random_forest_classifier, model_filename)  # Feature -> Data management using joblib (Saving the trained model)
print(f"Model saved as {model_filename}")


Accuracy: 0.7373737373737373
Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.91      0.84       145
           1       0.52      0.26      0.35        53

    accuracy                           0.74       198
   macro avg       0.65      0.59      0.59       198
weighted avg       0.70      0.74      0.71       198

Model saved as custom_random_forest_classifier.joblib


In [3]:
# Fraud-detected example: score the policy record stored at file_paths[1].
import pandas as pd  # External library for data manipulation
from joblib import load  # External library for loading models
import json  # Standard library for JSON manipulation

# Read the single policy record to score (a JSON document).
with open(file_paths[1], 'r') as policy_file:
    json_data = json.load(policy_file)  # Feature -> Data management using JSON (Loading JSON data)

# Wrap the record in a list so pandas builds a one-row DataFrame from it.
transform_to_df = lambda data: pd.DataFrame([data])  # Feature -> Lambda function (Defining a quick transformation function)
test_df = transform_to_df(json_data)

# Drop the listed fields when they are present; only the ones that actually
# appear in the record are passed to drop().
columns_to_remove = {
    'policy_type',
    'registration_number',
    'insurance_amount',
    'user_id',
    'expiry_date',
    'qr_code_path',
    'policy_id',
}  # Feature -> Set (Handling unique values)
present_columns = columns_to_remove.intersection(test_df.columns)  # Feature -> Set (Using set intersection for efficient column removal)
test_df = test_df.drop(columns=list(present_columns))

# Encode each categorical column with the encoder saved for it, applying the
# same lower-casing that was used when the encoders were fitted.
encodable_columns = [name for name in unique_categories if name in test_df.columns]
for name in encodable_columns:
    encoder = load(f'label_encoders/{name}_encoder.joblib')  # Feature -> Data management using joblib (Loading a label encoder)
    normalized_values = test_df[name].astype(str).str.lower()
    test_df[name] = encoder.transform(normalized_values)

# Reload the persisted classifier and score the record.
model = load('custom_random_forest_classifier.joblib')  # Feature -> Data management using joblib (Loading the trained model)
predictions = model.predict(test_df)

# Show the raw prediction array
print(predictions)

# Translate the predicted label into a readable verdict: '1' means fraud.
if predictions[0] == '1':
    result = "Fraud Detected"
else:
    result = "No Fraud Detected"
print(result)


['1']
Fraud Detected


In [4]:
import pandas as pd  # External library for data manipulation
from sklearn.model_selection import train_test_split  # External library for ML model preparation
from sklearn.preprocessing import LabelEncoder  # External library for data encoding
from sklearn.ensemble import RandomForestClassifier  # External library for ML model
from sklearn.metrics import classification_report, accuracy_score  # External library for model evaluation
from joblib import dump, load  # External library for saving and loading models
import os  # Standard library for OS interaction
import json  # Standard library for JSON manipulation

# Input locations, kept in a tuple so they cannot be modified by accident:
# index 0 is the training CSV, index 1 is the JSON record scored later on.
file_paths = ('insurance_claims.csv', 'policy_data2.txt')  # Feature -> Tuple (Storing immutable file paths) #change filename here

# Load the dataset, discard rows whose police report status is the '?'
# placeholder, and remove the incident date column.
df = (
    pd.read_csv(file_paths[0])  # Feature -> Data management using pandas (Reading a CSV file)
    .loc[lambda claims: claims['police_report_available'] != '?']
    .drop(columns='incident_date')
)

# Every object-dtype column is treated as categorical and gets its own encoder.
categorical_columns = [
    column_name
    for column_name, column_dtype in df.dtypes.items()
    if column_dtype == 'object'
]  # Feature -> List comprehension (Creating a list of categorical columns)
label_encoders = {}  # Feature -> Dictionary (column name -> fitted LabelEncoder)

# Fit each encoder on the lower-cased values and overwrite the column with the
# resulting integer codes.
for column_name in categorical_columns:
    encoder = LabelEncoder()
    lowered_values = df[column_name].str.lower()
    df[column_name] = encoder.fit_transform(lowered_values)
    label_encoders[column_name] = encoder

# Use a set to ensure unique categories are handled
unique_categories = set(categorical_columns)  # Feature -> Set (Ensuring unique categories)

# Save the label encoders, one file per column. exist_ok=True lets this cell
# be re-run after the directory has already been created.
os.makedirs('label_encoders', exist_ok=True)
for column_name, encoder in label_encoders.items():
    dump(encoder, f'label_encoders/{column_name}_encoder.joblib')  # Feature -> Data management using joblib (Saving the label encoders for future use)


In [5]:
# No-fraud example: score the policy record stored at file_paths[1].
import pandas as pd  # External library for data manipulation
from joblib import load  # External library for loading models
import json  # Standard library for JSON manipulation

# Read the single policy record to score (a JSON document).
with open(file_paths[1], 'r') as policy_file:
    json_data = json.load(policy_file)  # Feature -> Data management using JSON (Loading JSON data)

# Convert the JSON record to a one-row DataFrame; wrapping it in a list makes
# pandas treat the record as a single row.
transform_to_df = lambda data: pd.DataFrame([data])  # Feature -> Lambda function (Defining a quick transformation function)
test_df = transform_to_df(json_data)

# Remove unnecessary columns; only the ones that actually appear in the
# record are passed to drop().
columns_to_remove = {
    'policy_type',
    'registration_number',
    'insurance_amount',
    'user_id',
    'expiry_date',
    'qr_code_path',
    'policy_id',
}  # Feature -> Set (Handling unique values)
present_columns = columns_to_remove.intersection(test_df.columns)  # Feature -> Set (Using set intersection for efficient column removal)
test_df = test_df.drop(columns=list(present_columns))

# Load the saved label encoders and apply them to the test data, using the
# same lower-casing that was applied when the encoders were fitted.
encodable_columns = [name for name in unique_categories if name in test_df.columns]
for name in encodable_columns:
    encoder = load(f'label_encoders/{name}_encoder.joblib')  # Feature -> Data management using joblib (Loading a label encoder)
    normalized_values = test_df[name].astype(str).str.lower()
    test_df[name] = encoder.transform(normalized_values)

# Load the trained model and make predictions
model = load('custom_random_forest_classifier.joblib')  # Feature -> Data management using joblib (Loading the trained model)
predictions = model.predict(test_df)

# Output predictions
print(predictions)

# Translate the predicted label into a readable verdict: '1' means fraud.
if predictions[0] == '1':
    result = "Fraud Detected"
else:
    result = "No Fraud Detected"
print(result)


['0']
No Fraud Detected
