In [1]:
# Importing Libraries

import numpy as np
import pandas as pd
import datetime as dt
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
import joblib
import pickle


# Loading dataset
data = pd.read_csv(r"fraudTrain.csv")

testing = data[:10000]

useful_columns = ['category', 'amt', 'zip', 'lat', 'long', 'city_pop', 'merch_lat', 'merch_long',
                  'trans_date_trans_time', 'dob', 'is_fraud']
sample = data[useful_columns].copy()

# Converting DOB into age

sample['age'] = dt.date.today().year-pd.to_datetime(sample['dob']).dt.year
sample['hour'] = pd.to_datetime(sample['trans_date_trans_time']).dt.hour
sample['day'] = pd.to_datetime(sample['trans_date_trans_time']).dt.dayofweek
sample['month'] = pd.to_datetime(sample['trans_date_trans_time']).dt.month
sample.pop('trans_date_trans_time')
sample.pop('dob')

y = sample.pop('is_fraud')

# Converting categorical data into dummy variables
X = pd.get_dummies(sample, drop_first=True)

# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

new_X_train, new_y_train = SMOTE().fit_resample(X_train, y_train)
new_X_test, new_y_test = SMOTE().fit_resample(X_test, y_test)


# Model fitting

rfc = RandomForestClassifier(random_state=42)
rfc.fit(new_X_train, new_y_train)
print(classification_report(new_y_test, rfc.predict(new_X_test)))

"""filename = "rfc_model_1.joblib"
joblib.dump(rfc, filename)"""

# loaded_model = joblib.load("rfc_model_1.joblib")

# Saving the model to a pickle file
pickle.dump(rfc, open('model.pkl', 'wb'))


              precision    recall  f1-score   support

           0       0.91      1.00      0.95    257815
           1       1.00      0.90      0.95    257815

    accuracy                           0.95    515630
   macro avg       0.95      0.95      0.95    515630
weighted avg       0.95      0.95      0.95    515630

