In [None]:
import pandas as pd
import numpy as np

In [None]:
train = pd.read_csv("fraudTrain.csv")
train.head()

In [None]:
train.isnull().sum()

In [None]:
train.describe()

In [None]:
test = pd.read_csv("fraudTest.csv")

data = pd.concat([train, test], axis=0)
data.head()

In [None]:
data.reset_index(inplace=True)
data.head()

In [None]:
data.isnull().sum()

In [None]:
import matplotlib.pylab as plt
import seaborn as sns

In [None]:
plt.figure(figsize=(12,6),dpi=100)
sns.countplot(x= data['is_fraud'])

In [None]:
data.info()

In [None]:
data = data.drop(['index', 'Unnamed: 0'], axis = 1)

In [None]:
# Per-category row counts, split into bars by the is_fraud flag.
plt.figure(dpi=200, figsize=(12, 6))
sns.countplot(data=data, x='category', hue='is_fraud')
# Tilt the category labels so they do not overlap.
plt.xticks(rotation=60)
plt.show()

In [None]:
x = data.drop(['is_fraud'], axis=1)
y = data['is_fraud']

In [None]:
from sklearn.preprocessing import OrdinalEncoder
cols = ['trans_date_trans_time', 'merchant', 'category', 'first', 'last',
        'gender', 'street', 'city', 'state', 'job', 'dob', 'trans_num']
encoder = OrdinalEncoder()
x[cols] = encoder.fit_transform(x[cols])

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the full feature frame, then replace x with the scaled
# values (MinMaxScaler's default target range is [0, 1]).
# NOTE(review): the scaler sees every row, including rows that later end up in
# the evaluation split - confirm this is acceptable.
scaler = MinMaxScaler().fit(x)
x = scaler.transform(x)

In [None]:
y = data[['is_fraud']].values

In [None]:
y

## Oversampling with SMOTE

In [None]:
from imblearn.over_sampling import SMOTE

In [None]:
smote = SMOTE(sampling_strategy='auto', random_state=42)

In [None]:
x_res, y_res = smote.fit_resample(x, y)

In [None]:
plt.figure(figsize=(10, 8)) 
sns.heatmap(data.corr(numeric_only = True), annot=True)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x_res, y_res, test_size = 0.2, random_state =42)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
lr = LogisticRegression()
lr.fit(x_train, y_train)

In [None]:
pred1 = lr.predict(x_test)

In [None]:
from sklearn.metrics import accuracy_score, precision_score

In [None]:
def evaluate_classification(y_true, y_pred):
    """Score predicted labels against the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by a classifier, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(accuracy, precision)`` as computed by scikit-learn's
        ``accuracy_score`` and ``precision_score`` with their default settings.
    """
    return accuracy_score(y_true, y_pred), precision_score(y_true, y_pred)

In [None]:
# (accuracy, precision) of the logistic-regression predictions.
evaluate_classification(y_true=y_test, y_pred=pred1)

In [None]:
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB

In [None]:
gnb = GaussianNB()
mnb = MultinomialNB()
bnb = BernoulliNB()

In [None]:
gnb.fit(x_train, y_train)

In [None]:
pred2 = gnb.predict(x_test)

In [None]:
evaluate_classification(y_test, pred2)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
dtc = DecisionTreeClassifier(max_depth = 50, random_state = 100)
dtc.fit(x_train, y_train)

In [None]:
pred3 = dtc.predict(x_test)

In [None]:
evaluate_classification(y_test, pred3)

In [None]:
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB

In [None]:
gnb = GaussianNB()
mnb = MultinomialNB()
bnb = BernoulliNB()

In [None]:
gnb.fit(x_train, y_train)
pred4 = gnb.predict(x_test)

In [None]:
evaluate_classification(y_test, pred4)

In [None]:
mnb.fit(x_train, y_train)
pred5= mnb.predict(x_test)

In [None]:
evaluate_classification(y_test, pred5)

In [None]:
bnb.fit(x_train, y_train)
pred6= bnb.predict(x_test)

In [None]:
evaluate_classification(y_test, pred6)

In [None]:
x_train[0]

In [None]:
import joblib

In [None]:
joblib.dump(dtc, 'model.joblib')

In [None]:
model = joblib.load('model.joblib')

In [None]:
# A single sample as a (1, 21) array: one row holding 21 feature values, all
# within [0, 1].
# NOTE(review): presumably copied from the x_train[0] output shown earlier -
# confirm.
inputData = np.array([[9.56736704e-01, 1.20412968e-03, 5.51458031e-01, 3.38575184e-01,
       1.09775808e-02, 3.72881356e-01, 3.91752577e-02, 1.00000000e+00,
       3.08617234e-01, 8.43093923e-01, 4.00000000e-02, 7.14597016e-01,
       2.85274996e-01, 7.46281288e-01, 8.52519905e-04, 3.93145161e-01,
       7.67039674e-01, 4.54927831e-01, 9.75671348e-01, 3.02413488e-01,
       7.36689065e-01]])

In [None]:
prediction = model.predict(inputData)

print(prediction)

In [None]:
if prediction == 0:
    print("Not Fraud")
else:
    print("Is Fraud")

In [None]:
# Second single-row sample: identical to the previous one except for the first
# three values (1, 1, 500).
# NOTE(review): the features were MinMax-scaled before training, so 500 lies
# far outside the range the model saw - presumably a raw, unscaled value;
# confirm whether it should be passed through the fitted scaler first.
inputData = np.array([[1, 1, 500, 3.38575184e-01,
       1.09775808e-02, 3.72881356e-01, 3.91752577e-02, 1.00000000e+00,
       3.08617234e-01, 8.43093923e-01, 4.00000000e-02, 7.14597016e-01,
       2.85274996e-01, 7.46281288e-01, 8.52519905e-04, 3.93145161e-01,
       7.67039674e-01, 4.54927831e-01, 9.75671348e-01, 3.02413488e-01,
       7.36689065e-01]])

In [None]:
prediction = model.predict(inputData)

print(prediction)

In [None]:
if prediction == 0:
    print("Not Fraud")
else:
    print("Is Fraud")