<a href="https://colab.research.google.com/github/akshay-akm/codsoft/blob/main/creditcard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Copyright 2023 Google LLC. Double-click for license information.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [None]:
import numpy as np

import pandas as pd

import matplotlib.pyplot as plt



from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score,classification_report



import warnings

# Install a catch-all "ignore" filter: from here on no warning of any
# category is shown in the notebook output.
warnings.simplefilter('ignore')

In [None]:
# Load the training and test archives into separate frames.
# The two frames must come from different files: reading fraudTest.csv.zip
# for both would duplicate every row once they are concatenated and would
# leak identical rows across the later train/test split.
# NOTE(review): assumes fraudTrain.csv.zip has been uploaded to /content
# alongside fraudTest.csv.zip — confirm the file name before running.
train_data = pd.read_csv('/content/fraudTrain.csv.zip')

test_data = pd.read_csv('/content/fraudTest.csv.zip')

In [None]:
train_data.info()

In [None]:
train_data.isnull().sum()

In [None]:
train_data.head(3)

In [None]:
test_data.info()

In [None]:
test_data.isnull().sum()

In [None]:
test_data.head(3)

In [None]:
data = pd.concat([train_data,test_data])

In [None]:
data.shape

In [None]:
# Pearson correlation of every numeric feature with the `is_fraud` label.
# Columns are chosen with select_dtypes(include='number') rather than by
# excluding the 'object' dtype, so string/category/datetime columns can never
# reach Series.corr (which would raise on them).
numeric_features = (
    data.select_dtypes(include='number')
    .columns
    .drop('is_fraud', errors='ignore')  # the label is not correlated with itself
)

corr_result = {
    feature: data[feature].corr(data['is_fraud'])
    for feature in numeric_features
}
# Bare expression: displays the {column: correlation} mapping as cell output.
corr_result

In [None]:
data.drop(columns=['Unnamed: 0','trans_date_trans_time','first','last','gender','street','job','dob','trans_num'],inplace=True)

In [None]:
data.info()

In [None]:
data['is_fraud'].value_counts()

In [None]:
# Bar chart of the number of transactions per class.
# Labels and heights are both taken from one value_counts() Series so each
# bar is guaranteed to sit at its own class label. Pairing unique() with
# value_counts() is unsafe: unique() follows order of first appearance while
# value_counts() is ordered by descending frequency.
class_counts = data['is_fraud'].value_counts().sort_index()

plt.bar(class_counts.index, class_counts.values, width=0.2)

plt.xlabel('Legitimate/Fraud')

plt.ylabel('No of transactions')

plt.show()

In [None]:
# Partition the rows by label value: 0 -> legitimate, 1 -> fraud.
fraud_flag = data['is_fraud']

legitimate = data.loc[fraud_flag.eq(0)]

fraud = data.loc[fraud_flag.eq(1)]

In [None]:
legitimate = legitimate.sample(n = len(fraud))

legitimate.shape

In [None]:
fraud.shape

In [None]:
data = pd.concat([legitimate,fraud])

In [None]:
# Bar chart of the number of transactions per class after balancing.
# Labels and heights come from the same value_counts() Series so a bar can
# never be attached to the wrong class (unique() and value_counts() do not
# share an ordering).
class_counts = data['is_fraud'].value_counts().sort_index()

plt.bar(class_counts.index, class_counts.values, width=0.2)

plt.xlabel('Legitimate/Fraud')

plt.ylabel('No of transactions')

plt.show()

In [None]:
# Replace each text column with integer codes. A single encoder is refit for
# every column, so after the loop `le` holds only the mapping of the last
# column processed ('state').
le = LabelEncoder()

for text_column in ('merchant', 'category', 'city', 'state'):
    data[text_column] = le.fit_transform(data[text_column])

In [None]:
data.info()

In [None]:
x_data = data.iloc[:,:-1].values

y_data = data.iloc[:,-1].values

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the shuffled
# split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.3,
    shuffle=True,
    random_state=42,
)

In [None]:
# Logistic regression on standardized features.
# x_train holds the raw column values, whose magnitudes differ widely; the
# default solver stops after 100 iterations and, because all warnings are
# silenced at the top of the notebook, a failure to converge would go
# unnoticed. Standardizing inside a pipeline (so the same scaling is applied
# automatically in every later predict call) and raising max_iter addresses
# both. `log_reg` still exposes fit/predict exactly as before.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

log_reg = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))

log_reg.fit(x_train, y_train)

In [None]:
# Score the logistic-regression model on the same rows it was trained on.
log_pred = log_reg.predict(x_train)

log_train_accuracy = round(accuracy_score(y_train, log_pred), 3)
print("Accuracy score: ", log_train_accuracy)

log_train_report = classification_report(y_train, log_pred)
print("Classification report:\n", log_train_report)

In [None]:
# Decision tree with a fixed seed so the fitted tree (and its metrics) are
# reproducible between runs, consistent with the seeded train/test split.
dt = DecisionTreeClassifier(random_state=42)

dt.fit(x_train, y_train)

In [None]:
# Training-set metrics: how well the tree reproduces the labels it was fit on.
dt_pred = dt.predict(x_train)

print("Accuracy score: ",round(accuracy_score(y_train,dt_pred),3))

print("Classification report:\n",classification_report(y_train,dt_pred))

# Held-out metrics: scores on rows the tree never saw during fitting. The
# training-set numbers alone cannot show whether the model generalizes.
dt_test_pred = dt.predict(x_test)

print("Test accuracy score: ", round(accuracy_score(y_test, dt_test_pred), 3))

print("Test classification report:\n", classification_report(y_test, dt_test_pred))

In [None]:
# Random forest of 50 trees with a fixed seed so the bootstrap samples and
# feature choices — and therefore the reported metrics — are reproducible
# between runs, consistent with the seeded train/test split.
rfc = RandomForestClassifier(n_estimators=50, random_state=42)

rfc.fit(x_train, y_train)

In [None]:
# Training-set metrics: how well the forest reproduces the labels it was fit on.
rfc_pred = rfc.predict(x_train)

print("Accuracy score: ",round(accuracy_score(y_train,rfc_pred),3))

print("Classification report:\n",classification_report(y_train,rfc_pred))

# Held-out metrics: scores on rows the forest never saw during fitting. The
# training-set numbers alone cannot show whether the model generalizes.
rfc_test_pred = rfc.predict(x_test)

print("Test accuracy score: ", round(accuracy_score(y_test, rfc_test_pred), 3))

print("Test classification report:\n", classification_report(y_test, rfc_test_pred))

In [None]:
# Score the logistic-regression model on the held-out 30% of the rows.
test_pred = log_reg.predict(x_test)

log_test_accuracy = round(accuracy_score(y_test, test_pred), 3)
print("Accuracy score: ", log_test_accuracy)

log_test_report = classification_report(y_test, test_pred)
print("Classification report:\n", log_test_report)