In [None]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [None]:
# Read the transactions CSV into a DataFrame and preview the first rows.
csv_path = "data/creditcard.csv"
df = pd.read_csv(csv_path)
df.head(n=5)

In [None]:
# Print a concise summary of the frame: column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# (number of rows, number of columns) of the loaded data.
df.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Bar chart of how many rows fall into each value of the "Class" label.
sns.set_style("darkgrid")
sns.countplot(x="Class", data=df)

In [None]:
# Exact row count per class label (numeric counterpart of the count plot).
df.Class.value_counts()

In [None]:
## Split the data by label so each group can be analysed on its own:
## Class == 1 rows go to `fraud`, Class == 0 rows go to `normal`.
is_fraud = df["Class"] == 1
is_normal = df["Class"] == 0

fraud = df[is_fraud]
normal = df[is_normal]

# Show the (rows, columns) shape of each subset.
print(fraud.shape, normal.shape)

In [None]:
## Summary statistics of the transaction amount for the fraud subset.
fraud["Amount"].describe()

In [None]:
## Summary statistics of the transaction amount for the normal subset.
normal["Amount"].describe()

In [None]:
# One histogram per numeric column, drawn on a single large figure.
df.hist(figsize=(15, 15), bins=30)
plt.show()

In [None]:
# Compare the distribution of transaction amounts for the fraud and normal
# subsets on two stacked panels that share the x-axis.
bins = 50
f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
f.suptitle('Amount per transaction by class')

# Configure each panel through its own Axes object. pyplot-level calls such as
# plt.ylabel / plt.yscale act only on the current (last-created) axes, which
# would leave the Fraud panel without a y-label and on a linear scale while
# the Normal panel is logarithmic, making the two panels hard to compare.
for ax, amounts, title in ((ax1, fraud.Amount, 'Fraud'), (ax2, normal.Amount, 'Normal')):
    ax.hist(amounts, bins=bins)
    ax.set_title(title)
    ax.set_ylabel('Number of Transactions')
    ax.set_yscale('log')

# The x-axis is shared, so the label goes on the bottom panel only and the
# limits set here propagate to the top panel as well.
ax2.set_xlabel('Amount ($)')
ax2.set_xlim((0, 20000))
plt.show()

In [None]:
# Pairwise correlation coefficients between all columns of df, rendered as an
# annotated heatmap. Correlation measures linear association between two
# columns; it is not a measure of feature importance.
plt.figure(figsize=(22, 15))
correlation = df.corr()
sns.heatmap(correlation, cmap="RdYlGn", annot=True)

In [None]:
## Separate the independent variables (every column except "Class")
## from the dependent variable (the "Class" label).
target = df["Class"]
inputs = df.drop(columns="Class")
inputs.head()

In [None]:
# Preview the first few label values.
target.head()

In [None]:
# Hold out 20% of the rows for evaluation, with a fixed seed for reproducibility.
# stratify=target keeps the class proportions the same in the train and test
# sets; without it, a rare positive class (as is typical for fraud labels) can
# end up over- or under-represented in the test set and distort the metrics.
X_train, X_test, y_train, y_test = train_test_split(
    inputs,
    target,
    test_size=0.2,
    random_state=20,
    stratify=target,
)
print("Training data:{}".format(X_train.shape))
print("Test data:{}".format(X_test.shape))

In [None]:
# Fit a logistic-regression classifier on the training split (fit() returns
# the estimator itself, so the calls can be chained), then predict labels for
# the held-out rows. max_iter is raised to 10000 iterations for the solver.
clf = LogisticRegression(max_iter=10000).fit(X_train, y_train)
y_predicted = clf.predict(X_test)

In [None]:
# Evaluate on the held-out split: clf.score() gives the mean accuracy, and the
# classification report adds per-class precision, recall and F1.
score = clf.score(X_test, y_test)
report = classification_report(y_test, y_predicted)
print(score)
print(report)