In [None]:
# Importing the dependencies
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [None]:
# Read the credit-card transactions CSV into a pandas DataFrame
csv_path = '../input/creditcardfraud/creditcard.csv'
dataset = pd.read_csv(csv_path)

In [None]:
# Report the dataset dimensions as a (rows, columns) tuple
dataset_shape = dataset.shape
print(dataset_shape)

In [None]:
# Preview the first five rows of the dataset
dataset.head(n=5)

In [None]:
# Preview the last five rows of the dataset
dataset.tail(n=5)

In [None]:
# Count the missing values in each column
dataset.isna().sum()

In [None]:
# Count how many transactions carry each value of the 'Class' column
class_labels = dataset['Class']
class_labels.value_counts()

In [None]:
# Print the summary produced by DataFrame.info() for the loaded dataset
dataset.info()

In [None]:
# Descriptive statistics for the dataset, as computed by DataFrame.describe()
summary_statistics = dataset.describe()
summary_statistics

In [None]:
# Split the transactions by class label: rows with Class == 0 go to `legit`,
# rows with Class == 1 go to `fraud`
legit_mask = dataset['Class'] == 0
fraud_mask = dataset['Class'] == 1
legit = dataset.loc[legit_mask]
fraud = dataset.loc[fraud_mask]

In [None]:
# Per-class mean of every column, to compare the two kinds of transaction
dataset.groupby(by='Class').mean()

In [None]:
# Under-sample the legit transactions so both classes end up the same size.
# The sample size is taken from the fraud frame instead of a hard-coded count,
# and the draw is seeded so that Restart & Run All reproduces the same rows
# (and therefore the same model and scores) every time.
legit_sample = legit.sample(n=len(fraud), random_state=2)

In [None]:
# Stack the sampled legit rows and the fraud rows into a single DataFrame
balanced_parts = [legit_sample, fraud]
new_dataset = pd.concat(balanced_parts, axis=0)

In [None]:
# Checking the shape of the under-sampled dataset (sampled legit rows + fraud rows)
new_dataset.shape

In [None]:
# Preview the first five rows of the under-sampled dataset
new_dataset.head(n=5)

In [None]:
# Preview the last five rows of the under-sampled dataset
new_dataset.tail(n=5)

In [None]:
# Count how many rows of the under-sampled dataset carry each 'Class' value
sampled_class_labels = new_dataset['Class']
sampled_class_labels.value_counts()

In [None]:
# Per-class mean of every column in the under-sampled dataset
new_dataset.groupby(by='Class').mean()

In [None]:
# Keep only the last seven columns of the under-sampled dataset
n_tail_columns = 7
X_data = new_dataset.iloc[:, -n_tail_columns:]

In [None]:
# Pairwise correlation between the selected columns
corrx = X_data.corr()

# Visualizing the correlation matrix as an annotated, square-celled heatmap
heatmap_options = dict(annot=True, cmap='plasma', square=True)
sns.heatmap(data=corrx, **heatmap_options)

In [None]:
# Pair plot of the selected columns: one panel per pair of columns in X_data
sns.pairplot(data=X_data)

In [None]:
# Splitting dataset into features (every column except 'Class') and target
x = new_dataset.drop(columns=['Class'])
y = new_dataset['Class']

# Hold out 10% of the rows for testing; stratify on y so both splits keep the
# same class proportions, and fix the seed so the split is reproducible
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, stratify=y, random_state=2)

# The features are fed to the solver unscaled, so the default cap of 100
# iterations can stop the optimiser before it converges (scikit-learn then
# emits a ConvergenceWarning). Give it a larger iteration budget.
model = LogisticRegression(max_iter=1000)

# Fitting the model on the training data
model.fit(x_train, y_train)

In [None]:
# Predict the class of every training row with the fitted model
train_data_pred = model.predict(x_train)

# Fraction of training rows whose predicted class matches the true class
train_score = metrics.accuracy_score(y_true=y_train, y_pred=train_data_pred)
print('Accuracy score : ', train_score)

In [None]:
# Predict the class of every held-out test row with the fitted model
test_data_pred = model.predict(x_test)

# Accuracy on the testing data. Stored under its own name so the training
# accuracy held in `train_score` is not overwritten by the test result.
test_score = metrics.accuracy_score(y_test, test_data_pred)
print('Accuracy score : ', test_score)

In [None]:
# Function to plot confusion Matrix
def plot_confusion_matrix(cm,
                          classes,
                          normalize=False,
                          title='Confusion Matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated image on the current figure.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are drawn as true labels, columns as
        predicted labels.
    classes : sequence
        Tick labels, given in the same order as the rows/columns of ``cm``.
    normalize : bool, default False
        If True, each row is divided by its sum before plotting, so both the
        colours and the printed numbers show per-true-class fractions.
    title : str, default 'Confusion Matrix'
        Title of the plot.
    cmap : matplotlib colormap, default ``plt.cm.Blues``
        Colormap used for the image.

    Returns
    -------
    None
        The plot is drawn through the pyplot state machine.
    """
    cm = np.asarray(cm)

    # Normalise *before* drawing so that the image and the cell annotations
    # are based on the same numbers.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows without any samples stay at 0 instead of becoming NaN.
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    # Honour the caller-supplied colormap.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is, fractions with two decimals.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells above half of the maximum are dark under a sequential colormap,
    # so their annotation is drawn in white to stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()
  
# Confusion matrix of the predictions on the training data.
# The label order is pinned to [0, 1]: row/column 0 is class 0 (legit) and
# row/column 1 is class 1 (fraud), so the tick labels are listed in that same
# order. Listing them the other way round would mislabel every cell.
cm = metrics.confusion_matrix(y_train, train_data_pred, labels=[0, 1])
plot_confusion_matrix(cm, classes=['Normal', 'Fraudlent'])

In [None]:
# Building a predictive system
# One transaction, with values listed in the same order as the training feature columns
input_data = (1,-1.358354062,-1.340163075,1.773209343,0.379779593,-0.503198133,1.800499381,0.791460956,0.247675787,-1.514654323,0.207642865,0.624501459,0.066083685,0.717292731,-0.165945923,2.345864949,-2.890083194,1.109969379,-0.121359313,-2.261857095,0.524979725,0.247998153,0.771679402,0.909412262,-0.689280956,-0.327641834,-0.139096572,-0.055352794,-0.059751841,378.66)

# Making input data as numpy array
input_data_as_numpy_array = np.asarray(input_data)

# Reshaping the input data into a single row (1 sample, n features)
reshaped_input_data = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so hand it a DataFrame carrying the same
# column names: this avoids scikit-learn's feature-name mismatch warning and
# fails loudly if the number of values does not match the training columns.
input_frame = pd.DataFrame(reshaped_input_data, columns=x_train.columns)

# Predicting the class of the input transaction
prediction = model.predict(input_frame)

# Class 1 is reported as fraudulent, anything else as normal
if prediction[0] == 1:
    print(" Fraudlent")
else:
    print(" Normal")