# CEN 376 - Data Mining Project
In this implementation, we will classify the records in creditcard.csv according to their `Class` label.

We will first initialize our program by loading the data into a DataFrame (using pandas) and splitting it into training and testing sets.

We will then train classifiers using six different techniques: Random Forest Classifier, Multi-layer Perceptron (MLP) Classifier, Gradient Boosting Classifier, Ada Boost Classifier, Linear Discriminant Analysis Classifier and Quadratic Discriminant Analysis Classifier.

We will also print the confusion matrix, classification report and accuracy score for each classifier that we've trained.

---

Topic: Data mining applications in finance 

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the data.
data = pd.read_csv('creditcard.csv')

# Separate the feature columns from the target label.
X = data.drop(['Class'], axis=1)
y = data['Class']

# Split the data into training and testing sets.
# - random_state pins the shuffle so that Restart & Run All reproduces the
#   same split; without it every re-run of this cell draws a new test set and
#   the classifiers below are no longer compared on the same samples.
# - stratify=y keeps the class proportions equal in both sets, so the test set
#   cannot end up with an unrepresentative share of the rare class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [None]:
# Train a random forest classifier.
from sklearn.ensemble import RandomForestClassifier

# random_state fixes the bootstrap sampling and the per-split feature sampling,
# so the fitted forest and the metrics printed below are reproducible.
classifier = RandomForestClassifier(n_estimators=10, max_depth=3, random_state=42)
classifier.fit(X_train, y_train)

# Predict the target variable on the testing set.
y_pred = classifier.predict(X_test)

# Evaluate the performance of the model.
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))
print('Accuracy Score:')
print(accuracy_score(y_test, y_pred))

Confusion Matrix:
[[85283    12]
 [   47   101]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.89      0.68      0.77       148

    accuracy                           1.00     85443
   macro avg       0.95      0.84      0.89     85443
weighted avg       1.00      1.00      1.00     85443

Accuracy Score:
0.9993094811745842


In [None]:
# Train an MLP classifier.
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# MLPs are sensitive to feature scaling: fitted on the raw columns, a previous
# run of this cell predicted no positive samples at all. The scaler is placed
# inside the pipeline so it is fitted on the training data only and then
# re-applied to the test data (no leakage from the test set).
# random_state fixes the weight initialisation and batch shuffling so the
# result is reproducible.
classifier = make_pipeline(
    StandardScaler(),
    MLPClassifier(hidden_layer_sizes=(10, 5), max_iter=1000, random_state=42),
)
classifier.fit(X_train, y_train)

# Predict the target variable on the testing set.
y_pred = classifier.predict(X_test)

# Evaluate the performance of the model.
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))
print('Accuracy Score:')
print(accuracy_score(y_test, y_pred))

Confusion Matrix:
[[85302     0]
 [  141     0]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85302
           1       0.00      0.00      0.00       141

    accuracy                           1.00     85443
   macro avg       0.50      0.50      0.50     85443
weighted avg       1.00      1.00      1.00     85443

Accuracy Score:
0.9983497770443454


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Train a gradient boosting classifier.
from sklearn.ensemble import GradientBoostingClassifier

# random_state seeds the trees built at each boosting iteration so that the
# fitted model and the metrics below are reproducible across runs.
classifier = GradientBoostingClassifier(n_estimators=10, max_depth=3, random_state=42)
classifier.fit(X_train, y_train)

# Predict the target variable on the testing set.
y_pred = classifier.predict(X_test)

# Evaluate the performance of the model.
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))
print('Accuracy Score:')
print(accuracy_score(y_test, y_pred))

Confusion Matrix:
[[85288    16]
 [   66    73]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85304
           1       0.82      0.53      0.64       139

    accuracy                           1.00     85443
   macro avg       0.91      0.76      0.82     85443
weighted avg       1.00      1.00      1.00     85443

Accuracy Score:
0.9990402958697612


In [None]:
# Train an AdaBoost classifier.
from sklearn.ensemble import AdaBoostClassifier

# random_state seeds the base estimators fitted at each boosting round so that
# the fitted model and the metrics below are reproducible across runs.
classifier = AdaBoostClassifier(n_estimators=10, random_state=42)
classifier.fit(X_train, y_train)

# Predict the target variable on the testing set.
y_pred = classifier.predict(X_test)

# Evaluate the performance of the model.
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))
print('Accuracy Score:')
print(accuracy_score(y_test, y_pred))

Confusion Matrix:
[[85270    25]
 [   42   106]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.81      0.72      0.76       148

    accuracy                           1.00     85443
   macro avg       0.90      0.86      0.88     85443
weighted avg       1.00      1.00      1.00     85443

Accuracy Score:
0.9992158515033414


In [None]:
# Linear Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Fit on the training split (fit() returns the estimator itself), then
# predict the target variable on the testing split.
classifier = LinearDiscriminantAnalysis().fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# Report each metric under its heading, one heading/value pair per call.
print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')
print("Classification Report:", classification_report(y_test, y_pred), sep='\n')
print('Accuracy Score:', accuracy_score(y_test, y_pred), sep='\n')

Confusion Matrix:
[[85280    15]
 [   36   112]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.88      0.76      0.81       148

    accuracy                           1.00     85443
   macro avg       0.94      0.88      0.91     85443
weighted avg       1.00      1.00      1.00     85443

Accuracy Score:
0.999403110845827


In [None]:
# Quadratic Discriminant Analysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Fit on the training split (fit() returns the estimator itself), then
# predict the target variable on the testing split.
classifier = QuadraticDiscriminantAnalysis().fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# Report each metric under its heading, one heading/value pair per call.
print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')
print("Classification Report:", classification_report(y_test, y_pred), sep='\n')
print('Accuracy Score:', accuracy_score(y_test, y_pred), sep='\n')

Confusion Matrix:
[[83346  1949]
 [   19   129]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     85295
           1       0.06      0.87      0.12       148

    accuracy                           0.98     85443
   macro avg       0.53      0.92      0.55     85443
weighted avg       1.00      0.98      0.99     85443

Accuracy Score:
0.9769671008742671
