# Novelty for transfer learning 

## Importing libraries

In [3]:
import pandas as pd
import os
import shutil
from PIL import Image
from keras.models import Model
from keras.applications import VGG16, ResNet50
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
import seaborn as sns
from sklearn.utils import shuffle, resample
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
import tensorflow.keras as keras
from keras import layers
import matplotlib.pyplot as plt
from tabulate import tabulate
import joblib
from sklearn.svm import SVC
from skimage import color, io, transform
import pickle
class_label = ['Non-Default(0)','Default(1)'] # env var

## Loading the dataset and balancing 

In [4]:
# Load the dataset
df = pd.read_csv('D:\Barath Suresh Docs\PROGRAMMING\MACHINE LEARNING\credit_card_fraud_detection\creditcard.csv')
# Separate the fraud and non-fraud examples
fraud = df[df['Class'] == 1]
non_fraud = df[df['Class'] == 0]
# Random under-sampling of the majority class
non_fraud_downsampled = resample(non_fraud, replace=False, n_samples=len(fraud), random_state=42)
# Combine minority class with downsampled majority class
balanced_df = pd.concat([fraud, non_fraud_downsampled])
# Shuffle the examples
balanced_df = shuffle(balanced_df, random_state=42)
# Print the class distribution of the balanced dataset
print(balanced_df['Class'].value_counts())

0    492
1    492
Name: Class, dtype: int64


## Creating images of the dataset according to binary class 

In [5]:
# # Create images of transaction amount against time
# for index, row in balanced_df.iterrows():
#     filename = f"datasetImages//{str(int(row['Class']))}//{index}.png"
#     img = Image.new('RGB', (224, 224), color='white')
#     pixels = img.load()
#     for i in range(224):
#         for j in range(224):
#             pixel = int(df.iloc[index, j % 28] * 255)
#             pixels[i, j] = (pixel, pixel, pixel)
#     img.save(filename)

## Define the path to the directory containing the PNG files

In [6]:
data_dir = 'D:\\Barath Suresh Docs\\PROGRAMMING\\MACHINE LEARNING\\credit_card_fraud_detection\\novelty\\datasetImages'

## Split the files into training and validation sets

In [7]:
# train_dir = os.path.join(data_dir, "training")
# val_dir = os.path.join(data_dir, "validation")
# if not os.path.exists(train_dir):
#     os.makedirs(train_dir)
# if not os.path.exists(val_dir):
#     os.makedirs(val_dir)

# classes = ["0", "1"]  # classes corresponding to fraud and non-fraud transactions
# for cls in classes:
#     filenames = os.listdir(os.path.join(data_dir, cls))
#     train_files, val_files = train_test_split(filenames, test_size=0.2, random_state=42)
    
#     for filename in train_files:
#         src = os.path.join(data_dir, cls, filename)
#         dst = os.path.join(train_dir, cls, filename)
#         if not os.path.exists(os.path.join(train_dir, cls)):
#             os.makedirs(os.path.join(train_dir, cls))
#         shutil.copyfile(src, dst)
        
#     for filename in val_files:
#         src = os.path.join(data_dir, cls, filename)
#         dst = os.path.join(val_dir, cls, filename)
#         if not os.path.exists(os.path.join(val_dir, cls)):
#             os.makedirs(os.path.join(val_dir, cls))
#         shutil.copyfile(src, dst)

## Load the VGG16 model and creating custom model

In [10]:
vgg_model = VGG16(include_top=False, input_shape=(224, 224, 3))
x = vgg_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)

custom_model = Model(inputs=vgg_model.input, outputs=predictions)
custom_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

## Generate image features using the VGG16 model

In [20]:
# train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
# test_datagen = ImageDataGenerator(rescale=1./255)

# train_generator = train_datagen.flow_from_directory(directory='datasetImages/training',
#                                                     target_size=(224, 224),
#                                                     batch_size=32,
#                                                     class_mode="binary"
#                                                     )

# validation_generator = test_datagen.flow_from_directory(directory='datasetImages/validation',
#                                                          target_size=(224, 224),
#                                                          batch_size=32,
#                                                          class_mode='binary',
#                                                          )

# custom_model.fit_generator(generator=train_generator, validation_data=validation_generator,epochs=10)

train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator = train_datagen.flow_from_directory(directory='datasetImages',target_size=(224,224),batch_size=32,class_mode="binary",subset="training")
test_generator = train_datagen.flow_from_directory(directory='datasetImages',target_size=(224,224),batch_size=32,class_mode="binary",subset="validation")

train_features = custom_model.predict(train_generator)
test_features = custom_model.predict(test_generator)

Found 788 images belonging to 2 classes.
Found 196 images belonging to 2 classes.


## Train an SVM classifier on the generated image features

In [21]:
# Train an SVM classifier on the generated image features
print(train_features.shape)
flattened_feat_train = train_features.reshape(788,-1)
print(flattened_feat_train)
svm_classifier = SVC(kernel="linear")
svm_classifier.fit(flattened_feat_train, train_generator.classes)

(788, 1)
[[0.49551424]
 [0.49564776]
 [0.49601874]
 [0.49281555]
 [0.47323114]
 [0.50964814]
 [0.4377977 ]
 [0.4743622 ]
 [0.47757357]
 [0.49003944]
 [0.4774382 ]
 [0.45651042]
 [0.5166119 ]
 [0.516746  ]
 [0.48349842]
 [0.51338404]
 [0.49440697]
 [0.48903805]
 [0.47939247]
 [0.49832126]
 [0.4655985 ]
 [0.49305084]
 [0.48907596]
 [0.4793256 ]
 [0.49230158]
 [0.48118538]
 [0.49652067]
 [0.49440345]
 [0.49438938]
 [0.4679296 ]
 [0.493644  ]
 [0.5192512 ]
 [0.5040227 ]
 [0.5066155 ]
 [0.48029405]
 [0.45292202]
 [0.49189666]
 [0.50296557]
 [0.4829487 ]
 [0.4816285 ]
 [0.48870492]
 [0.5067266 ]
 [0.47005883]
 [0.5008109 ]
 [0.49889475]
 [0.47144902]
 [0.4908481 ]
 [0.44615883]
 [0.50013685]
 [0.5012045 ]
 [0.48319927]
 [0.49022198]
 [0.49216875]
 [0.5036187 ]
 [0.49717548]
 [0.47428992]
 [0.49762663]
 [0.45347595]
 [0.48610264]
 [0.48762956]
 [0.48439977]
 [0.506847  ]
 [0.5044687 ]
 [0.48209265]
 [0.47729635]
 [0.48695844]
 [0.5096726 ]
 [0.4647845 ]
 [0.46798563]
 [0.503437  ]
 [0.4829154

## Evaluate the SVM classifier on the testing set

In [26]:
print(test_features.shape)
flattened_feat_test = test_features.reshape(196,-1)
svm_predictions = svm_classifier.predict(flattened_feat_test)
svm_accuracy = accuracy_score(test_generator.classes, svm_predictions)
print('SVM accuracy: {:.2f}%'.format(svm_accuracy * 100))

(196, 1)
SVM accuracy: 80.10%
[2.04476656e-06]
