In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Load the dataset: read the raw credit-card CSV into a DataFrame.
raw_csv_path = 'data/raw/creditcard.csv'
df = pd.read_csv(raw_csv_path)

In [None]:
# Preview only the first rows instead of rendering the whole frame;
# keeps the saved notebook small and the output readable.
df.head()

In [None]:
# Summarise the raw frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Count missing values per column of the raw frame.
df.isna().sum()

In [None]:
# (rows, columns) of the raw frame, before de-duplication.
df.shape

In [None]:
# Remove exact duplicate rows, keeping the first occurrence and the
# original index labels; `df` itself is left untouched.
new_df = df.drop_duplicates(keep="first", ignore_index=False)

In [None]:
# (rows, columns) after duplicate rows were dropped.
new_df.shape

In [None]:
# Target is the "Class" column; features are every remaining column
# except "Time", which is excluded from modelling.
y = new_df["Class"]
x = new_df.drop(["Class", "Time"], axis=1)

In [None]:
# (rows, feature columns) of the feature matrix.
x.shape

In [None]:
# Length of the target vector; should match the row count of `x`.
y.shape

In [None]:
# Train-Test split, then balancing with SMOTE on the training fold only.
#
# The split must come first: SMOTE synthesises minority samples by
# interpolating between real rows, so resampling the full dataset and
# splitting afterwards places points derived from training rows in the test
# set (data leakage) and makes every test metric optimistic.
# `stratify=y` keeps the class ratio the same in both folds.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0, stratify=y
)
assert len(X_train_raw) + len(X_test) == len(x)

# Seeded so the synthetic samples are reproducible across kernel restarts.
oversample = SMOTE(random_state=0)
X_resampled, y_resampled = oversample.fit_resample(X_train_raw, y_train_raw)

# Later cells consume X_train / y_train; the test fold keeps its original,
# un-resampled class distribution.
X_train, y_train = X_resampled, y_resampled


In [None]:
# Feature Scaling
# Learn the standardisation statistics from the training features only,
# then apply that same fitted transform to both folds.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Model Building
# Fixed seed (same value as the train/test split) so the fitted forest and
# every metric derived from it are reproducible on Restart & Run All.
model = RandomForestClassifier(random_state=0)


In [None]:
# Model Training: fit the random forest on the scaled training features.
model.fit(X_train, y_train)

In [None]:
# Prediction: class labels predicted for the scaled test features.
y_pred = model.predict(X_test)

In [None]:
# Model Evaluation: fraction of test rows whose predicted label equals y_test.
accuracy_score(y_test, y_pred)

In [None]:
# F1 score of the test predictions (scikit-learn's default binary averaging).
f1_score(y_test, y_pred)

In [None]:

# Hyper-parameter search space for the random forest
# (3 * 3 * 3 * 3 = 81 combinations).
params = {
    'n_estimators': [100, 200, 500],
    'max_depth': [10, 50, 100],
    'min_samples_split': [2, 10, 20],
    'min_samples_leaf': [1, 2, 5]
}

# - scoring='f1': select on the same metric the notebook reports, rather than
#   the estimator's default accuracy score.
# - random_state=0: candidates differ only by their hyper-parameters, not by
#   random seed, so the ranking is reproducible.
# - n_jobs=-1: the 81 x 5 cross-validation fits are independent; run them on
#   all available cores.
grid = GridSearchCV(
    RandomForestClassifier(random_state=0),
    param_grid=params,
    scoring='f1',
    cv=5,
    n_jobs=-1,
)


In [None]:
# Run the search: every combination in `params` is cross-validated (cv=5)
# on the training data. This is by far the most expensive cell.
grid.fit(X_train, y_train)

In [None]:
# Parameter combination with the best cross-validated score.
grid.best_params_

In [None]:
# Model Improvement
from sklearn.ensemble import StackingClassifier
from sklearn.feature_selection import RFECV
from xgboost import XGBClassifier

# Try XGBoost classifier on the same scaled train/test folds as the forest.
xgb = XGBClassifier(random_state=0)
xgb.fit(X_train, y_train)
print(xgb.score(X_test, y_test))

# SMOTE handling class imbalance
# The feature frame is named `x` (lowercase); the previous `X` was never
# defined and raised NameError on a fresh kernel.
# NOTE(review): `x`/`y` are the full, unscaled dataset, so X_sm overlaps the
# rows held out in X_test and is not in the scaled feature space of X_train.
# Do not score `stack` on X_test as-is.
oversample = SMOTE(random_state=0)
X_sm, y_sm = oversample.fit_resample(x, y)

# Feature Selection: recursive feature elimination with cross-validation,
# ranking features with a random forest.
selector = RFECV(RandomForestClassifier(random_state=0))
selector.fit(X_sm, y_sm)
X_reduced = selector.transform(X_sm)

# Stacked Model: random forest + XGBoost base learners, trained on the
# selected features only.
rf = RandomForestClassifier(random_state=0)
xgb = XGBClassifier(random_state=0)
stack = StackingClassifier(estimators=[('rf', rf), ('xgb', xgb)])
stack.fit(X_reduced, y_sm)

In [None]:
# Model Artifact
import pickle

# Context manager so the file is flushed and closed even if pickling fails.
with open('rf_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Flask App
from flask import Flask, request, jsonify
app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    """Score one or more transactions posted as JSON.

    The body is expected to be a list of feature rows (or a single row) in
    the same column order the model was trained on -- assumed from how the
    payload is passed to ``model.predict``; TODO confirm the client schema.

    Returns:
        200 with ``{"prediction": [...]}`` (one label per row), or
        400 with ``{"error": ...}`` when the body is not JSON or cannot be
        converted to a numeric feature matrix of the expected width.
    """
    data = request.get_json(silent=True)
    if data is None:
        return jsonify({'error': 'request body must be valid JSON'}), 400

    try:
        # preprocessing: coerce to a 2-D float matrix and apply the same
        # scaler that was fitted on the training data, since the model was
        # trained on scaled features.
        features = np.asarray(data, dtype=float)
        if features.ndim == 1:
            features = features.reshape(1, -1)
        prediction = model.predict(scaler.transform(features))
    except (TypeError, ValueError) as exc:
        return jsonify({'error': str(exc)}), 400

    # NumPy arrays are not JSON-serialisable; convert to plain Python ints.
    return jsonify({'prediction': prediction.tolist()})

if __name__ == '__main__':
    # debug must stay off when binding to 0.0.0.0: the Werkzeug debugger
    # allows arbitrary code execution for anyone who can reach the port.
    app.run(debug=False, host='0.0.0.0', port=9696)

In [None]:
%%writefile Dockerfile
# Dockerfile
# Kept in its own %%writefile cell: this is not Python and cannot share a
# cell with Python code.
# NOTE(review): python:3.7 is end-of-life; the image's Python and
# scikit-learn versions must match the environment that pickled
# rf_model.pkl -- confirm before changing the tag.
FROM python:3.7
WORKDIR /app
# Install dependencies before copying the sources so this layer is cached
# across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
EXPOSE 9696
ENTRYPOINT ["python", "app.py"]

In [None]:
%%writefile deployment.yaml
# Kubernetes Deployment
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fraud-deployment
spec:
  replicas: 3
  selector:
    matchLabels:
      app: fraud
  template:
    metadata:
      labels:
        app: fraud
    spec:
      containers:
      - name: fraud-model
        image: fraud_image
        ports:
        - containerPort: 9696
        # A CPU request is required for the CPU-utilisation autoscaler that
        # targets this Deployment: utilisation is computed as a percentage
        # of the request. Starting value only -- tune to the observed load.
        resources:
          requests:
            cpu: 250m

In [None]:
%%writefile hpa.yaml
# Horizontal Pod Autoscaler
apiVersion: autoscaling/v1
kind: HorizontalPodAutoscaler
metadata:
  name: fraud-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: fraud-deployment
  minReplicas: 3
  maxReplicas: 10
  targetCPUUtilizationPercentage: 50