In [1]:
import pandas as pd

# Load the dataset
data = pd.read_csv('fraudTest.csv')

# Check the first few rows of the data
print(data.head())


   Unnamed: 0 trans_date_trans_time            cc_num  \
0           0   2020-06-21 12:14:25  2291163933867244   
1           1   2020-06-21 12:14:33  3573030041201292   
2           2   2020-06-21 12:14:53  3598215285024754   
3           3   2020-06-21 12:15:15  3591919803438423   
4           4   2020-06-21 12:15:17  3526826139003047   

                               merchant        category    amt   first  \
0                 fraud_Kirlin and Sons   personal_care   2.86    Jeff   
1                  fraud_Sporer-Keebler   personal_care  29.84  Joanne   
2  fraud_Swaniawski, Nitzsche and Welch  health_fitness  41.28  Ashley   
3                     fraud_Haley Group        misc_pos  60.05   Brian   
4                 fraud_Johnston-Casper          travel   3.19  Nathan   

       last gender                       street  ...      lat      long  \
0   Elliott      M            351 Darlene Green  ...  33.9659  -80.9355   
1  Williams      F             3638 Marsh Union  ...  40.3207 

In [2]:
# Check for missing values
print(data.isnull().sum())

# Example: Dropping rows with missing values
data = data.dropna()


Unnamed: 0               0
trans_date_trans_time    0
cc_num                   0
merchant                 0
category                 0
amt                      0
first                    0
last                     0
gender                   0
street                   0
city                     0
state                    0
zip                      0
lat                      0
long                     0
city_pop                 0
job                      0
dob                      0
trans_num                0
unix_time                0
merch_lat                0
merch_long               0
is_fraud                 0
dtype: int64


In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import joblib
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Ignore warnings for cleaner output
warnings.filterwarnings("ignore")

# Load the dataset
data = pd.read_csv('fraudTest.csv')

# Display basic information
print("Dataset Loaded Successfully!")
print("Number of Transactions:", data.shape[0])
print("Number of Features:", data.shape[1])
print(data.head())

# Drop unnecessary columns (like timestamps and unique IDs)
drop_cols = ['trans_date_trans_time', 'cc_num', 'first', 'last', 'street', 'city', 
             'state', 'zip', 'dob', 'trans_num', 'unix_time']
data = data.drop(columns=drop_cols)

# Encode categorical variables (e.g., merchant, category, job, gender)
categorical_cols = ['merchant', 'category', 'job', 'gender']
for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])

# Normalize numerical columns
num_cols = ['amt', 'lat', 'long', 'city_pop', 'merch_lat', 'merch_long']
scaler = StandardScaler()
data[num_cols] = scaler.fit_transform(data[num_cols])

# Define feature variables (X) and target variable (y)
X = data.drop(columns=['is_fraud'])
y = data['is_fraud']

# Split data into Training and Testing sets (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train a Random Forest model
print("\nTraining the Fraud Detection Model...")
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight="balanced")
rf_model.fit(X_train, y_train)

# Make Predictions
y_pred = rf_model.predict(X_test)

# Evaluate the Model
print("\nModel Evaluation:")
print("Accuracy Score:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Save the Trained Model
joblib.dump(rf_model, 'fraud_detection_model.pkl')
print("\nModel saved as fraud_detection_model.pkl")

# Save the Scaler (for future data processing)
joblib.dump(scaler, 'scaler.pkl')
print("Scaler saved as scaler.pkl")

# Save Label Encoders (for categorical data transformation)
for col in categorical_cols:
    joblib.dump(le, f'label_encoder_{col}.pkl')
    print(f"Label encoder for {col} saved as label_encoder_{col}.pkl")

print("\n🚀 Training Complete! You can now use 'fraud_detection_model.pkl' for predictions.")


Dataset Loaded Successfully!
Number of Transactions: 555719
Number of Features: 23
   Unnamed: 0 trans_date_trans_time            cc_num  \
0           0   2020-06-21 12:14:25  2291163933867244   
1           1   2020-06-21 12:14:33  3573030041201292   
2           2   2020-06-21 12:14:53  3598215285024754   
3           3   2020-06-21 12:15:15  3591919803438423   
4           4   2020-06-21 12:15:17  3526826139003047   

                               merchant        category    amt   first  \
0                 fraud_Kirlin and Sons   personal_care   2.86    Jeff   
1                  fraud_Sporer-Keebler   personal_care  29.84  Joanne   
2  fraud_Swaniawski, Nitzsche and Welch  health_fitness  41.28  Ashley   
3                     fraud_Haley Group        misc_pos  60.05   Brian   
4                 fraud_Johnston-Casper          travel   3.19  Nathan   

       last gender                       street  ...      lat      long  \
0   Elliott      M            351 Darlene Green  ...  

In [1]:
pip install plaid-python flask pandas joblib scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [2]:
from plaid.api import plaid_api
from plaid.model import *
from plaid import Configuration, ApiClient

import datetime

# Your Plaid API credentials (Replace with **NEW** credentials after revoking old ones)
PLAID_CLIENT_ID = "6f80dd6a256345750015f719b20c53"
PLAID_SECRET = "d96d5223c442c4d4b9ea76834bb209"
PLAID_ENV = "sandbox"  # Change to "development" or "production" as needed

# Plaid API Configuration
configuration = Configuration(
    host="https://sandbox.plaid.com",  # Change for different environments
    api_key={
        "clientId": PLAID_CLIENT_ID,
        "secret": PLAID_SECRET
    }
)

# Create API client
api_client = ApiClient(configuration)
client = plaid_api.PlaidApi(api_client)

print("Plaid client initialized successfully!")



Plaid client initialized successfully!


In [6]:
PLAID_CLIENT_ID = "6f80dd6a256345750015f719b20c53"
PLAID_SECRET = "d96d5223c442c4d4b9ea76834bb209"


In [8]:
from plaid.api import plaid_api
from plaid.model import *
from plaid import Configuration, ApiClient

import datetime

# Your Plaid API credentials (Replace with **NEW** credentials after revoking old ones)
PLAID_CLIENT_ID = "6f80dd6a256345750015f719b20c53"
PLAID_SECRET = "d96d5223c442c4d4b9ea76834bb209"
PLAID_ENV = "sandbox"  # Change to "development" or "production" as needed

# Plaid API Configuration
configuration = Configuration(
    host="https://sandbox.plaid.com",  # Change for different environments
    api_key={
        "clientId": PLAID_CLIENT_ID,
        "secret": PLAID_SECRET
    }
)

# Create API client
api_client = ApiClient(configuration)
client = plaid_api.PlaidApi(api_client)

print("Plaid client initialized successfully!")



Plaid client initialized successfully!


In [15]:
def get_access_token(public_token):
    try:
        # Exchange public_token for access_token
        request = ItemPublicTokenExchangeRequest(public_token=public_token)
        response = client.item_public_token_exchange(request)
        access_token = response.access_token
        return access_token
    except plaid.ApiException as e:
        print("Error exchanging public token for access token:", e)
        return None


In [16]:
public_token = "your_public_token_here"  # Replace with your actual public_token
access_token = get_access_token(public_token)
if access_token:
    print("Access Token:", access_token)
else:
    print("Failed to get access token")


NameError: name 'plaid' is not defined

In [17]:
pip show plaid-python


Name: plaid-python
Version: 29.1.0
Summary: Python client library for the Plaid API and Link
Home-page: https://github.com/plaid/plaid-python
Author: Plaid Inc.
Author-email: developers@plaid.com
License: MIT
Location: c:\users\sharm\appdata\local\programs\python\python310\lib\site-packages
Requires: nulltype, python-dateutil, urllib3
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [18]:
from plaid.api import plaid_api
from plaid.model.item_public_token_exchange_request import ItemPublicTokenExchangeRequest
from plaid import Configuration, ApiClient

import datetime

# Your Plaid API credentials (Replace with **NEW** credentials after revoking old ones)
PLAID_CLIENT_ID = "6f80dd6a256345750015f719b20c53"
PLAID_SECRET = "d96d5223c442c4d4b9ea76834bb209"
PLAID_ENV = "sandbox"  # Change to "development" or "production" as needed

# Plaid API Configuration
configuration = Configuration(
    host="https://sandbox.plaid.com",  # Change for different environments
    api_key={
        "clientId": PLAID_CLIENT_ID,
        "secret": PLAID_SECRET
    }
)

# Create API client
api_client = ApiClient(configuration)
client = plaid_api.PlaidApi(api_client)

print("Plaid client initialized successfully!")


Plaid client initialized successfully!
