# Import Dataset and Display the column names

In [1]:
import pandas as pd

# Load the dataset
data = pd.read_csv(r"C:\Users\HP\OneDrive\Desktop\Preprocessed_Online_Payment_Data.csv")

# Display the column names
print(data.columns)


Index(['step', 'amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig',
       'nameDest', 'oldbalanceDest', 'newbalanceDest', 'isFraud',
       'isFlaggedFraud', 'type_CASH_IN', 'type_CASH_OUT', 'type_DEBIT',
       'type_PAYMENT', 'type_TRANSFER'],
      dtype='object')


# Drop the 'isFraud' column and Save the modified dataset

In [2]:
import pandas as pd

# Load the dataset
data = pd.read_csv(r'C:\Users\HP\OneDrive\Desktop\Preprocessed_Online_Payment_Data.csv')

# Drop the 'isFraud' column
data = data.drop(columns=['isFraud'])

# Save the modified dataset
data.to_csv(r'C:\Users\HP\OneDrive\Desktop\Infosys_Springboard_data.csv', index=False)
print("File saved successfully.")


File saved successfully.


# Load the modified dataset and inspect the data

In [3]:
import pandas as pd

# Load the CSV file to inspect its structure and contents
file_path = r"C:\Users\HP\OneDrive\Desktop\Infosys_Springboard_data.csv"
data = pd.read_csv(file_path)

# Display the first few rows and general information about the dataset
data.head(), data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16426 entries, 0 to 16425
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   step            16426 non-null  float64
 1   amount          16426 non-null  float64
 2   nameOrig        16426 non-null  object 
 3   oldbalanceOrg   16426 non-null  float64
 4   newbalanceOrig  16426 non-null  float64
 5   nameDest        16426 non-null  object 
 6   oldbalanceDest  16426 non-null  float64
 7   newbalanceDest  16426 non-null  float64
 8   isFlaggedFraud  16426 non-null  int64  
 9   type_CASH_IN    16426 non-null  bool   
 10  type_CASH_OUT   16426 non-null  bool   
 11  type_DEBIT      16426 non-null  bool   
 12  type_PAYMENT    16426 non-null  bool   
 13  type_TRANSFER   16426 non-null  bool   
dtypes: bool(5), float64(6), int64(1), object(2)
memory usage: 1.2+ MB


(       step    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
 0  0.533693  0.002432   C658247527       0.018398        0.023652   
 1  0.336927  0.000401  C1812418129       0.005692        0.006586   
 2  0.002695  0.000727  C1247938090       0.000384        0.000000   
 3  0.854447  0.005805  C1687063682       0.003067        0.000000   
 4  0.210243  0.150521   C751624512       0.079521        0.000000   
 
       nameDest  oldbalanceDest  newbalanceDest  isFlaggedFraud  type_CASH_IN  \
 0   C492670573         0.00935        0.009007               0          True   
 1  M1924423059         0.00000        0.000000               0         False   
 2  C1002031672         0.00000        0.000000               0         False   
 3   C451391923         0.00000        0.000000               0         False   
 4   C320991755         0.00000        0.020016               0         False   
 
    type_CASH_OUT  type_DEBIT  type_PAYMENT  type_TRANSFER  
 0          False       False

# predict the fraud status for each row in the full dataset by Preprocess the data by cleaning and preparing features and Train a machine learning model to classify transactions as "fraud" or "not fraud."


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Select features and target
X = data.drop(columns=['nameOrig', 'nameDest', 'isFlaggedFraud'])  # Drop unnecessary columns
y = data['isFlaggedFraud']  # Target variable

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForestClassifier model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Display the classification report
classification_report_output = classification_report(y_test, y_pred)
classification_report_output


'              precision    recall  f1-score   support\n\n           0       1.00      1.00      1.00      3282\n           1       1.00      0.50      0.67         4\n\n    accuracy                           1.00      3286\n   macro avg       1.00      0.75      0.83      3286\nweighted avg       1.00      1.00      1.00      3286\n'

# predict fraud for each row ("The fraud prediction model has added a new column, isFraudPrediction, to indicate whether each transaction is predicted as fraud (1) or not fraud (0))

In [5]:
# Predict fraud status for each row in the dataset
data['isFraudPrediction'] = model.predict(X)

# Display the first few rows of the dataset with the predictions
data[['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest', 'isFlaggedFraud', 'isFraudPrediction']].head()


Unnamed: 0,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFlaggedFraud,isFraudPrediction
0,0.002432,0.018398,0.023652,0.00935,0.009007,0,0
1,0.000401,0.005692,0.006586,0.0,0.0,0,0
2,0.000727,0.000384,0.0,0.0,0.0,0,0
3,0.005805,0.003067,0.0,0.0,0.0,0,0
4,0.150521,0.079521,0.0,0.0,0.020016,0,0


# Save the new predicted dataset

In [6]:
# Save the dataset with predictions to a new CSV file
output_file_path = 'Predicted_Infosys_data.csv'
data.to_csv(output_file_path, index=False)


In [7]:
import streamlit as st
import pandas as pd
from tensorflow.keras.models import load_model

# Load the trained Keras model
model = load_model(r'C:\\Users\\HP\\OneDrive\\Desktop\\fraud_detection_model.keras')

# Set up the Streamlit app
st.set_page_config(page_title="Fraud Detection System", layout="wide")

# Home Page
def home_page():
    st.title("Welcome to the Fraud Detection System")
    st.write("""
    This application helps detect potentially fraudulent transactions using a trained machine learning model.
    Use the sidebar to navigate between different sections of the app.
    """)

# Single Transaction Page
def single_transaction_page():
    st.title("Single Transaction Prediction")

    # Input fields for simplified transaction features
    amount = st.number_input("Amount", min_value=0.0, step=0.01)
    oldbalanceOrg = st.number_input("Old Balance (Origin)", min_value=0.0, step=0.01)
    newbalanceOrig = st.number_input("New Balance (Origin)", min_value=0.0, step=0.01)

    # Dropdown for transaction type
    type_ = st.selectbox("Transaction Type", ["CASH_IN", "CASH_OUT", "DEBIT", "PAYMENT", "TRANSFER"])
    type_encoded = [0, 0, 0, 0, 0]
    type_mapping = {"CASH_IN": 0, "CASH_OUT": 1, "DEBIT": 2, "PAYMENT": 3, "TRANSFER": 4}
    type_encoded[type_mapping[type_]] = 1

    # Assemble the features in the expected order for the model, filling in placeholders for missing fields
    input_data = [0, amount, oldbalanceOrg, newbalanceOrig, 0, 0, 0, 0] + type_encoded

    # Convert to DataFrame for model input
    input_data_df = pd.DataFrame([input_data])

    # Predict button
    if st.button("Predict Fraud"):
        prediction = model.predict(input_data_df)[0][0]
        prediction_label = "Fraudulent Transaction Detected!" if prediction > 0.5 else "Transaction is NOT Fraudulent."
        st.write(prediction_label)

        # Option to save and download prediction result
        download_df = pd.DataFrame([input_data + [int(prediction > 0.5)]], columns=[
            'step', 'amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest', 
            'isFraud', 'isFlaggedFraud', 'type_CASH_IN', 'type_CASH_OUT', 'type_DEBIT', 'type_PAYMENT', 
            'type_TRANSFER', 'prediction'
        ])
        st.download_button(
            label="Download Prediction Result",
            data=download_df.to_csv(index=False),
            file_name="transaction_prediction.csv",
            mime="text/csv"
        )

# Bulk Transaction Page
def bulk_transaction_page():
    st.title("Bulk Transaction Prediction")
    st.write("Upload a CSV file containing transaction data for batch fraud detection.")
    
    # File upload for bulk transactions
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    
    if uploaded_file:
        bulk_data = pd.read_csv(uploaded_file)
        st.write("Uploaded data preview:", bulk_data.head())

        # Add batch processing functionality
        if st.button("Predict Fraud for All Transactions"):
            predictions = model.predict(bulk_data)
            bulk_data['Prediction'] = (predictions > 0.5).astype(int)
            st.write("Prediction Results:", bulk_data)

# Transaction History Page
def transaction_history_page():
    st.title("Transaction History")
    st.write("Display the transaction history here.")
    # Implement logic to load and display previous transaction predictions, if saved

# About Page
def about_page():
    st.title("About This Application")
    st.write("""
    This fraud detection system uses machine learning to help identify potentially fraudulent transactions.
    """)

# Sidebar for navigation
st.sidebar.title("Navigation")
page = st.sidebar.radio("Choose a page", ["Home", "Single Transaction", "Bulk Transaction", "Transaction History", "About"])

# Page selection logic
if page == "Home":
    home_page()
elif page == "Single Transaction":
    single_transaction_page()
elif page == "Bulk Transaction":
    bulk_transaction_page()
elif page == "Transaction History":
    transaction_history_page()
elif page == "About":
    about_page()


  saveable.load_own_variables(weights_store.get(inner_path))
2024-11-11 22:50:36.743 
  command:

    streamlit run C:\Users\HP\AppData\Roaming\Python\Python310\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-11-11 22:50:36.764 Session state does not function when running a script without `streamlit run`
