In [None]:

# Load Dataset (Modify path as needed)
# pandas is imported in this cell because it is the first one executed; without
# it a fresh kernel ("Restart & Run All") fails here with NameError on `pd`.
import pandas as pd

data = pd.read_csv("/content/drive/MyDrive/Bitsecure/all_three.csv")

# Show the column names so the feature set is visible before modelling.
data.columns


Index(['Unnamed: 0', 'balance', 'label', 'rec/sent', 'amount', 'block_id',
       'size', 'weight', 'version', 'lock_time', 'is_coinbase', 'has_witness',
       'input_count', 'output_count', 'input_total', 'input_total_usd',
       'output_total', 'output_total_usd', 'fee', 'fee_usd', 'fee_per_kb',
       'fee_per_kb_usd', 'fee_per_kwu', 'fee_per_kwu_usd', 'cdd_total'],
      dtype='object')

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Load Dataset (Modify path as needed)
#data = pd.read_csv("your_dataset.csv")

# Data Preprocessing
def preprocess_data(df, label_col='label', drop_cols=('Unnamed: 0',)):
    """Split a transaction frame into standardized features and labels.

    Parameters:
    - df: DataFrame holding the feature columns plus the label column.
    - label_col: Name of the target column (default 'label').
    - drop_cols: Non-feature columns to discard when present. The default
      removes 'Unnamed: 0', the name pandas gives a header-less CSV column
      (presumably a row index written by an earlier export); keeping it would
      let the models learn from row order instead of transaction properties.

    Returns:
    - X_scaled: NumPy array of the remaining columns, standardized per column.
    - y: The label column, unchanged.
    - scaler: The fitted StandardScaler, needed to transform new rows the
      same way.

    Raises:
    - KeyError: if `label_col` is not a column of `df`.

    Note: the scaler is fitted on every row of `df`. Pass only training rows
    if held-out statistics must not influence the scaling.
    """
    # Read the label first so a missing label column still fails loudly;
    # the drop below tolerates absent columns.
    y = df[label_col]
    X = df.drop(columns=[label_col, *drop_cols], errors='ignore')
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y, scaler

# Standardize the features, then hold out 20% of the rows for evaluation.
# NOTE(review): the scaler is fitted before the split, so its mean/std also
# reflect the held-out rows -- confirm whether that is acceptable here.
X, y, scaler = preprocess_data(data)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Reshape Data for LSTM: insert a length-1 time axis -> (samples, 1, features)
X_train_lstm = X_train[:, np.newaxis, :]
X_test_lstm = X_test[:, np.newaxis, :]

# LSTM Model
def build_lstm(input_shape, num_classes=2):
    """Build and compile a two-layer LSTM classifier.

    Parameters:
    - input_shape: (timesteps, features) of one sample.
    - num_classes: Number of distinct labels. With 2 (the default) the model
      ends in a single sigmoid unit trained with binary cross-entropy; with
      more it ends in a softmax over `num_classes` units trained with sparse
      categorical cross-entropy, which expects integer labels 0..num_classes-1.

    Returns:
    - The compiled Keras model.
    """
    if num_classes <= 2:
        head = Dense(1, activation='sigmoid')
        loss = 'binary_crossentropy'
    else:
        head = Dense(num_classes, activation='softmax')
        loss = 'sparse_categorical_crossentropy'

    model = Sequential([
        # An explicit Input replaces the `input_shape=` layer kwarg, which
        # Keras 3 warns about.
        tf.keras.Input(shape=input_shape),
        LSTM(50, return_sequences=True),
        Dropout(0.2),
        LSTM(50),
        Dropout(0.2),
        head,
    ])
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    return model

# A negative binary cross-entropy during training means the labels are not
# confined to {0, 1}. Map the labels onto indices 0..K-1 and size the output
# layer to the number of classes actually present.
# Assumes every label in y_test also occurs in y_train.
lstm_classes, y_train_idx = np.unique(y_train, return_inverse=True)
y_test_idx = np.searchsorted(lstm_classes, y_test)

lstm_model = build_lstm((1, X_train.shape[1]), num_classes=len(lstm_classes))
lstm_model.fit(X_train_lstm, y_train_idx, epochs=10, batch_size=32, validation_data=(X_test_lstm, y_test_idx))

# LSTM Predictions
lstm_prob = lstm_model.predict(X_test_lstm)
if lstm_prob.shape[-1] == 1:
    # Binary head: threshold the single sigmoid output.
    lstm_idx = (lstm_prob.flatten() > 0.5).astype(int)
else:
    # Multi-class head: take the most probable class.
    lstm_idx = lstm_prob.argmax(axis=1)
# Translate indices back to the original label values so the predictions are
# comparable with y_test and with the other models' outputs.
lstm_pred = lstm_classes[lstm_idx]

# Random Forest Model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# XGBoost Model
# `use_label_encoder` is no longer passed: the installed XGBoost does not use
# it and only emits a "Parameters ... are not used" warning.
xgb_model = XGBClassifier(n_estimators=100, random_state=42, eval_metric='logloss')
xgb_model.fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)

# Majority (Hard) Voting Ensemble
def ensemble_predictions(pred1, pred2, pred3):
    """Combine three models' predictions into one integer label per sample.

    Parameters:
    - pred1, pred2, pred3: Equal-length arrays of predictions.

    Returns:
    - Integer NumPy array with one label per sample.

    For class labels the result is the majority vote. Averaging and rounding
    labels only equals a majority vote for 0/1 labels; with more classes it
    can return a class no model predicted (votes 0, 2, 2 average to 1).
    When all three models disagree, the vote of `pred1` is used.

    Non-integer inputs (scores or probabilities) are averaged and rounded.
    """
    p1, p2, p3 = (np.asarray(p) for p in (pred1, pred2, pred3))

    stacked = np.stack([p1, p2, p3])
    if not np.all(stacked == np.round(stacked)):
        # Fractional values are scores, not labels: average then round.
        return np.round((p1 + p2 + p3) / 3).astype(int)

    # If the last two agree they form a majority; otherwise pred1 either
    # agrees with one of them (majority) or acts as the tie-breaker.
    return np.where(p2 == p3, p2, p1).astype(int)

# Combine the three models' test-set predictions.
final_pred = ensemble_predictions(lstm_pred, rf_pred, xgb_pred)

# Model Evaluation: accuracy of each model and of the ensemble on the
# held-out rows, computed in the same order they are reported below.
lstm_acc, rf_acc, xgb_acc, ensemble_acc = (
    accuracy_score(y_test, predictions)
    for predictions in (lstm_pred, rf_pred, xgb_pred, final_pred)
)

print(f"LSTM Accuracy: {lstm_acc:.4f}")
print(f"Random Forest Accuracy: {rf_acc:.4f}")
print(f"XGBoost Accuracy: {xgb_acc:.4f}")
print(f"Ensemble Model Accuracy: {ensemble_acc:.4f}")


  super().__init__(**kwargs)


Epoch 1/10
[1m10691/10691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 6ms/step - accuracy: 0.7376 - loss: -28.5393 - val_accuracy: 0.7637 - val_loss: -106.2502
Epoch 2/10
[1m10691/10691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 6ms/step - accuracy: 0.7661 - loss: -131.6750 - val_accuracy: 0.7683 - val_loss: -209.0538
Epoch 3/10
[1m10691/10691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 5ms/step - accuracy: 0.7689 - loss: -233.2707 - val_accuracy: 0.7681 - val_loss: -312.2851
Epoch 4/10
[1m10691/10691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 6ms/step - accuracy: 0.7679 - loss: -335.5335 - val_accuracy: 0.7667 - val_loss: -418.1277
Epoch 5/10
[1m10691/10691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 5ms/step - accuracy: 0.7697 - loss: -442.0724 - val_accuracy: 0.7723 - val_loss: -530.6028
Epoch 6/10
[1m10691/10691[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 6ms/step - accuracy: 0.7701 - loss: -551.7919 - val_ac

Parameters: { "use_label_encoder" } are not used.



LSTM Accuracy: 0.7738
Random Forest Accuracy: 0.9843
XGBoost Accuracy: 0.9819
Ensemble Model Accuracy: 0.9106


In [None]:
import numpy as np

def predict_fraud(ensemble_model, new_data, threshold=0.5):
    """
    Predicts fraud probability and class for new Bitcoin transactions.

    Parameters:
    - ensemble_model: Trained model exposing predict_proba(X) that returns one
      row per sample and one column per class
    - new_data: New transaction data (should be preprocessed similarly to training data).
      May be a single sample (1-D) or a batch (2-D), given as a NumPy array,
      pandas object, or plain list/tuple
    - threshold: Minimum probability at which a sample is classed as fraud
      (default 0.5)

    Returns:
    - fraud_probability: Column 1 of predict_proba for each sample
      (for threshold-based decision)
    - predicted_class: 0 (Non-Fraud) or 1 (Fraud)
    """
    # Plain Python sequences have no .shape; convert them first.
    if not hasattr(new_data, "shape"):
        new_data = np.asarray(new_data)

    # Ensure input is in the right shape (2D array). Going through np.asarray
    # also covers 1-D pandas Series, which have no .reshape method.
    if len(new_data.shape) == 1:
        new_data = np.asarray(new_data).reshape(1, -1)

    # Get predicted fraud probability (column 1 of the class-probability matrix)
    fraud_probability = np.asarray(ensemble_model.predict_proba(new_data))[:, 1]

    # Convert probability to class using the requested threshold
    predicted_class = (fraud_probability >= threshold).astype(int)

    return fraud_probability, predicted_class


In [None]:
# The notebook never builds a single fitted ensemble object, so define a small
# soft-voting wrapper over the two fitted models that expose predict_proba on
# 2-D input. The LSTM is left out: it needs 3-D input and its own output format.
class _SoftVotingEnsemble:
    """Averages predict_proba over already-fitted classifiers."""

    def __init__(self, models):
        self.models = list(models)

    def predict_proba(self, X):
        # Assumes every model orders its class columns identically -- confirm.
        return np.mean([m.predict_proba(X) for m in self.models], axis=0)


ensemble_model = _SoftVotingEnsemble([rf_model, xgb_model])

# Example new transaction: a held-out row, which already has the training
# feature count and scaling. Raw rows must go through scaler.transform first.
new_transaction = X_test[0]

# Predict fraud probability and class
fraud_prob, fraud_class = predict_fraud(ensemble_model, new_transaction)

print(f"Fraud Probability: {fraud_prob[0]:.4f}")
print(f"Predicted Class: {'Fraud' if fraud_class[0] == 1 else 'Not Fraud'}")


NameError: name 'ensemble_model' is not defined