In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load and prepare the dataset
df = pd.read_csv("C:/Users/asmit/Downloads/all_three.csv")
df = df.drop("Unnamed: 0", axis=1)

X = df.drop("label", axis=1)
y = df["label"]

# Optionally sample data for speed
X_sample = X.sample(frac=0.3, random_state=42)
y_sample = y.loc[X_sample.index]

X_train, X_test, y_train, y_test = train_test_split(X_sample, y_sample, test_size=0.2, random_state=42)

# Define base models with simpler settings
estimators = [
    ('rf', RandomForestClassifier(n_estimators=50, max_depth=5, n_jobs=-1, random_state=42)),
    ('lr', LogisticRegression(max_iter=500, random_state=42)),
    ('dt', DecisionTreeClassifier(max_depth=5, random_state=42))
]

# Meta model
final_estimator = LogisticRegression(max_iter=500, random_state=42)

# Stacking model with reduced CV
stacking_model = StackingClassifier(estimators=estimators, final_estimator=final_estimator, cv=3)

# Fit and evaluate
stacking_model.fit(X_train, y_train)
y_pred = stacking_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("✅ Hybrid Stacking Accuracy (Fast Version):", round(accuracy * 100, 2), "%")


✅ Hybrid Stacking Accuracy (Fast Version): 98.87 %


In [3]:
print(df.columns.tolist())


['Unnamed: 0', 'balance', 'label', 'rec/sent', 'amount', 'block_id', 'size', 'weight', 'version', 'lock_time', 'is_coinbase', 'has_witness', 'input_count', 'output_count', 'input_total', 'input_total_usd', 'output_total', 'output_total_usd', 'fee', 'fee_usd', 'fee_per_kb', 'fee_per_kb_usd', 'fee_per_kwu', 'fee_per_kwu_usd', 'cdd_total']


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Load and prepare data
df = pd.read_csv("C:/Users/asmit/Downloads/all_three.csv")

# Drop unwanted column
if 'Unnamed: 0' in df.columns:
    df = df.drop('Unnamed: 0', axis=1)

# Features and labels
X = df.drop("label", axis=1)
y = df["label"]

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Reshape for LSTM: [samples, timesteps, features]
X_reshaped = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# One-hot encode labels if needed (for multiclass)
# For binary classification, skip this
if len(np.unique(y)) > 2:
    y_encoded = to_categorical(y)
    loss = 'categorical_crossentropy'
    metrics = ['accuracy']
    final_activation = 'softmax'
    output_units = y_encoded.shape[1]
else:
    y_encoded = y.values
    loss = 'binary_crossentropy'
    metrics = ['accuracy']
    final_activation = 'sigmoid'
    output_units = 1

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y_encoded, test_size=0.2, random_state=42)

# Build LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(X_reshaped.shape[1], X_reshaped.shape[2]), return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(output_units, activation=final_activation))

model.compile(optimizer='adam', loss=loss, metrics=metrics)

# Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1, validation_data=(X_test, y_test))

# Predict and evaluate
if output_units == 1:
    y_pred = (model.predict(X_test) > 0.5).astype("int32")
else:
    y_pred = np.argmax(model.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("LSTM Accuracy:", round(accuracy * 100, 2), "%")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
LSTM Accuracy: 98.62 %
