In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os

# 🔹 Load dataset
csv_path = "../metrics_cache/combined_df.csv"
df = pd.read_csv(csv_path)

# 🔹 Select features (inputs) and target (output)
# The target is also one of the input features, so the model sees past
# values of the series it is asked to forecast.
features = ["tps", "avg_fee_sol", "total_fees_sol", "failed_tx_count", "tx_count"]
target = "failed_tx_count"  # Predicting congestion (failed transactions)

# 🔹 Drop missing values
# Restrict to the feature columns first so rows are only dropped for NaNs
# in columns that are actually used.
df = df[features].dropna()

# 🔹 Normalize data using MinMaxScaler
# Fit the scaler on the earliest rows only, then apply it to every row.
# Fitting on the full frame would leak the min/max of the held-out period
# into training, because the later train/test split is unshuffled
# (test_size=0.2, i.e. the last rows form the test set).
# NOTE(review): assumes the CSV rows are already in time order - confirm.
TRAIN_FRACTION = 0.8  # mirrors the 80/20 unshuffled split used further down
n_fit_rows = max(1, int(len(df) * TRAIN_FRACTION))
scaler = MinMaxScaler()
scaler.fit(df.iloc[:n_fit_rows])
# Values from the held-out period may fall slightly outside [0, 1]; that is
# expected and reflects what the model would see on genuinely unseen data.
df_scaled = scaler.transform(df)

# 🔹 Create sequences for LSTM
def create_sequences(data, target_index, seq_length=10):
    """Build sliding-window samples for sequence-to-one forecasting.

    Window ``i`` covers rows ``i .. i + seq_length - 1`` of ``data`` and is
    paired with the value found at row ``i + seq_length`` in column
    ``target_index``.

    Args:
        data: 2-D array-like indexed as ``(row, column)``.
        target_index: Column of ``data`` to use as the prediction target.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_windows, seq_length, n_columns)`` and ``y`` has shape
        ``(n_windows,)``, with ``n_windows = len(data) - seq_length``.
        When there are not enough rows for a single window, both arrays are
        empty but keep the ranks above, so callers can still read
        ``X.shape[2]``.
    """
    data = np.asarray(data)
    n_windows = len(data) - seq_length

    if n_windows <= 0:
        # Preserve rank so downstream shape lookups do not fail on an
        # unrelated IndexError when the dataset is too short.
        empty_X = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start : start + seq_length] for start in range(n_windows)])
    # The target for window i is row i + seq_length, i.e. every row from
    # seq_length onward; copy so the result does not alias the input.
    y = data[seq_length:, target_index].copy()
    return X, y

# 🔹 Window length: number of past rows fed to the model per sample
SEQ_LENGTH = 10

# 🔹 Turn the scaled table into (window, next-target) pairs
target_column = df.columns.get_loc(target)
X, y = create_sequences(
    df_scaled,
    target_index=target_column,
    seq_length=SEQ_LENGTH,
)

# 🔹 Train-test split
# shuffle=False keeps row order, so the last 20% of samples form the test set.
split_parts = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train, X_test, y_train, y_test = split_parts

# 🔹 Print dataset shapes
for shape_label, split_array in (
    ("✅ X_train shape:", X_train),
    ("✅ X_test shape:", X_test),
):
    print(shape_label, split_array.shape)

# 🔹 Define LSTM model
# Two stacked LSTM layers (the first returns the full sequence so the second
# can consume it), each followed by dropout, then a small dense head that
# emits a single regression value.
n_input_features = X.shape[2]
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(SEQ_LENGTH, n_input_features)))
model.add(Dropout(0.2))
model.add(LSTM(32, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(16, activation="relu"))
model.add(Dense(1, activation="linear"))  # Regression output

# 🔹 Compile model
# Mean squared error is the training loss; mean absolute error is reported
# alongside it as a metric.
model.compile(loss="mse", optimizer="adam", metrics=["mae"])
model.summary()

# 🔹 Train the model
# The test split doubles as the validation set here, so the validation curve
# is not an independent estimate of generalisation.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=16,
)

# 🔹 Plot training loss
for history_key, curve_label in (
    ("loss", "Train Loss"),
    ("val_loss", "Validation Loss"),
):
    plt.plot(history.history[history_key], label=curve_label)
plt.title("LSTM Training Loss")
plt.legend()
plt.show()

# 🔹 Make predictions
y_pred = model.predict(X_test)

# 🔹 Convert back to original scale
# The scaler was fitted on len(features) columns, so inverse_transform must
# receive rows of exactly that width, with each value placed in the target's
# own column. Appending the value as an extra column raises a shape error
# and would read the wrong column anyway.
target_idx = features.index(target)


def inverse_scale_target(scaled_values):
    """Return target values mapped from scaled space back to original units.

    Args:
        scaled_values: Array-like of scaled target values; any shape, it is
            flattened to 1-D.

    Returns:
        1-D array of the same length in the target column's original units.
    """
    flat_values = np.asarray(scaled_values).ravel()
    # The other columns are zero placeholders; only the target column is read
    # back, and MinMaxScaler rescales each column independently.
    padded = np.zeros((len(flat_values), len(features)))
    padded[:, target_idx] = flat_values
    return scaler.inverse_transform(padded)[:, target_idx]


y_test_original = inverse_scale_target(y_test)
y_pred_original = inverse_scale_target(y_pred)

# 🔹 Plot Actual vs Predicted values
plt.figure(figsize=(10, 5))
plt.plot(y_test_original, label="Actual", color='blue')
plt.plot(y_pred_original, label="Predicted", color='red', linestyle='dashed')
plt.legend()
plt.title("Actual vs Predicted Failed Transactions")
plt.show()

# 🔹 Save trained model
# NOTE(review): this is an absolute, machine-specific location; the save
# will fail on hosts where the directory does not exist.
model_output_path = "/Users/mukeshdurga/Desktop/solhive/lstm_congestion_model.h5"
model.save(model_output_path)
print("✅ Model saved successfully!")


: 

In [None]:
# Shell escape: upgrade and force-reinstall ipykernel into the per-user
# site-packages (-U, --user, --force-reinstall).
# NOTE(review): `!pip` runs whichever pip is first on the shell PATH, which
# may not belong to the interpreter backing this kernel - confirm.
!pip install ipykernel -U --user --force-reinstall

: 