<a href="https://colab.research.google.com/github/didulanthaisuru/time_series_forecasting/blob/main/lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import re
from sentence_transformers import SentenceTransformer
import hdbscan
import numpy as np
import time
from sklearn.preprocessing import StandardScaler


In [None]:
from google.colab import drive

# New Section

In [None]:
import os

# Show which files are present in Colab's bundled sample_data directory
sample_data_path = '/content/sample_data'
files = os.listdir(path=sample_data_path)
print(files)


['anscombe.json', 'README.md', 'mnist_test.csv', 'california_housing_train.csv', 'mnist_train_small.csv', 'california_housing_test.csv']


In [None]:
# Location of the Excel workbook holding the bank-statement rows.
# NOTE(review): the sample_data listing printed earlier does not contain this
# file and the read that follows fails with FileNotFoundError — confirm where
# the workbook is actually uploaded (Drive mount vs. sample_data).
file_path = '/content/sample_data/nadilFinalizedDataset.xlsx'

In [None]:
# Load the statement workbook into the notebook-wide DataFrame `df`
df = pd.read_excel(io=file_path)

FileNotFoundError: [Errno 2] No such file or directory: '/content/sample_data/nadilFinalizedDataset.xlsx'

In [None]:
# Preview the first rows of the loaded statement
df.head()

Unnamed: 0,date,desiption,debit,credit,balance
0,2022-09-29,CSH DEP,,1000.0,1000.0
1,2022-10-31,TRF,,17480.0,18480.0
2,2022-10-31,INT,,4.64,18484.64
3,2022-11-06,t ahirt OTHBNK T,6030.0,,12454.64
4,2022-11-06,010001088282101 OTHBNK T,3030.0,,9424.64


In [None]:
# Step 3: Data Preprocessing - Normalize Text & Expand Abbreviations

# Abbreviation table: statement shorthand -> expanded wording.
# Entry order is preserved because clean_text walks the mapping in
# insertion order and rewrites the text one entry at a time.
abbreviations = dict([
    ('PYT', 'Payment'),
    ('TRF', 'Transfer'),
    ('DEP', 'Deposit'),
    ('WDL', 'Withdrawal'),
    ('WD', 'Withdrawal'),
    ('POS', 'Point of Sale'),
    ('ATM', 'ATM Withdrawal'),
    ('CHQ', 'Cheque'),
    ('DD', 'Demand Draft'),
    ('BT', 'Bank Transfer'),
    ('ACH', 'Automated Clearing House'),
    ('NEFT', 'National Electronic Funds Transfer'),
    ('RTGS', 'Real-Time Gross Settlement'),
    ('IMPS', 'Immediate Payment Service'),
    ('UPI', 'Unified Payments Interface'),
    ('INT', 'Interest'),
    ('CHG', 'Charge'),
    ('FEE', 'Fee'),
    ('TXN', 'Transaction'),
    ('REV', 'Reversal'),
    ('EMI', 'Equated Monthly Installment'),
    ('CC', 'Credit Card'),
    ('POS REF', 'Point of Sale Refund'),
    ('BIL', 'Bill Payment'),
    ('BILP', 'Bill Payment'),
    ('INV', 'Investment'),
    ('REF', 'Refund'),
    ('SAL', 'Salary Credit'),
    ('SL', 'Salary Credit'),
    ('TFR', 'Transfer'),
])

# Function to clean text
def clean_text(text, abbr_dict):
    """Lower-case ``text`` and expand every whole-word abbreviation in it.

    Args:
        text: Value to clean; it is passed through ``str()`` first, so
            non-string cells (numbers, NaN) are accepted.
        abbr_dict: Mapping of abbreviation -> expanded form. Entries are
            applied one after another in the mapping's iteration order, and
            matching is case-insensitive because both sides are lower-cased.

    Returns:
        The lower-cased text with each abbreviation that appears between
        word boundaries replaced by its lower-cased expansion.
    """
    text = str(text).lower()  # Convert to lowercase
    for abbr, full_form in abbr_dict.items():
        # Escape the abbreviation so characters such as '.' or '/' are
        # matched literally instead of being read as regex operators.
        pattern = rf'\b{re.escape(abbr.lower())}\b'
        replacement = full_form.lower()
        # A callable replacement is inserted verbatim; a plain string would
        # have backslashes interpreted as group references.
        text = re.sub(pattern, lambda _match: replacement, text)
    return text

# Normalise every raw 'desiption' value and keep the result in 'cleaned_particulars'
df['cleaned_particulars'] = df['desiption'].apply(clean_text, args=(abbreviations,))


In [None]:
# Step 4: Split the statement into debit-only and credit-only transactions

# Debits: a 'debit' amount is present and 'credit' is empty
df_debit = df.loc[df['credit'].isna() & df['debit'].notna()].copy()

# Credits: a 'credit' amount is present and 'debit' is empty
df_credit = df.loc[df['debit'].isna() & df['credit'].notna()].copy()


In [None]:
# Preview the first credit-only rows
df_credit.head()

Unnamed: 0,date,desiption,debit,credit,balance,cleaned_particulars
0,2022-09-29,CSH DEP,,1000.0,1000.0,csh deposit
1,2022-10-31,TRF,,17480.0,18480.0,transfer
2,2022-10-31,INT,,4.64,18484.64,interest
6,2022-11-16,TRF,,2587.5,11987.14,transfer
8,2022-11-30,INT,,20240.0,31777.14,interest


In [None]:
# Preview the first debit-only rows
df_debit.head()

Unnamed: 0,date,desiption,debit,credit,balance,cleaned_particulars
3,2022-11-06,t ahirt OTHBNK T,6030.0,,12454.64,t ahirt othbnk t
4,2022-11-06,010001088282101 OTHBNK T,3030.0,,9424.64,010001088282101 othbnk t
5,2022-11-15,RIB/RMB SE.CH 20 IBMB Chg,25.0,,9399.64,rib/rmb se.ch 20 ibmb charge
7,2022-11-18,nadil Siriwardha MB SA TF,450.0,,11537.14,nadil siriwardha mb sa tf
11,2022-12-24,nadil OTHBNK T,7530.0,,27264.9,nadil othbnk t


In [None]:
# Step 5: Generate Sentence Embeddings

from sentence_transformers import SentenceTransformer

# Load the pretrained sentence encoder used for both transaction groups
model = SentenceTransformer('gtr-t5-large')

# Encode the cleaned descriptions: credit rows first, then debit rows
credit_embeddings, debit_embeddings = (
    model.encode(frame['cleaned_particulars'].tolist())
    for frame in (df_credit, df_debit)
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Step 6: Cluster the credit embeddings with HDBSCAN

# Bring every embedding dimension to zero mean / unit variance
scaler_credit = StandardScaler()
scaler_credit.fit(credit_embeddings)
credit_embeddings_scaled = scaler_credit.transform(credit_embeddings)

# Fit the clusterer, then copy its per-row labels onto the frame
hdbscan_credit = hdbscan.HDBSCAN(gen_min_span_tree=True, min_cluster_size=2)
hdbscan_credit.fit(credit_embeddings_scaled)
df_credit['Cluster'] = hdbscan_credit.labels_




In [None]:
# Step 7: Cluster the debit embeddings with HDBSCAN

# Bring every embedding dimension to zero mean / unit variance
scaler_debit = StandardScaler()
scaler_debit.fit(debit_embeddings)
debit_embeddings_scaled = scaler_debit.transform(debit_embeddings)

# Fit the clusterer, then copy its per-row labels onto the frame
hdbscan_debit = hdbscan.HDBSCAN(gen_min_span_tree=True, min_cluster_size=5)
hdbscan_debit.fit(debit_embeddings_scaled)
df_debit['Cluster'] = hdbscan_debit.labels_




In [None]:
# Step 8: Show the transactions that fell into each credit cluster
print("\n### Credit Clusters ###")

for cluster in sorted(set(df_credit['Cluster'])):
    print(f"\nCluster {cluster}:")
    cluster_data = df_credit.loc[df_credit['Cluster'].eq(cluster)]
    print(cluster_data.loc[:, ['date','desiption', 'debit', 'credit','balance']])



### Credit Clusters ###

Cluster -1:
          date                  desiption  debit   credit   balance
14  2022-12-28            Harsha EFT OTHE    NaN  10000.0   58234.9
45  2023-03-02        Gajanayake EFT OTHE    NaN   6000.0  69048.99
89  2023-07-03  Boardim fees sav EFT OTHE    NaN   6000.0  39110.39
113 2023-08-05          069RB01 DEP (NOV)    NaN  12000.0  44163.23
114 2023-08-05  savindu board fe EFT OTHE    NaN   6000.0  50163.23
134 2023-08-19              susi EFT OTHE    NaN   1000.0  12803.23
146 2023-08-25           ROSHANA EFT OTHE    NaN    500.0  11993.23
158 2023-09-02            router EFT OTHE    NaN  11000.0  13297.97
159 2023-09-02  From Loku appach EFT OTHE    NaN   5000.0  18297.97
178 2023-09-22        water bill EFT OTHE    NaN    500.0   5590.47
184 2023-09-30         0022RB01 DEP (NOV)    NaN   5600.0  11870.47
230 2023-10-31                        INI    NaN     39.0   1039.35
232 2023-11-01             Apple EFT OTHE    NaN   1500.0    2537.4
273 2023-1

In [None]:
# Step 9: Show the transactions that fell into each debit cluster
print("\n### Debit Clusters ###")

for cluster in sorted(set(df_debit['Cluster'])):
    print(f"\nCluster {cluster}:")
    cluster_data = df_debit.loc[df_debit['Cluster'].eq(cluster)]
    print(cluster_data.loc[:, ['date','desiption', 'debit', 'credit','balance']])



### Debit Clusters ###

Cluster -1:
          date                  desiption     debit  credit   balance
7   2022-11-18  nadil Siriwardha MB SA TF    450.00     NaN  11537.14
15  2022-12-28                    CSH WDR  10000.00     NaN   48234.9
19  2022-12-29                    CSH WDR   5000.00     NaN  51891.15
22  2023-01-06              film OTHBNK T   3130.00     NaN   65435.2
23  2023-01-08              film OTHBNK T   3030.00     NaN   62405.2
27  2023-01-31                    CSH WDR  21200.00     NaN   73015.2
31  2023-02-15                    CSH WDR   3850.00     NaN  72691.63
35  2023-02-28                    C5H WDR    600.00     NaN  60001.63
48  2023-03-07   nadil , roshana OTHBNK T  12030.00     NaN  44883.99
98  2023-07-22    04199071001696 POS Trns   6500.00     NaN  36760.39
108 2023-08-01        0713201322 BILL PYT    200.00     NaN  50723.23
138 2023-08-20        0723728777 BILL PYT    200.00     NaN  16343.23
153 2023-08-31    004199071001696 ATM WDR   3030.00  

In [None]:
# Step 1: Import necessary libraries for LSTM
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import MinMaxScaler
import pandas as pd


In [None]:
! pip install tensorflow transformers sentence-transformers




In [None]:
# Step 1: Import necessary libraries for LSTM
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import MinMaxScaler
import pandas as pd


In [None]:
# Step 2: Function to prepare data for LSTM

def prepare_lstm_data(df_cluster, sequence_length=5):
    """Turn one cluster's transactions into sliding-window LSTM samples.

    Rows are ordered by the 'date' column; the 'credit', 'debit' and
    'balance' columns are converted to float, missing values become 0, and
    the three columns are rescaled with a MinMaxScaler fitted on this
    cluster only. Each sample is ``sequence_length`` consecutive rows and
    its target is the row that immediately follows the window.

    Args:
        df_cluster: DataFrame with 'date', 'credit', 'debit' and 'balance'
            columns. It is not modified.
        sequence_length: Number of consecutive transactions per input
            window (default 5, the value previously hard-coded).

    Returns:
        Tuple ``(X, Y, scaler)``: ``X`` has shape
        (samples, sequence_length, 3), ``Y`` has shape (samples, 3) in
        credit/debit/balance order, and ``scaler`` is the fitted
        MinMaxScaler needed to invert predictions. When the cluster has
        ``sequence_length`` rows or fewer, ``X`` and ``Y`` are empty but
        keep those trailing dimensions.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    # Chronological order defines what "next transaction" means
    ordered = df_cluster.sort_values(by='date')

    # Convert to float before filling: the notebook output shows pandas
    # warning on the fillna line, which points at object-dtype columns.
    numerical_features = (
        ordered[['credit', 'debit', 'balance']]
        .apply(pd.to_numeric)
        .astype('float64')
        .fillna(0.0)
    )

    # Normalize data
    scaler = MinMaxScaler()
    normalized_data = scaler.fit_transform(numerical_features)

    n_features = normalized_data.shape[1]
    n_windows = len(normalized_data) - sequence_length
    if n_windows <= 0:
        # Not enough rows for a single (window, target) pair; keep the
        # trailing dimensions so callers can still read X.shape[1:].
        return (
            np.empty((0, sequence_length, n_features)),
            np.empty((0, n_features)),
            scaler,
        )

    # Window i covers rows i .. i+sequence_length-1; its target is the next row
    X = np.stack([normalized_data[i:i + sequence_length] for i in range(n_windows)])
    Y = normalized_data[sequence_length:].copy()

    return X, Y, scaler  # Return data & scaler for inverse transform


In [None]:
# Step 3: Function to build and train LSTM

def train_lstm(X, Y, epochs=20, batch_size=16, verbose=1):
    """Build and fit a two-layer LSTM regressor on windowed samples.

    The network is two stacked LSTM layers with 50 units and ReLU activation
    followed by a Dense layer with one output per target column. It is
    compiled with the Adam optimizer and mean-squared-error loss.

    Args:
        X: Array of shape (samples, timesteps, features).
        Y: Array of shape (samples, targets).
        epochs: Number of training epochs (default 20).
        batch_size: Mini-batch size (default 16).
        verbose: Verbosity flag forwarded to ``model.fit`` (default 1).

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    model = Sequential([
        # Declare the input with an explicit Input object; passing
        # input_shape= to the first layer is what produces the Keras
        # warning visible in the notebook's training output.
        tf.keras.Input(shape=(X.shape[1], X.shape[2])),
        LSTM(50, activation='relu', return_sequences=True),
        LSTM(50, activation='relu'),
        Dense(Y.shape[1]),  # Output layer
    ])

    model.compile(optimizer='adam', loss='mse')

    # Train the model
    model.fit(X, Y, epochs=epochs, batch_size=batch_size, verbose=verbose)

    return model


In [None]:
# Step 4: Function to predict next transaction with date

def predict_next_transaction_with_date(model, X, scaler, last_date, amount_index=0):
    """Predict one step ahead from the last window in ``X``.

    Args:
        model: Object exposing ``predict(batch)``; it is called with a batch
            holding only the last window of ``X``.
        X: Array of shape (samples, timesteps, features). Only ``X[-1]`` is
            used, so the caller decides which window is extrapolated.
        scaler: Object exposing ``inverse_transform`` that maps the model
            output back to original units.
        last_date: Date of the most recent known transaction; must support
            addition with ``pd.Timedelta``.
        amount_index: Column of the de-normalised prediction to report.
            The default 0 keeps the previous behaviour (first feature
            column); pass another index to read a different column.

    Returns:
        Tuple ``(next_date, predicted_amount)``. ``next_date`` is simply
        ``last_date`` plus one day — the date itself is not modelled.
    """
    # Keep the batch axis: a slice yields shape (1, timesteps, features)
    last_sequence = X[-1:]

    # Predict the next (normalised) feature row
    predicted = model.predict(last_sequence)

    # Convert back to original scale (real values)
    predicted_original = scaler.inverse_transform(predicted)

    # Pick the requested column out of the single predicted row
    predicted_amount = predicted_original[0][amount_index]

    # Generate next predicted date (assuming daily frequency)
    next_date = last_date + pd.Timedelta(days=1)

    return next_date, predicted_amount


In [None]:
# Step 5: Train one LSTM per credit cluster and report the next date + amount

# Forecasts keyed by credit cluster label
predictions = {}

for cluster in sorted(df_credit['Cluster'].unique()):
    print(f"\nTraining LSTM for credit - Cluster {cluster}...")

    df_cluster = df_credit[df_credit['Cluster'] == cluster]

    # prepare_lstm_data builds 5-row windows, so 6 rows are the minimum
    # that yields a single (window, target) training pair.
    if len(df_cluster) < 6:
        print(f"Skipping cluster {cluster}, not enough data.")
        continue

    # Prepare data
    X, Y, scaler = prepare_lstm_data(df_cluster)

    # Train LSTM
    # NOTE: this rebinds the notebook-level name `model`, which held the
    # SentenceTransformer in the embedding cell.
    model = train_lstm(X, Y)

    # Latest date in the cluster; max() does not depend on the row order of
    # df_cluster and agrees with the date sort done inside prepare_lstm_data.
    last_date = df_cluster['date'].max()

    # X[-1] stops one row short of the newest transaction (that row is its
    # target, Y[-1]). Shift the window forward by one so the forecast is for
    # the transaction after the newest one, not a re-prediction of it.
    latest_window = np.concatenate([X[-1, 1:], Y[-1:]])[np.newaxis, ...]

    # Predict the next transaction date and amount (column 0 = credit)
    next_date, predicted_amount = predict_next_transaction_with_date(model, latest_window, scaler, last_date)

    # Store predictions in dictionary
    predictions[cluster] = {"Date": next_date, "Predicted Amount": predicted_amount}

    # Print prediction in date and amount format
    print(f"Next predicted transaction for Cluster {cluster}: Date: {next_date}, Amount: {predicted_amount}")



Training LSTM for Payments - Cluster -1...
Epoch 1/20


  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 46ms/step - loss: 0.0702
Epoch 2/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step - loss: 0.0756
Epoch 3/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.0704
Epoch 4/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.0615
Epoch 5/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 0.0647
Epoch 6/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 0.0574
Epoch 7/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 0.0543
Epoch 8/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0518
Epoch 9/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 0.0489
Epoch 10/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - loss: 0.0465 
Epoch 11/20
[1m2/2[0m [32m

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.1035
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 0.1008
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0981
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - loss: 0.0954
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0927
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0901
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.0875
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - loss: 0.0849
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.0823
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0797
Epoch 11/20
[1m1/1[0m [32m━━━

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0028
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - loss: 0.0026
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.0024
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0023
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.0021
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.0020
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.0019
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0018
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.0017
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.0017
Epoch 11/20
[1m1/1[0m [32m━━━

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0868
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - loss: 0.0841
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 0.0818
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.0798
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.0780
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.0763
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0746
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - loss: 0.0730
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 0.0714
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0697
Epoch 11/20
[1m1/1[0m [32m━━━

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - loss: 0.0356
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0379
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0239
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0286
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.0279
Epoch 6/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0243
Epoch 7/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 0.0220
Epoch 8/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0138
Epoch 9/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0099
Epoch 10/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0120
Epoch 11/20
[1m3/3[0m [32m━

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step - loss: 0.2811
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - loss: 0.2761
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - loss: 0.2716
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.2675
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.2636
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.2601
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.2569
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 0.2536
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - loss: 0.2505
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.2474
Epoch 11/20
[1m1/1[0m [32m━━━

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.2694
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - loss: 0.2625
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.2560
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - loss: 0.2501
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - loss: 0.2448
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.2401
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.2356
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 0.2312
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - loss: 0.2268
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - loss: 0.2222
Epoch 11/20
[1m1/1[0m [32m━━━

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0233
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - loss: 0.0224
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step - loss: 0.0216
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - loss: 0.0209
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step - loss: 0.0202
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - loss: 0.0196
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step - loss: 0.0190
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step - loss: 0.0184
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step - loss: 0.0178
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - loss: 0.0173
Epoch 11/20
[1m1/1[0m [

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.3227
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.3145
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - loss: 0.3066
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.2988
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.2912
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - loss: 0.2835
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 0.2758
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 0.2679
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.2597
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 0.2511
Epoch 11/20
[1m1/1[0m [32m━━━

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0293
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.0284
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.0277
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step - loss: 0.0269
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step - loss: 0.0262
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - loss: 0.0255
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - loss: 0.0247
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step - loss: 0.0240
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step - loss: 0.0232
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - loss: 0.0224
Epoch 11/20
[1m1/1[0m 

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0225
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - loss: 0.0218
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.0211
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - loss: 0.0205
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.0199
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0194
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 0.0189
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - loss: 0.0184
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 0.0179
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 0.0173
Epoch 11/20
[1m1/1[0m [32m━━━

  numerical_features = df_cluster[['credit', 'debit', 'balance']].fillna(0)
  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.0028
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 0.0025
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 0.0023
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.0021
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - loss: 0.0019
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0018
Epoch 7/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 0.0017
Epoch 8/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 0.0017
Epoch 9/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - loss: 0.0016
Epoch 10/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - loss: 0.0016
Epoch 11/20
[1m1/1[0m [32m━━━

In [None]:
# Step 5 (debit): Train one LSTM per debit cluster and report the next date + amount

# Forecasts keyed by debit cluster label.
# NOTE(review): this rebinds `predictions`, discarding the credit-cluster
# results stored under the same name by the previous cell.
predictions = {}

for cluster in sorted(df_debit['Cluster'].unique()):
    print(f"\nTraining LSTM for debit - Cluster {cluster}...")

    # Select rows with the debit frame's own labels; a mask built from
    # df_credit has a different index and does not describe these rows.
    df_cluster = df_debit[df_debit['Cluster'] == cluster]

    # prepare_lstm_data builds 5-row windows, so 6 rows are the minimum
    # that yields a single (window, target) training pair.
    if len(df_cluster) < 6:
        print(f"Skipping cluster {cluster}, not enough data.")
        continue

    # Prepare data
    X, Y, scaler = prepare_lstm_data(df_cluster)

    # Train LSTM
    model = train_lstm(X, Y)

    # Latest date in the cluster; max() does not depend on the row order of
    # df_cluster and agrees with the date sort done inside prepare_lstm_data.
    last_date = df_cluster['date'].max()

    # X[-1] stops one row short of the newest transaction (that row is its
    # target, Y[-1]). Shift the window forward by one so the forecast is for
    # the transaction after the newest one, not a re-prediction of it.
    latest_window = np.concatenate([X[-1, 1:], Y[-1:]])[np.newaxis, ...]

    # prepare_lstm_data orders the features as credit, debit, balance, so
    # the debit amount is column 1. predict_next_transaction_with_date
    # reports column 0 (credit, which is 0 on debit-only rows), so the
    # prediction is de-normalised here and the debit column read directly.
    predicted_row = scaler.inverse_transform(model.predict(latest_window))[0]
    predicted_amount = predicted_row[1]

    # Next date assumes daily frequency, as in the credit cell
    next_date = last_date + pd.Timedelta(days=1)

    # Store predictions in dictionary
    predictions[cluster] = {"Date": next_date, "Predicted Amount": predicted_amount}

    # Print prediction in date and amount format
    print(f"Next predicted transaction for Cluster {cluster}: Date: {next_date}, Amount: {predicted_amount}")


NameError: name 'df_debit' is not defined