In [424]:
import random
import datetime
import csv

def generate_february_spending(seed=42):
    random.seed(seed)
    start_date = datetime.date(2025, 2, 1)
    spending_data = []

    for day in range(28):
        current_date = start_date + datetime.timedelta(days=day)
        if random.random() < 0.2:
            spending_data.append({
                "date": current_date.strftime("%Y-%m-%d"),
                "time": "00:00:00",
                "price": 0.00
            })
            continue  
        num_transactions = random.randint(1, 5)
        for _ in range(num_transactions):
            hour = random.randint(8, 22)
            minute = random.randint(0, 59)
            second = random.randint(0, 59)
            transaction_time = datetime.time(hour, minute, second)
            price = round(random.uniform(30, 100.00), 2)
            spending_data.append({
                "date": current_date.strftime("%Y-%m-%d"),
                "time": transaction_time.strftime("%H:%M:%S"),
                "price": price
            })

    return spending_data

february_data = generate_february_spending()

with open('spending.csv', 'w', newline='') as csvfile:
    fieldnames = ['date', 'time', 'price']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(february_data)

print("Data saved to february_spending.csv")


Data saved to february_spending.csv


In [533]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential
from keras.regularizers import l1_l2, l2
from keras.optimizers import Adam
from keras.initializers import HeNormal
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

import sklearn
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [445]:
df = pd.read_csv('spending.csv')
df.head()

Unnamed: 0,date,time,price
0,2025-02-01,19:17:15,45.62
1,2025-02-02,09:37:27,32.22
2,2025-02-02,09:13:14,65.37
3,2025-02-02,08:35:12,80.12
4,2025-02-02,19:34:26,45.43


In [446]:
df = df.groupby('date').agg({
    'price': 'sum'
}).rename(columns={'price': 'total_price'})
df.reset_index(inplace=True)

In [447]:
def extract_date_features(df, date_col):
    df[date_col] = pd.to_datetime(df[date_col])
    df['month'] = df[date_col].dt.month
    df['day_of_week'] = df[date_col].dt.dayofweek
    df['day_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['day_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    
    return df
df = extract_date_features(df, 'date')

In [448]:
if df.shape[0] > 60:
    for i in list(range(1, 8)) + [14, 21]:
        df[f'lag_{i}'] = df['total_price'].shift(i)
else:
    for i in list(range(1, 8)):
        df[f'lag_{i}'] = df['total_price'].shift(i)

In [454]:
df.head()

Unnamed: 0,date,total_price,month,day_of_week,day_sin,day_cos,month_sin,month_cos,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
0,2025-02-01,45.62,2,5,-0.974928,-0.222521,0.866025,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2025-02-02,313.99,2,6,-0.781831,0.62349,0.866025,0.5,45.62,0.0,0.0,0.0,0.0,0.0,0.0
2,2025-02-03,86.6,2,0,0.0,1.0,0.866025,0.5,313.99,45.62,0.0,0.0,0.0,0.0,0.0
3,2025-02-04,184.7,2,1,0.781831,0.62349,0.866025,0.5,86.6,313.99,45.62,0.0,0.0,0.0,0.0
4,2025-02-05,327.42,2,2,0.974928,-0.222521,0.866025,0.5,184.7,86.6,313.99,45.62,0.0,0.0,0.0


In [455]:
df.ffill(inplace=True)
df.fillna(0, inplace=True)             

In [456]:
df.dropna(inplace=True)

In [457]:
df.sort_values('date', inplace=True)
df.set_index('date', inplace=True)

In [458]:
x = df.drop(columns=['total_price'])
y = df['total_price']

In [459]:
scale_x = MinMaxScaler()
scale_y = MinMaxScaler()
x_scaled = scale_x.fit_transform(x)
y_scaled = scale_y.fit_transform(y.values.reshape(-1,1))

In [460]:
def create_sequences(X, y, lookback):
    Xs, ys = [], []
    for i in range(len(X) - lookback):
        Xs.append(X[i:(i + lookback)])
        ys.append(y[i + lookback])
    return np.array(Xs), np.array(ys)
lookback = 7
x_seq, y_seq = create_sequences(x_scaled, y_scaled, lookback)

In [461]:
x_seq.shape

(21, 7, 13)

In [483]:
x_train, x_test, y_train, y_test = train_test_split(x_seq, y_seq, test_size=0.2)

In [542]:
def create_model(input_shape):
    model = Sequential()
    model.add(LSTM(16, activation='relu', kernel_initializer=HeNormal(),
                   kernel_regularizer=l1_l2(l1=0.0001, l2=0.00005),
                   input_shape=input_shape, return_sequences=False))
    model.add(Dropout(0.3))
    model.add(Dense(8, activation='relu', kernel_initializer=HeNormal(),
                    kernel_regularizer=l1_l2(l1=0.0001, l2=0.00005)))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_losses = []

for fold, (train_index, val_index) in enumerate(kf.split(x_seq)):
    x_train, x_val = x_seq[train_index], x_seq[val_index]
    y_train, y_val = y_seq[train_index], y_seq[val_index]

    model = create_model((x_seq.shape[1], x_seq.shape[2]))

    early_stopping = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=6, min_lr=1e-5)

    history = model.fit(x_train, y_train, epochs=200, batch_size=8,
                        validation_data=(x_val, y_val),
                        callbacks=[early_stopping, reduce_lr], verbose=1)

    val_loss = model.evaluate(x_val, y_val, verbose=0)
    fold_losses.append(val_loss)
    print(f"Fold {fold + 1} Validation Loss: {val_loss}")

print(f"Average Validation Loss: {np.mean(fold_losses)}")

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Fold 1 Validation Loss: 0.16177067160606384
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Fold 2 Validation Loss: 0.1440313309431076
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Fold 3 Validation Loss: 0.06280773133039474
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Fold 4 Validation Loss: 0.04040476307272911
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200

In [543]:


y_pred_scaled = model.predict(x_test)
y_pred = scale_y.inverse_transform(y_pred_scaled)
y_test_original = scale_y.inverse_transform(y_test)




In [544]:
print(y_train.min(), y_train.max())
print(y_test.min(), y_test.max())

0.0 0.8128259559189663
0.0 0.7324737269701497


In [545]:
mae = mean_absolute_error(y_test_original, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_original, y_pred))
print(f'MAE: {mae:.2f}, RMSE: {rmse:.2f}')

MAE: 86.82, RMSE: 103.32


In [546]:
for i in range(y_test.shape[0]):
    print(y_pred[i], y_test_original[i])

[74.37211] [54.8]
[58.985386] [0.]
[116.18812] [239.09]
[100.74642] [156.48]
[102.572235] [279.49]


In [530]:
from datetime import datetime, timedelta
next_day = datetime.strptime('28-02-2025', '%d-%m-%Y') + timedelta(days=1)
next_data = {
    'month': next_day.month,
    'day_of_week': next_day.weekday(),
    'day_sin': np.sin(2 * np.pi * next_day.day / 31),
    'day_cos': np.cos(2 * np.pi * next_day.day / 31),
    'month_sin': np.sin(2 * np.pi * next_day.month / 12),
    'month_cos': np.cos(2 * np.pi * next_day.month / 12),
}
if df.shape[0] > 60:
    for i in list(range(1, 8)) + [14, 21]:
        next_data[f'lag_{i}'] = x_scaled[-i, 0]
else:
    for i in range(1, 8):
        next_data[f'lag_{i}'] = x_scaled[-i, 0]
next_day_input = pd.DataFrame([next_data])
next_day_input = scale_x.transform(next_day_input)
x_input = next_day_input.reshape(1, 1, -1)
predicted_value = model.predict(x_input)
original_predicted_value = scale_y.inverse_transform(predicted_value)
print(f"Predicted Value for {next_day.strftime('%d-%m-%Y')}: {original_predicted_value[0][0]}")

Predicted Value for 01-03-2025: 22.104232788085938


In [531]:
last_week_data = y[-7:]
print("Last Week's Spending Data:", last_week_data)

Last Week's Spending Data: date
2025-02-22     54.80
2025-02-23     36.89
2025-02-24    134.71
2025-02-25    310.15
2025-02-26     98.97
2025-02-27    290.49
2025-02-28     65.20
Name: total_price, dtype: float64


In [532]:
df[df['day_of_week'] == 5]

Unnamed: 0_level_0,total_price,month,day_of_week,day_sin,day_cos,month_sin,month_cos,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2025-02-01,45.62,2,5,-0.974928,-0.222521,0.866025,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-02-08,252.06,2,5,-0.974928,-0.222521,0.866025,0.5,381.57,199.31,327.42,184.7,86.6,313.99,45.62
2025-02-15,0.0,2,5,-0.974928,-0.222521,0.866025,0.5,131.32,0.0,156.48,279.49,99.73,85.45,252.06
2025-02-22,54.8,2,5,-0.974928,-0.222521,0.866025,0.5,133.08,76.83,219.04,333.34,0.0,239.09,0.0
