In [65]:
import random
import datetime
import csv

def generate_february_spending(seed=42):
    random.seed(seed)
    start_date = datetime.date(2025, 2, 1)
    spending_data = []

    for day in range(28):
        current_date = start_date + datetime.timedelta(days=day)
        if random.random() < 0.2:
            spending_data.append({
                "date": current_date.strftime("%Y-%m-%d"),
                "time": "00:00:00",
                "price": 0.00
            })
            continue  
        num_transactions = random.randint(1, 5)
        for _ in range(num_transactions):
            hour = random.randint(8, 22)
            minute = random.randint(0, 59)
            second = random.randint(0, 59)
            transaction_time = datetime.time(hour, minute, second)
            price = round(random.uniform(0.01, 100.00), 2)
            spending_data.append({
                "date": current_date.strftime("%Y-%m-%d"),
                "time": transaction_time.strftime("%H:%M:%S"),
                "price": price
            })

    return spending_data

february_data = generate_february_spending()

with open('spending.csv', 'w', newline='') as csvfile:
    fieldnames = ['date', 'time', 'price']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(february_data)

print("Data saved to february_spending.csv")


Data saved to february_spending.csv


In [206]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential
from keras.regularizers import l1_l2, l2
from keras.optimizers import Adam
from keras.initializers import HeNormal
from keras.callbacks import EarlyStopping

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [175]:
df = pd.read_csv('spending.csv')
df.head()

Unnamed: 0,date,time,price
0,2025-02-01,19:17:15,22.33
1,2025-02-02,09:37:27,3.19
2,2025-02-02,09:13:14,50.54
3,2025-02-02,08:35:12,71.6
4,2025-02-02,19:34:26,22.05


In [178]:
df = df.groupby('date').agg({
    'price': 'sum'
}).rename(columns={'price': 'total_price'})
df.reset_index(inplace=True)

In [180]:
def extract_date_features(df, date_col):
    df[date_col] = pd.to_datetime(df[date_col])
    df['month'] = df[date_col].dt.month
    df['day_of_week'] = df[date_col].dt.dayofweek
    df['day_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
    df['day_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    
    return df
df = extract_date_features(df, 'date')

In [182]:
for i in range(1, 8):
    df[f'lag_{i}'] = df['total_price'].shift(i)

In [184]:
df.dropna(inplace=True)

In [185]:
df.sort_values('date', inplace=True)
df.set_index('date', inplace=True)

In [187]:
x = df.drop(columns=['total_price'])
y = df['total_price']

In [188]:
scale_x = MinMaxScaler()
scale_y = MinMaxScaler()
x_scaled = scale_x.fit_transform(x)
y_scaled = scale_y.fit_transform(y.values.reshape(-1,1))

In [189]:
def create_sequences(X, y, lookback):
    Xs, ys = [], []
    for i in range(len(X) - lookback):
        Xs.append(X[i:(i + lookback)])
        ys.append(y[i + lookback])
    return np.array(Xs), np.array(ys)
lookback = 7
x_seq, y_seq = create_sequences(x_scaled, y_scaled, lookback)

In [217]:
model = Sequential()
model.add(LSTM(units=24, activation='tanh', kernel_initializer=HeNormal(), kernel_regularizer=l1_l2(l1=0.01, l2=0.001), input_shape=(x_seq.shape[1], x_seq.shape[2]), return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(units=12, activation='tanh', kernel_initializer=HeNormal(), kernel_regularizer=l1_l2(l1=0.01, l2=0.001), return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(units=1))
optimizer = Adam(learning_rate=0.005)


model.compile(optimizer=optimizer, loss='mse')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(x_seq, y_seq, epochs=200, batch_size=16, validation_split=0.2, callbacks=[early_stopping], verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [218]:
x_train, x_test, y_train, y_test = train_test_split(x_seq, y_seq, test_size=0.2)

In [219]:
y_pred_scaled = model.predict(x_test)
y_pred = scale_y.inverse_transform(y_pred_scaled)  # Inverse transform to get original scale
y_test_original = scale_y.inverse_transform(y_test)



In [211]:
mae = mean_absolute_error(y_test_original, y_pred)
rmse = np.sqrt(mean_squared_error(y_test_original, y_pred))
print(f'MAE: {mae:.2f}, RMSE: {rmse:.2f}')

MAE: 38.24, RMSE: 42.11


In [224]:
x.columns

Index(['month', 'day_of_week', 'day_sin', 'day_cos', 'month_sin', 'month_cos',
       'lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5', 'lag_6', 'lag_7'],
      dtype='object')

In [None]:
df_test[]