In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential
from keras.regularizers import l1_l2, l2
from keras.optimizers import Adam
from keras.initializers import HeNormal
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

import sklearn
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder, PolynomialFeatures
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

import warnings
warnings.filterwarnings('ignore')

In [485]:
df = pd.read_csv('veda_mc.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,date,amount
0,0,"Jul 30, 2023",1.0
1,1,"Aug 01, 2023",583.0
2,2,"Aug 01, 2023",93.0
3,3,"Aug 04, 2023",454.0
4,4,"Aug 09, 2023",5500.0


In [486]:
df = df.drop(columns=['Unnamed: 0'])

In [487]:
df = df.rename(columns = {'date': 'date',
          'amount': 'price'})

In [488]:
df['date'] = pd.to_datetime(df['date'])
date_range = pd.date_range(start=df['date'].min(), end=df['date'].max(), freq='D')
full_df = pd.DataFrame(date_range, columns=['date'])
df = pd.merge(full_df, df, on='date', how='left')
df['price'] = df['price'].fillna(0)

In [489]:
df = df.groupby('date').agg({
    'price': 'sum'
}).rename(columns={'price': 'total_price'})
df.reset_index(inplace=True)

In [490]:
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)

# Extract Year-Month and compute average spending
monthly_avg = df.resample('M')['total_price'].sum()

# Print the result
print(monthly_avg)

date
2023-07-31        1.00
2023-08-31     8622.00
2023-09-30    12227.00
2023-10-31    29793.75
2023-11-30    16482.00
2023-12-31    33981.42
2024-01-31    15499.00
2024-02-29    12005.00
2024-03-31    13837.00
2024-04-30    12451.00
2024-05-31    12845.55
2024-06-30     6351.50
2024-07-31    20122.99
2024-08-31    20750.50
2024-09-30    18315.00
2024-10-31    75970.49
2024-11-30    24122.19
2024-12-31    15843.70
2025-01-31    16507.34
2025-02-28    21714.93
2025-03-31    27978.28
Freq: ME, Name: total_price, dtype: float64


In [491]:
import pandas as pd

def calculate_monthly_avg(df):
    df['date'] = pd.to_datetime(df['date'])
    df['monthly_avg'] = None
    monthly_spending = {}

    for idx, row in df.iterrows():
        current_month = row['date'].month
        current_year = row['date'].year
        month_key = (current_year, current_month)

        if month_key not in monthly_spending:
            monthly_spending[month_key] = []

        monthly_spending[month_key].append(row['total_price'])
        monthly_avg = sum(monthly_spending[month_key]) / len(monthly_spending[month_key])
        
        df.at[idx, 'monthly_avg'] = monthly_avg

calculate_monthly_avg(df)


KeyError: 'date'

In [465]:
def extract_date_features(df, date_col):
    df[date_col] = pd.to_datetime(df[date_col])
    df['day_of_week'] = df[date_col].dt.dayofweek    
    return df
df = extract_date_features(df, 'date')
encoder = OneHotEncoder(sparse_output=False)
encoded_days = encoder.fit_transform(df[['day_of_week']])
encoded_df = pd.DataFrame(encoded_days, columns=[f'day_of_week_{i}' for i in range(1, 8)])
df = pd.concat([df, encoded_df], axis=1)
df = df.drop(columns=['day_of_week'])


In [466]:
lag_values = [1, 2, 3, 4, 5, 6, 7, 14, 21, 28]
for lag in lag_values:
    df[f'lag_{lag}'] = sum([df['total_price'].shift(i) for i in range(1, lag+1)]) / lag

In [467]:
df.dropna(inplace=True)

In [468]:
df.sort_values('date', inplace=True)
df.set_index('date', inplace=True)

In [469]:
df['total_price'].describe()

count     573.000000
mean      710.503735
std      1277.167773
min         0.000000
25%        20.000000
50%       255.000000
75%       870.000000
max      9615.000000
Name: total_price, dtype: float64

In [470]:
old_mean = df['total_price'].mean()
old_mean

710.5037347294939

In [471]:
filtered_df = df[df['total_price'] > 0]
Q1 = filtered_df['total_price'].quantile(0.25)
Q3 = filtered_df['total_price'].quantile(0.75)
IQR = Q3-Q1
original_values = df['total_price'].copy()
df.loc[df['total_price'] > Q3 + IQR, 'total_price'] = Q3 + IQR

In [472]:
filtered_df = df[df['total_price'] > 0]

In [473]:
new_mean = df['total_price'].mean()
new_mean

537.1546945898779

In [474]:
Q1

180.0

In [475]:
Q3

1101.5

In [476]:
df['total_price'].describe()

count     573.000000
mean      537.154695
std       640.863782
min         0.000000
25%        20.000000
50%       255.000000
75%       870.000000
max      2023.000000
Name: total_price, dtype: float64

In [477]:
x = df.drop(columns=['total_price'])
y = df['total_price']
scale_x = StandardScaler()
scale_y = StandardScaler()
x_scaled = scale_x.fit_transform(x)
y_scaled = scale_y.fit_transform(y.values.reshape(-1,1))
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_scaled, test_size=0.25, random_state=42)
model = ElasticNet(
    alpha=0.00005,
    l1_ratio=0.75,
    max_iter=200000,
    tol=0.0001,
    fit_intercept=True,
    random_state=42
)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
y_pred_original = scale_y.inverse_transform(y_pred.reshape(-1,1))
y_test_original = scale_y.inverse_transform(y_test.reshape(-1,1))

In [478]:
mean_change = old_mean - new_mean

In [479]:
mean_change

173.34904013961602

In [480]:
l = []
from calendar import monthrange

last_date = df.index[-1]
year, month = last_date.year, last_date.month 
days_in_month = monthrange(year, month)[1]
from datetime import timedelta
for i in range(days_in_month):
    last_date = df.index[-1]
    next_day = last_date + timedelta(days=1)
    next_data = {}
    current_month = next_day.month
    monthly_data = df[df.index.month == current_month]
    next_data['monthly_avg'] = monthly_data['total_price'].mean()
    next_data['day_of_week_1'] = 1 if next_day.weekday() == 0 else 0
    next_data['day_of_week_2'] = 1 if next_day.weekday() == 1 else 0
    next_data['day_of_week_3'] = 1 if next_day.weekday() == 2 else 0
    next_data['day_of_week_4'] = 1 if next_day.weekday() == 3 else 0
    next_data['day_of_week_5'] = 1 if next_day.weekday() == 4 else 0
    next_data['day_of_week_6'] = 1 if next_day.weekday() == 5 else 0
    next_data['day_of_week_7'] = 1 if next_day.weekday() == 6 else 0
    lags = [1, 2, 3, 4, 5, 6, 7, 14, 21, 28]
    for lag in lags:
        lag_value = df['total_price'].shift(lag).iloc[-1]
        next_data[f'lag_{lag}'] = lag_value
    next_data_df = pd.DataFrame([next_data], index=[next_day])
    next_data_scaled = scale_x.transform(next_data_df)
    predicted_value = model.predict(next_data_scaled)
    predicted_value = scale_y.inverse_transform(predicted_value.reshape(-1, 1))
    if predicted_value[0] < 10:
        predicted_value[0] = 0

    next_data_df['total_price'] = predicted_value[0]
    df = pd.concat([df, next_data_df], axis=0)
    l.append((predicted_value[0], next_day.weekday()))


In [481]:
l

[(array([973.82853866]), 5),
 (array([0.]), 6),
 (array([1520.48840725]), 0),
 (array([0.]), 1),
 (array([1257.79100376]), 2),
 (array([89.75940416]), 3),
 (array([748.50510044]), 4),
 (array([835.76303753]), 5),
 (array([0.]), 6),
 (array([646.23531687]), 0),
 (array([0.]), 1),
 (array([885.5592152]), 2),
 (array([582.7912401]), 3),
 (array([519.62376957]), 4),
 (array([1068.89673773]), 5),
 (array([0.]), 6),
 (array([345.58079985]), 0),
 (array([0.]), 1),
 (array([643.21837248]), 2),
 (array([128.10159273]), 3),
 (array([0.]), 4),
 (array([222.38190014]), 5),
 (array([0.]), 6),
 (array([694.70221851]), 0),
 (array([0.]), 1),
 (array([613.4094005]), 2),
 (array([0.]), 3),
 (array([268.96336693]), 4),
 (array([607.70452687]), 5),
 (array([81.9047264]), 6),
 (array([839.94539884]), 0)]

In [482]:
sum_ = sum([item[0] for item in l])+days_in_month*mean_change
average = sum_ / len(l) if len(l) > 0 else 0

In [483]:
sum_

array([18948.97431886])

In [484]:
average

array([611.25723609])

In [381]:
#MANAGMENT

In [382]:
monthly_expense = int(input(''))

 3


In [395]:
excess = sum_ - monthly_expense
excess

array([2410.27093941])

In [397]:
day_excess = excess/30
day_excess

array([80.34236465])

In [400]:
#Enter time in HH:MM:SS format: 
from datetime import datetime
breakfast_time = input("")
try:
    time = datetime.strptime(breakfast_time, '%H:%M:%S').time()
    print("You entered the time:", time)
except ValueError:
    print("Invalid time format. Please enter time in HH:MM:SS format.")

 9:30:00


You entered the time: 09:30:00


In [401]:
#Enter time in HH:MM:SS format: 
from datetime import datetime
lunch_time = input("")
try:
    time = datetime.strptime(lunch_time, '%H:%M:%S').time()
    print("You entered the time:", time)
except ValueError:
    print("Invalid time format. Please enter time in HH:MM:SS format.")

 2:30:00


You entered the time: 02:30:00


In [402]:
#Enter time in HH:MM:SS format: 
from datetime import datetime
dinner_time = input("")
try:
    time = datetime.strptime(dinner_time, '%H:%M:%S').time()
    print("You entered the time:", time)
except ValueError:
    print("Invalid time format. Please enter time in HH:MM:SS format.")

 22:00:00


You entered the time: 22:00:00


In [None]:

degree = 4
poly = PolynomialFeatures(degree=degree)
x_poly = poly.fit_transform(x_train)


    next_data_scaled = poly.transform(next_data_scaled)