In [None]:
import numpy as np
import pandas as pd
import torch
from google.colab import drive

In [None]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
file_path = "/content/drive/My Drive/data.csv"

df = pd.read_csv(file_path)

print(df.head())

       # Date  Receipt_Count
0  2021-01-01        7564766
1  2021-01-02        7455524
2  2021-01-03        7095414
3  2021-01-04        7666163
4  2021-01-05        7771289


In [None]:
# preprocess
df['# Date'] = pd.to_datetime(df['# Date'])

df_monthly = df.resample('M', on='# Date').sum()

max_receipt = df_monthly['Receipt_Count'].max()
min_receipt = df_monthly['Receipt_Count'].min()
df_monthly['Receipt_Count'] = (df_monthly['Receipt_Count'] - min_receipt) / (max_receipt - min_receipt)

print(df_monthly)
print(max_receipt)
print(min_receipt)

            Receipt_Count
# Date                   
2021-01-31       0.185766
2021-02-28       0.000000
2021-03-31       0.317801
2021-04-30       0.340447
2021-05-31       0.479544
2021-06-30       0.451796
2021-07-31       0.608824
2021-08-31       0.710778
2021-09-30       0.679670
2021-10-31       0.844481
2021-11-30       0.845816
2021-12-31       1.000000
309948684
220033460


In [None]:
#data spliting
train_data = df_monthly.iloc[:10]['Receipt_Count'].values
val_data = df_monthly.iloc[10:]['Receipt_Count'].values

print(train_data)
print(val_data)

[0.18576639 0.         0.31780119 0.34044702 0.4795438  0.45179646
 0.60882396 0.71077809 0.67967015 0.8444813 ]
[0.84581563 1.        ]


In [None]:
import torch

train_tensor = torch.FloatTensor(train_data).view(-1)
val_tensor = torch.FloatTensor(val_data).view(-1)

In [None]:
from torch.utils.data import DataLoader, TensorDataset

def create_sequences(data, seq_length):
    sequences = []
    target = []
    for i in range(len(data) - seq_length):
        seq = data[i:i+seq_length]
        label = data[i+seq_length:i+seq_length+1]
        sequences.append(seq)
        target.append(label)
    return torch.stack(sequences), torch.stack(target)

seq_length_train = 3
seq_length_val = 1

train_sequences, train_labels = create_sequences(train_tensor, seq_length_train)
val_sequences, val_labels = create_sequences(val_tensor, seq_length_val)

train_dataset = TensorDataset(train_sequences, train_labels)
val_dataset = TensorDataset(val_sequences, val_labels)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)



In [None]:
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        if len(out.shape) == 2:
            out = out.unsqueeze(1)
        out = self.linear(out[:, -1, :])
        return out


input_dim = 1
hidden_dim = 32
num_layers = 1

model = LSTMModel(input_dim, hidden_dim, num_layers)


In [None]:
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
num_epochs = 100

# Training loop
for epoch in range(num_epochs):
    for batch_seq, batch_labels in train_loader:
        # Forward pass
        outputs = model(batch_seq.view(-1, seq_length_train, 1))
        loss = criterion(outputs, batch_labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validate the model
    val_loss = 0
    with torch.no_grad():
        for val_seq, val_labels in val_loader:
            val_outputs = model(val_seq.view(-1, seq_length_val, 1))
            val_loss += criterion(val_outputs, val_labels).item()
    val_loss /= len(val_loader)

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}')


Epoch [1/100], Loss: 0.3671, Val Loss: 1.1943
Epoch [2/100], Loss: 0.4453, Val Loss: 1.1274
Epoch [3/100], Loss: 0.4927, Val Loss: 1.0640
Epoch [4/100], Loss: 0.3545, Val Loss: 1.0007
Epoch [5/100], Loss: 0.4644, Val Loss: 0.9392
Epoch [6/100], Loss: 0.2319, Val Loss: 0.8783
Epoch [7/100], Loss: 0.0056, Val Loss: 0.8165
Epoch [8/100], Loss: 0.0555, Val Loss: 0.7643
Epoch [9/100], Loss: 0.0022, Val Loss: 0.7161
Epoch [10/100], Loss: 0.0008, Val Loss: 0.6871
Epoch [11/100], Loss: 0.0072, Val Loss: 0.6704
Epoch [12/100], Loss: 0.0086, Val Loss: 0.6673
Epoch [13/100], Loss: 0.0074, Val Loss: 0.6697
Epoch [14/100], Loss: 0.0001, Val Loss: 0.6752
Epoch [15/100], Loss: 0.0341, Val Loss: 0.6856
Epoch [16/100], Loss: 0.0096, Val Loss: 0.6845
Epoch [17/100], Loss: 0.0003, Val Loss: 0.6814
Epoch [18/100], Loss: 0.0278, Val Loss: 0.6779
Epoch [19/100], Loss: 0.0309, Val Loss: 0.6830
Epoch [20/100], Loss: 0.0094, Val Loss: 0.6828
Epoch [21/100], Loss: 0.0283, Val Loss: 0.6804
Epoch [22/100], Loss: 

In [None]:
model.eval()

predictions = []

with torch.no_grad():
    for batch_seq, _ in val_loader:
        outputs = model(batch_seq.view(-1, seq_length_val, 1))
        predictions.append(outputs.item())

print("Predictions:", predictions)


Predictions: [0.18067651987075806]


In [None]:
max_receipt = 309948684
min_receipt = 220033460

scaled_prediction = 0.13805176317691803

original_prediction = (scaled_prediction * (max_receipt - min_receipt)) + min_receipt
original_prediction

232446415.20964754

In [None]:
min_receipt = 220033460
max_receipt = 309948684

actual_val_scaled = 1.0

actual_val = actual_val_scaled * (max_receipt - min_receipt) + min_receipt
actual_val


309948684.0

In [None]:
min_receipt = 220033460
max_receipt = 309948684

predicted_normalized = 0.13805176317691803

predicted_original = predicted_normalized * (max_receipt - min_receipt) + min_receipt
predicted_original


232446415.20964754

In [None]:
import math

ground_truth = [0.84581563, 1.0]
predictions = [0.13805176317691803]

predictions = [0.13805176317691803, 0.13805176317691803]

squared_diffs = [(pred - truth)**2 for pred, truth in zip(predictions, ground_truth)]

mean_squared_diff = sum(squared_diffs) / len(squared_diffs)

rmse = math.sqrt(mean_squared_diff)
rmse


0.7886331384562727

In [None]:
torch.save(model.state_dict(), "/content/drive/My Drive/model")