In [None]:

import pandas as pd
import numpy as np
import boto3
import io
import matplotlib.pyplot as plt
from datetime import timedelta

import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Cell 2: AWS Setup
# Where the input CSV lives (S3 bucket + object key) and the name of the
# DynamoDB table the forecasts are written to.
s3_bucket, csv_key = 'groweasy-products-bucket', 'products.csv'
dynamodb_table = 'ForecastResults'

# boto3 handles: a resource-level DynamoDB Table object for the uploads and
# a low-level S3 client for the CSV download.
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table(dynamodb_table)
s3 = boto3.client('s3')

In [None]:
# Cell 3: Load CSV from S3
# Download the object in full, then parse it from an in-memory buffer.
obj = s3.get_object(Bucket=s3_bucket, Key=csv_key)
csv_buffer = io.BytesIO(obj['Body'].read())

# Parse timestamps and order the rows chronologically within each product,
# which is the order the sequence-building code downstream relies on.
df = (
    pd.read_csv(csv_buffer)
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
    .sort_values(['product_id', 'timestamp'])
)


In [None]:
# Cell 4: LSTM Model Definition
class LSTMModel(nn.Module):
    """Single-layer LSTM followed by a linear read-out of the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size=1, hidden_size=50, output_size=1):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, time, features).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map a (batch, time, input_size) tensor to (batch, output_size)."""
        per_step_hidden = self.lstm(x)[0]
        # Only the hidden state after the final time step feeds the read-out.
        final_step = per_step_hidden[:, -1, :]
        return self.fc(final_step)


In [None]:
# Cell 5: Forecast Function
def forecast_product(df_product, forecast_days=7, seq_len=5, epochs=100, lr=0.01, seed=None):
    """Train a small LSTM on one product's order history and forecast ahead.

    The whole definition lives in a single cell: a function body cannot be
    continued in a later notebook cell.

    Args:
        df_product: DataFrame with a numeric ``total_orders`` column, rows in
            chronological order.
        forecast_days: Number of future steps to predict (>= 1).
        seq_len: Length of the sliding input window fed to the LSTM (>= 1).
        epochs: Number of full-batch training iterations.
        lr: Adam learning rate.
        seed: Optional integer passed to ``torch.manual_seed`` so that weight
            initialisation (and therefore the forecast) is reproducible.
            ``None`` leaves the global RNG untouched.

    Returns:
        1-D NumPy array of length ``forecast_days`` with the predicted order
        counts on the original (unscaled) scale. Values are raw model outputs:
        they are not rounded and may be negative.

    Raises:
        ValueError: If ``forecast_days`` or ``seq_len`` is below 1, if the
            history has ``seq_len`` rows or fewer (no training pair can be
            built), or if ``total_orders`` contains missing values.
    """
    # Validate up front so failures are explicit instead of surfacing as
    # tensor-shape errors deep inside the model.
    if forecast_days < 1:
        raise ValueError(f"forecast_days must be at least 1, got {forecast_days}")
    if seq_len < 1:
        raise ValueError(f"seq_len must be at least 1, got {seq_len}")

    n_obs = len(df_product)
    if n_obs <= seq_len:
        raise ValueError(
            f"forecast_product needs at least {seq_len + 1} observations "
            f"to build a training window of length {seq_len}, got {n_obs}"
        )
    if df_product['total_orders'].isna().any():
        # Missing values pass through the scaler and turn the loss (and every
        # forecast) into NaN, so reject them here.
        raise ValueError("total_orders contains missing values")

    if seed is not None:
        torch.manual_seed(seed)

    scaler = MinMaxScaler()
    orders = df_product['total_orders'].values.reshape(-1, 1)
    scaled = scaler.fit_transform(orders)

    # Sliding windows: each run of `seq_len` values predicts the value after it.
    windows = np.stack([scaled[i:i + seq_len] for i in range(n_obs - seq_len)])
    targets = scaled[seq_len:]
    X = torch.tensor(windows, dtype=torch.float32)   # (n_windows, seq_len, 1)
    y = torch.tensor(targets, dtype=torch.float32)   # (n_windows, 1)

    # Train model (full-batch gradient descent).
    model = LSTMModel()
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        loss = loss_fn(model(X), y)
        loss.backward()
        optimizer.step()

    # Forecast next N days autoregressively: every prediction is appended to
    # the window and the oldest value is dropped.
    model.eval()
    window = torch.tensor(scaled[-seq_len:].reshape(1, seq_len, 1), dtype=torch.float32)

    preds = []
    with torch.no_grad():
        for _ in range(forecast_days):
            next_scaled = model(window)  # shape (1, 1)
            preds.append(next_scaled.item())
            window = torch.cat([window[:, 1:, :], next_scaled.reshape(1, 1, 1)], dim=1)

    preds_rescaled = scaler.inverse_transform(np.array(preds).reshape(-1, 1)).flatten()
    return preds_rescaled

In [None]:
# Cell 6: Run Forecast for Each Product
results = []
skipped_products = []  # products whose history could not be forecast

for product_id, group in df.groupby('product_id'):
    try:
        preds = forecast_product(group, forecast_days=7)
    except ValueError as err:
        # A ValueError here means this product's history could not be
        # modelled; report and skip it rather than aborting the whole run.
        skipped_products.append(product_id)
        print(f"Skipping product {product_id}: {err}")
        continue

    # Forecasts start the day after the product's latest observation.
    start_date = group['timestamp'].max() + timedelta(days=1)

    # boto3's DynamoDB serializer accepts native Python types only, so unwrap
    # the key if pandas hands back a NumPy scalar.
    item_product_id = product_id.item() if isinstance(product_id, np.generic) else product_id

    for i, forecast_value in enumerate(preds):
        forecast_date = (start_date + timedelta(days=i)).strftime('%Y-%m-%d')
        results.append({
            'product_id': item_product_id,
            'forecast_date': forecast_date,
            # Round to the nearest whole order (int() alone truncates, so
            # 9.99 would become 9) and never report a negative count.
            'predicted_orders': max(0, int(round(float(forecast_value))))
        })


In [None]:
# Cell 7: Upload to DynamoDB
# batch_writer buffers the puts and sends them as batched write requests,
# resending any unprocessed items, instead of one network round trip per row.
# NOTE(review): batched writes reject duplicate primary keys within a batch;
# this assumes product_id + forecast_date form the table key - confirm
# against the table schema.
with table.batch_writer() as batch:
    for item in results:
        batch.put_item(Item=item)

print("✅ Forecasts saved to DynamoDB table:", dynamodb_table)