In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset
from LSTM import *

In [6]:
# Load the raw weather observations from the project-relative CSV.
df = pd.read_csv('data/weatherHistory.csv')

# Encode target labels: each distinct 'Summary' string becomes an integer class id.
# The encoder is kept so predictions can be mapped back to the original strings.
encoder = LabelEncoder()
df['Summary_Encoded'] = encoder.fit_transform(df['Summary'])

# Select features and target
features = ['Temperature (C)', 'Apparent Temperature (C)', 'Humidity',
            'Wind Speed (km/h)', 'Wind Bearing (degrees)', 'Visibility (km)',
            'Pressure (millibars)']
target = 'Summary_Encoded'

# Normalize features.
# The scaler is fitted on the leading (training) portion of the rows only and
# then applied to every row. Fitting on the full table would let the mean and
# variance of the held-out rows leak into preprocessing; the notebook later
# splits chronologically and holds out the final 20% of the windows.
# NOTE(review): this assumes the CSV rows are already in chronological order - confirm.
TRAIN_FRACTION = 0.8
n_fit_rows = max(1, int(TRAIN_FRACTION * len(df)))
scaler = StandardScaler()
scaler.fit(df[features].iloc[:n_fit_rows])
df[features] = scaler.transform(df[features])

In [None]:
# Create sequences: each sample is `sequence_length` consecutive rows of
# features, and its label is the target of the row right after the window.
# (6 rows per window, i.e. 6 hours if the rows are hourly - TODO confirm.)
sequence_length = 6

# Extract the arrays from the DataFrame once. The previous loop re-selected
# df[features] and converted it to an array on every iteration, which made
# building the windows quadratic in the number of rows.
feature_values = df[features].to_numpy()
target_values = df[target].to_numpy()

n_sequences = len(df) - sequence_length
if n_sequences <= 0:
    raise ValueError(
        f"Need more than {sequence_length} rows to build sequences, got {len(df)}"
    )

# Row i of window_index is [i, i+1, ..., i+sequence_length-1]; indexing with it
# gathers all windows at once into (n_sequences, sequence_length, n_features).
window_index = np.arange(n_sequences)[:, None] + np.arange(sequence_length)
X = feature_values[window_index]

# Label for window i is the target at row i + sequence_length.
# Copy so that y does not alias the DataFrame's underlying buffer.
y = target_values[sequence_length:].copy()

In [None]:








# Chronological hold-out (no shuffling): the first 80% of the windows are used
# for training and the remaining windows for testing.
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

In [None]:
# Seed PyTorch's random number generators before any weights are created so
# that parameter initialisation (and later shuffled batching, which draws from
# the same global generator by default) is repeatable across runs.
SEED = 42
torch.manual_seed(SEED)

# Initialize model on the GPU when one is available, otherwise on the CPU.
# WeatherLSTM is presumably provided by the `from LSTM import *` line - verify.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = WeatherLSTM(
    input_size=len(features),            # one input per feature column
    hidden_size=64,
    num_layers=2,
    num_classes=len(encoder.classes_)    # one output per encoded Summary label
).to(device)

In [None]:
# Wrap the NumPy splits as tensors. Inputs are cast to float32 and labels to
# int64 (CrossEntropyLoss takes class indices as long tensors). torch.tensor
# with an explicit dtype replaces the legacy FloatTensor/LongTensor constructors.
train_data = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.long),
)
test_data = TensorDataset(
    torch.tensor(X_test, dtype=torch.float32),
    torch.tensor(y_test, dtype=torch.long),
)

# Data loaders: training batches are reshuffled every epoch, while the test
# loader keeps the original order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

# Loss and optimizer.
# Referenced through `torch.nn` because this notebook never imports `nn`
# itself; the bare name only resolved if the star import happened to expose it.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Train for a fixed number of epochs, reporting accuracy on the test loader
# after each one. The figure is for monitoring only: because it is computed on
# the test split, it should not be used to choose a checkpoint.
epochs = 50
for epoch in range(epochs):
    # Training pass: one optimizer step per mini-batch.
    model.train()
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluation pass: no gradients are tracked while scoring the test batches.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)
            # argmax over the class dimension; avoids the discouraged `.data`
            # attribute and does not assign to `_`, which IPython uses for the
            # last cell output.
            predicted = outputs.argmax(dim=1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()

    # Guard against an empty test loader instead of raising ZeroDivisionError.
    acc = 100 * correct / total if total else float('nan')
    print(f'Epoch {epoch+1}/{epochs} | Test Acc: {acc:.2f}%')

In [None]:
# Take the first batch from the test loader; its labels are not used here.
test_sample, _ = next(iter(test_loader))
# Keep only the first window while retaining a leading batch axis of size 1.
test_sample = test_sample[:1].to(device)

# Score the single window without tracking gradients, then map the winning
# class index back to its original Summary string.
model.eval()
with torch.no_grad():
    output = model(test_sample)
    prediction = int(output.argmax())
    print("Predicted:", encoder.inverse_transform([prediction])[0])