In [None]:
# Data Exploration Notebook

'''This notebook is used for exploring the dataset to identify trends, patterns, and any preprocessing requirements.'''


### Imports and Loading Data

import pandas as pd
import matplotlib.pyplot as plt

# Load data
# Path is relative to the notebook's working directory.
data_path = '../data/raw/indexProcessed.csv'
df = pd.read_csv(data_path)

# Display basic info
# df.info() prints its report itself, so it can run mid-cell. df.head() is only
# rendered when it is the cell's last expression, so it has to come last;
# in the opposite order the preview was silently discarded.
df.info()
df.head()


In [None]:
# Parse the Date column into datetimes so it can be used as a time axis.
df['Date'] = pd.to_datetime(df['Date'])

# Line chart of the USD closing price against the date, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['Date'], df['CloseUSD'], label='Close Price (USD)')
ax.set(
    title='Stock Price Over Time',
    xlabel='Date',
    ylabel='Price (USD)',
)
ax.legend()
plt.show()


In [None]:
# Descriptive statistics for the USD closing price column.
# The result is the cell's last expression, so the notebook renders it.
df.loc[:, 'CloseUSD'].describe()


In [None]:
# Count the missing values in every column (isna is the same method as isnull).
df.isna().sum()


In [None]:

'''---

### **2. `model_analysis.ipynb`**

# Model Analysis Notebook

This notebook analyzes the performance of the trained model.

---
'''
### Imports and Loading the Model

import torch
from model import LSTMModel
from data_loader import load_and_preprocess_data

# Load model
# NOTE(review): LSTMModel() is built with its default hyper-parameters; they must
# match the ones used when the checkpoint was written, otherwise load_state_dict
# raises on mismatched keys/shapes — confirm against the training script.
model = LSTMModel()

# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; without it torch.load tries to restore the original device.
# SECURITY: torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True where the installed PyTorch supports it).
model.load_state_dict(
    torch.load('../models/saved/stock_lstm_model.pth', map_location='cpu')
)

# Switch to inference behaviour; eval() returns the model, so the cell shows its repr.
model.eval()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Only the test split and the scaler are needed here; the first two return values are ignored.
_, _, test_dataset, scaler = load_and_preprocess_data('../data/raw/indexProcessed.csv')

# NOTE(review): assumes test_dataset.data is a 1-D series in the scaler's
# (normalised) space and that the model was trained on that same scaled series —
# confirm against data_loader and the training script.
scaled_series = np.asarray(test_dataset.data, dtype=np.float64).reshape(-1)

# Prices in USD, recovered with one vectorised call instead of one
# inverse_transform call per element.
test_data = scaler.inverse_transform(scaled_series.reshape(-1, 1)).ravel()

# Prepare input sequence
sequence_length = 50
actuals = test_data[sequence_length:]

# The model is fed *scaled* windows; previously it received the
# inverse-transformed USD values, which are on a different scale from the
# series the scaler was fitted to produce.
model_input = scaled_series.astype(np.float32)
scaled_predictions = []
with torch.no_grad():  # inference only, so no autograd graph is needed
    for i in range(sequence_length, len(model_input)):
        # Shape (1, sequence_length, 1): one sequence, one feature per step.
        input_seq = torch.from_numpy(model_input[i - sequence_length:i]).reshape(1, sequence_length, 1)
        scaled_predictions.append(model(input_seq).item())

# Map the predictions back to USD so they are comparable with `actuals`.
# An empty result (series not longer than sequence_length) is kept as an empty
# array rather than passed to the scaler.
if scaled_predictions:
    predictions = scaler.inverse_transform(
        np.asarray(scaled_predictions, dtype=np.float64).reshape(-1, 1)
    ).ravel()
else:
    predictions = np.empty(0)

# Overlay the predicted series on the actual test-set series, on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(actuals, label="Actual Prices")
ax.plot(predictions, label="Predicted Prices")
ax.set(
    title="Actual vs Predicted Stock Prices",
    xlabel="Time",
    ylabel="Price (USD)",
)
ax.legend()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error
import math

# RMSE is the square root of the mean squared error between the actual and
# predicted series.
rmse = math.sqrt(
    mean_squared_error(y_true=actuals, y_pred=predictions)
)
print(f"Root Mean Square Error (RMSE): {rmse}")


In [None]:

'''---

### **3. `test_model.py`**
'''
import torch
from model import LSTMModel

def test_model_forward_pass():
    model = LSTMModel(input_size=1, hidden_size=50, num_layers=2, output_size=1)
    dummy_input = torch.randn(10, 50, 1)  # Batch size 10, sequence length 50, feature size 1
    output = model(dummy_input)
    assert output.shape == (10, 1), "Output shape mismatch"
    print("Model forward pass test passed!")

# Run the smoke test only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    test_model_forward_pass()
