In [60]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

## **Data**

In [3]:
# Read the CSV file into a DataFrame
df_ibex = pd.read_csv('data/ibex.csv')

# Convert the 'date' column to datetime if it's not already in datetime format
df_ibex['Date'] = pd.to_datetime(df_ibex['Date'])

# Set the 'date' column as the index
df_ibex.set_index('Date', inplace=True)

# Drop 'Volume' and 'Adj Close' columns
df_ibex = df_ibex.drop(['Volume', 'Adj Close'], axis=1)

In [4]:
df_ibex

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2011-01-03,9899.400391,9993.599609,9850.500000,9888.299805
2011-01-04,9895.500000,9974.200195,9799.400391,9888.400391
2011-01-05,9877.500000,9882.400391,9599.299805,9801.400391
2011-01-06,9803.200195,9832.599609,9678.400391,9702.700195
2011-01-07,9682.400391,9699.400391,9497.799805,9560.700195
...,...,...,...,...
2019-12-23,9650.200195,9670.900391,9639.000000,9659.599609
2019-12-24,9632.099609,9661.799805,9607.799805,9661.799805
2019-12-27,9673.000000,9700.500000,9657.500000,9700.500000
2019-12-30,9672.500000,9682.099609,9612.599609,9612.599609


In [70]:
# Define the split date
split_date = pd.to_datetime('2019-01-01')

# Split the data into train and test sets
train_data = df_ibex[df_ibex.index < split_date]
test_data = df_ibex[df_ibex.index >= split_date]

## **Linear Regression**

In [99]:
time_window = 10

In [97]:
def split_data_x(df, time_window):
    data_raw = np.array(df)  # convert to numpy array
    data = []

    # create all possible sequences of length seq_len
    for index in range(len(data_raw) - time_window):
        data.append(data_raw[index: index + time_window].reshape(-1))

    data = np.array(data)

    out = data

    return out

In [98]:
def split_data_y(df, time_window):
    df = df['Close']
    data_raw = np.array(df)  # convert to numpy array
    data = []

    # create all possible sequences of length seq_len
    for index in range(len(data_raw) - time_window):
        data.append(data_raw[index + time_window])

    data = np.array(data)

    out = data
    out = np.expand_dims(out, axis=1)

    return out

In [100]:
x_train = split_data_x(train_data, time_window )
y_train = split_data_y(train_data, time_window)
x_test = split_data_x(test_data, time_window)
y_test = split_data_y(test_data, time_window)

In [112]:
# x_train, x_test, y_train, y_test = train_test_split(price_x,price,test_size=0.3,shuffle=False)
print('\nInput & Output Shape of Train & Test Data:')
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ',y_train.shape)
print('x_test.shape = ',x_test.shape)
print('y_test.shape = ',y_test.shape)
print('\n')


Input & Output Shape of Train & Test Data:
x_train.shape =  (2034, 40)
y_train.shape =  (2034, 1)
x_test.shape =  (245, 40)
y_test.shape =  (245, 1)




In [113]:
# Create and train the linear regression model
regression_model = LinearRegression()
regression_model.fit(x_train, y_train)

In [114]:
# Make predictions on the test set
y_pred = regression_model.predict(x_test)

In [115]:
# Calculate the root mean squared error (RMSE)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error (RMSE):', rmse)

Root Mean Squared Error (RMSE): 70.7192571196334


In [122]:
import matplotlib.pyplot as plt

# Plotting the predicted values and real values
plt.figure(figsize=(12, 6))
plt.plot(y_test.index, y_test, label='Real Values')
plt.plot(y_pred.index, y_pred, label='Predicted Values')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.title('Linear Regression - Predicted vs Real Values')
plt.legend()
plt.show()

AttributeError: 'numpy.ndarray' object has no attribute 'index'

<Figure size 1200x600 with 0 Axes>

## **ANN**

In [115]:
# Define the neural network architecture
class ANN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ANN, self).__init__()
        self.hidden = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        x = self.hidden(x)
        x = self.relu(x)
        x = self.output(x)
        return x[:, -1] # Output only the last column (close price)

In [116]:
# Set the device (GPU or CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [117]:
# Set the hyperparameters
w = 6  # Number of input data points (lookback_window)
h = 1  # Number of future price points to predict (horizon)
hidden_size = 16
lr = 0.001
num_iterations = 500

In [118]:
# Preprocess the data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df_ibex_train[['Open', 'Close', 'High', 'Low']])

# Convert the scaled data to PyTorch tensors
tensor_data = torch.FloatTensor(scaled_data).to(device)

In [119]:
# Generate the input and target sequences for training
input_sequences = []
target_sequences = []
for i in range(len(tensor_data) - w - h):
    input_sequences.append(tensor_data[i:i+w])
    target_sequences.append(tensor_data[i+w+h-1, 1])

# Create tensors for input and target sequences
x_train = torch.stack(input_sequences).to(device)
y_train = torch.stack(target_sequences).to(device)

In [120]:
# Define the model
input_size = x_train.size(-1)
output_size = 1
model = ANN(input_size, hidden_size, output_size).to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [121]:
# Train the model
for iteration in range(num_iterations):
    # Forward pass
    output = model(x_train)
    loss = criterion(output, y_train)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # Print the loss
    if (iteration + 1) % 100 == 0:
        print(f'Iteration: {iteration + 1}, Loss: {loss.item()}')

  return F.mse_loss(input, target, reduction=self.reduction)


Iteration: 100, Loss: 0.04446214437484741
Iteration: 200, Loss: 0.04043450206518173
Iteration: 300, Loss: 0.039052631705999374
Iteration: 400, Loss: 0.03876839578151703
Iteration: 500, Loss: 0.03872628137469292


In [122]:
# Compute predictions on the training data
predicted_data = model(x_train)
predicted_data = predicted_data.cpu().detach().numpy()

# Inverse transform the target data
y_train = y_train.cpu().detach().numpy()

In [123]:
# Calculate metrics
mae = mean_absolute_error(y_train, predicted_data)
rmse = mean_squared_error(y_train, predicted_data, squared=False)
smape = 2 * mae / (np.abs(y_train) + np.abs(predicted_data)).mean()
mape = np.abs((y_train - predicted_data) / y_train).mean()

# Print the metrics
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')
print(f'sMAPE: {smape}')
print(f'MAPE: {mape}')

MAE: 0.16694113612174988
RMSE: 0.19499312341213226
sMAPE: 0.28301321977805227
MAPE: inf


  mape = np.abs((y_train - predicted_data) / y_train).mean()
