In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [27]:
# Load the day-1 price file (semicolon-delimited) and keep only the COCONUT rows,
# restricted to the two columns used below.
ds = pd.read_csv('../../data/round-4-island-data-bottle/prices_round_4_day_1.csv', sep=';')
# Explicit .copy() calls give each frame its own data, so that adding columns to
# them later does not write into a slice of `ds` (SettingWithCopyWarning).
data = ds.loc[ds["product"] == "COCONUT", ["timestamp", "mid_price"]].copy()
data_returns = data.copy()

In [28]:
# Look-ahead horizons, measured in rows of `data` (shift() moves by whole rows).
lookaheads = [1, 2, 5, 10, 20, 50, 75, 100, 150, 200, 250, 300, 350, 400, 450, 500]

# Collect the new columns in dicts and attach them with one assign() per frame.
# assign() returns a new DataFrame, so nothing is written into a slice of another
# frame (no SettingWithCopyWarning), and re-running the cell simply overwrites
# the same columns.
future_prices = {}
future_returns = {}
for lookahead in lookaheads:
    # Mid price `lookahead` rows later; the last `lookahead` rows become NaN.
    future_mid = data['mid_price'].shift(-lookahead)
    future_prices['mid_price_t_' + str(lookahead)] = future_mid
    # Relative change from the current mid price to that future mid price.
    future_returns['returns_t' + str(lookahead)] = (future_mid - data['mid_price']) / data['mid_price']

data = data.assign(**future_prices)
data_returns = data_returns.assign(**future_returns)
data.columns

Index(['timestamp', 'mid_price', 'mid_price_t_1', 'mid_price_t_2',
       'mid_price_t_5', 'mid_price_t_10', 'mid_price_t_20', 'mid_price_t_50',
       'mid_price_t_75', 'mid_price_t_100', 'mid_price_t_150',
       'mid_price_t_200', 'mid_price_t_250', 'mid_price_t_300',
       'mid_price_t_350', 'mid_price_t_400', 'mid_price_t_450',
       'mid_price_t_500'],
      dtype='object')

In [29]:
# Sanity check: there should be one returns_t<k> column per look-ahead horizon,
# next to the original timestamp and mid_price columns.
data_returns.columns

Index(['timestamp', 'mid_price', 'returns_t1', 'returns_t2', 'returns_t5',
       'returns_t10', 'returns_t20', 'returns_t50', 'returns_t75',
       'returns_t100', 'returns_t150', 'returns_t200', 'returns_t250',
       'returns_t300', 'returns_t350', 'returns_t400', 'returns_t450',
       'returns_t500'],
      dtype='object')

In [30]:
# Column-wise means. Each returns_t<k> entry is the average relative change in
# mid_price over k rows. The trailing k rows of that column are NaN (they come
# from shift(-k)) and are skipped by mean()'s default NaN handling.
data_returns.mean()

timestamp       499950.000000
mid_price        10040.633000
returns_t1           0.000001
returns_t2           0.000003
returns_t5           0.000007
returns_t10          0.000014
returns_t20          0.000027
returns_t50          0.000066
returns_t75          0.000098
returns_t100         0.000129
returns_t150         0.000190
returns_t200         0.000262
returns_t250         0.000335
returns_t300         0.000407
returns_t350         0.000474
returns_t400         0.000546
returns_t450         0.000617
returns_t500         0.000694
dtype: float64

In [31]:
def fit_linear_regression_model(stock_data, lookback_period=10):
    """Fit a linear regression that predicts the next one-step price change.

    Each training sample uses ``lookback_period`` consecutive prices (oldest
    first) as features; the target is the change from the newest of those
    prices to the price that follows it. This is the same feature layout that
    ``predict_future_price`` passes to ``model.predict``, so a model trained
    here with a given ``lookback_period`` can be used there with the same value.

    Args:
        stock_data: Price history in chronological order. A pandas Series, a
            single-column DataFrame, or any 1-D array-like of numbers.
        lookback_period: Number of consecutive prices used as features.

    Returns:
        The trained ``LinearRegression`` model. Only the chronologically first
        80% of the samples are used for fitting; the remaining 20% are held out
        and not used by this function.

    Raises:
        ValueError: If ``stock_data`` is not one-dimensional (or a single
            column), if ``lookback_period`` is smaller than 1, or if there are
            too few usable rows to build at least two samples.
    """
    prices = np.asarray(stock_data, dtype=float)
    # Accept a single-column DataFrame / 2-D array by flattening it to 1-D.
    if prices.ndim == 2 and prices.shape[1] == 1:
        prices = prices[:, 0]
    if prices.ndim != 1:
        raise ValueError("stock_data must be a Series or a single-column DataFrame")
    if lookback_period < 1:
        raise ValueError("lookback_period must be at least 1")

    # Every sample needs `lookback_period` prices plus the one that follows.
    n_samples = len(prices) - lookback_period
    if n_samples < 1:
        raise ValueError("stock_data is too short for the requested lookback_period")

    # Row i of X is prices[i : i + lookback_period]; y[i] is the move from the
    # last price of that window to the next price.
    X = np.column_stack([prices[lag:lag + n_samples] for lag in range(lookback_period)])
    y = prices[lookback_period:] - prices[lookback_period - 1:-1]

    # Drop samples that touch a missing price (LinearRegression rejects NaN).
    complete = ~(np.isnan(X).any(axis=1) | np.isnan(y))
    X, y = X[complete], y[complete]
    if len(X) < 2:
        raise ValueError("not enough complete samples to fit the model")

    # Chronological split (no shuffling): fit on the earliest 80% only.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, shuffle=False)

    model = LinearRegression()
    model.fit(X_train, y_train)

    return model

In [32]:
def predict_future_price(model, stock_data, lookback_period=10):
    """Predict the next price from the most recent prices.

    The last ``lookback_period`` prices (oldest first) are passed to the model
    as one feature row; the model's output is treated as a price change and
    added to the most recent price.

    Args:
        model: A fitted regressor whose ``predict`` accepts an array of shape
            ``(1, lookback_period)`` and returns the predicted price change.
        stock_data: Price history in chronological order. A pandas Series, a
            single-column DataFrame, or any 1-D array-like of numbers.
        lookback_period: Number of most recent prices fed to the model. Must
            match the number of features the model was trained on.

    Returns:
        The predicted future price (last observed price plus predicted change)
        as a float.

    Raises:
        ValueError: If ``stock_data`` is not one-dimensional (or a single
            column), if ``lookback_period`` is smaller than 1, or if fewer than
            ``lookback_period`` prices are available.
    """
    prices = np.asarray(stock_data, dtype=float)
    # Accept a single-column DataFrame / 2-D array by flattening it to 1-D.
    if prices.ndim == 2 and prices.shape[1] == 1:
        prices = prices[:, 0]
    if prices.ndim != 1:
        raise ValueError("stock_data must be a Series or a single-column DataFrame")
    if lookback_period < 1:
        raise ValueError("lookback_period must be at least 1")
    # Fail early with a clear message instead of a feature-count mismatch
    # inside model.predict.
    if len(prices) < lookback_period:
        raise ValueError("stock_data has fewer rows than lookback_period")

    # Most recent window, oldest price first, as a single feature row.
    last_prices = prices[-lookback_period:]
    predicted_change = model.predict(last_prices.reshape(1, -1))

    # ravel() handles both 1-D and 2-D prediction arrays.
    return float(last_prices[-1] + np.ravel(predicted_change)[0])

In [33]:
# # Load your stock data (replace 'stock_data.csv' with your actual file)
# stock_data = pd.read_csv('stock_data.csv', index_col='Date', parse_dates=True)

# # Fit the linear regression model
# model = fit_linear_regression_model(stock_data)

# # Predict the future price
# predicted_price = predict_future_price(model, stock_data)

# print("Predicted future price:", predicted_price)