In [14]:
import requests
import pandas as pd
import os
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv("APCA_API_KEY_ID")
api_secret = os.getenv("APCA_API_SECRET_KEY")

base_url = "https://data.alpaca.markets/v2/stocks/bars"
symbol = "AAPL"
timeframe = "12H"
start_date = "2024-01-03T00:00:00Z"
end_date = "2025-04-05T00:00:00Z"
limit = 10000

headers = {
    "accept": "application/json",
    "APCA-API-KEY-ID": api_key,
    "APCA-API-SECRET-KEY": api_secret
}

all_bars = []
page_token = None

while True:
    params = {
        "symbols": symbol,
        "timeframe": timeframe,
        "start": start_date,
        "end": end_date,
        "limit": limit,
        "adjustment": "raw",
        "feed": "sip",
        "sort": "asc"
    }
    if page_token:
        params["page_token"] = page_token

    response = requests.get(base_url, headers=headers, params=params)
    data = response.json()

    # Collect bars
    bars = data.get("bars", {}).get(symbol, [])
    all_bars.extend(bars)

    # Check for next_page_token
    page_token = data.get("next_page_token")
    if not page_token:
        break

# Convert to DataFrame
df = pd.DataFrame(all_bars)
df.to_csv("AAPL_full_data.csv", index=False)
print(f"Downloaded {len(df)} bars.")


Downloaded 660 bars.


In [2]:
%pip install xgboost scikit-learn

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from datetime import datetime, timedelta


df = pd.read_csv("AAPL.csv")

print(df.head(3))


print (df.tail(5))

   Unnamed: 0       c       h       l       n       o                     t  \
0           0  185.00  185.31  184.66    3785  185.31  2024-01-03T00:00:00Z   
1           1  184.29  185.88  183.43  650136  184.95  2024-01-03T12:00:00Z   
2           2  182.67  184.30  182.55    5343  184.20  2024-01-04T00:00:00Z   

          v          vw  
0    123704  185.045218  
1  58149882  184.318538  
2    171739  183.158026  
    Unnamed: 0       c       h       l       n       o                     t  \
26          26  191.23  191.31  191.10     676  191.30  2024-01-20T00:00:00Z   
27          27  192.02  192.18  191.56    2358  192.18  2024-01-22T00:00:00Z   
28          28  193.89  195.33  191.86  712398  192.00  2024-01-22T12:00:00Z   
29          29  193.70  193.91  193.01    2180  193.88  2024-01-23T00:00:00Z   
30          30  195.11  195.75  193.30  528168  193.69  2024-01-23T12:00:00Z   

           v          vw  
26     26804  191.211728  
27     84229  191.956100  
28  60009352  193

In [4]:
# === Clean & Prepare ===
df = df.rename(columns={
    't': 'timestamp',
    'o': 'open',
    'h': 'high',
    'l': 'low',
    'c': 'close',
    'v': 'volume',
    'n': 'trade_count',
    'vw': 'vwap'
})

df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# === Feature Engineering ===
df['close_lag_1'] = df['close'].shift(1)
df['close_lag_2'] = df['close'].shift(2)
df['ma_3'] = df['close'].rolling(window=3).mean()
df['ma_5'] = df['close'].rolling(window=5).mean()

df.dropna(inplace=True)

# === Features & Target ===
features = ['open', 'high', 'low', 'volume', 'trade_count', 'vwap', 'close_lag_1', 'close_lag_2', 'ma_3', 'ma_5']
X = df[features]
y = df['close']

# === Train Model on Entire Historical Data ===
model = XGBRegressor(n_estimators=200, max_depth=4, learning_rate=0.1)
model.fit(X, y)

# === Forecast Next 30 Days ===
last_row = df.iloc[-1]
future_preds = []
future_dates = []

for i in range(30):
    # Create next row input
    new_date = df.index[-1] + timedelta(days=i + 1)
    future_dates.append(new_date)

    input_data = {
        'open': last_row['open'],
        'high': last_row['high'],
        'low': last_row['low'],
        'volume': last_row['volume'],
        'trade_count': last_row['trade_count'],
        'vwap': last_row['vwap'],
        'close_lag_1': last_row['close'],
        'close_lag_2': last_row['close_lag_1'],
        'ma_3': df['close'][-3:].mean(),
        'ma_5': df['close'][-5:].mean()
    }

    input_df = pd.DataFrame([input_data])
    predicted_close = model.predict(input_df)[0]

    # Save prediction
    future_preds.append(predicted_close)

    # Update last_row for next iteration
    new_row = last_row.copy()
    new_row['close_lag_2'] = new_row['close_lag_1']
    new_row['close_lag_1'] = predicted_close
    new_row['close'] = predicted_close
    df.loc[new_date] = new_row
    last_row = new_row

# === Show Forecast ===
forecast_df = pd.DataFrame({
    'date': future_dates,
    'predicted_close': future_preds
})
print(forecast_df)


                        date  predicted_close
0  2025-04-04 04:00:00+00:00       204.139877
1  2025-04-06 04:00:00+00:00       204.151199
2  2025-04-09 04:00:00+00:00       204.106033
3  2025-04-13 04:00:00+00:00       204.073563
4  2025-04-18 04:00:00+00:00       204.073563
5  2025-04-24 04:00:00+00:00       204.073563
6  2025-05-01 04:00:00+00:00       204.073563
7  2025-05-09 04:00:00+00:00       204.073563
8  2025-05-18 04:00:00+00:00       204.073563
9  2025-05-28 04:00:00+00:00       204.073563
10 2025-06-08 04:00:00+00:00       204.073563
11 2025-06-20 04:00:00+00:00       204.073563
12 2025-07-03 04:00:00+00:00       204.073563
13 2025-07-17 04:00:00+00:00       204.073563
14 2025-08-01 04:00:00+00:00       204.073563
15 2025-08-17 04:00:00+00:00       204.073563
16 2025-09-03 04:00:00+00:00       204.073563
17 2025-09-21 04:00:00+00:00       204.073563
18 2025-10-10 04:00:00+00:00       204.073563
19 2025-10-30 04:00:00+00:00       204.073563
20 2025-11-20 04:00:00+00:00      

In [5]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load original data
df = pd.read_csv("AAPL.csv")

# Convert time column and sort
df['t'] = pd.to_datetime(df['t'])
df = df.sort_values('t')
df.set_index('t', inplace=True)

# Feature engineering
df['close'] = df['c']
df['close_lag_1'] = df['close'].shift(1)
df['close_lag_2'] = df['close'].shift(2)
df['ma_3'] = df['close'].rolling(window=3).mean()
df['ma_5'] = df['close'].rolling(window=5).mean()

# Drop rows with NaN values after feature creation
df.dropna(inplace=True)

# Define features and target
features = ['close_lag_1', 'close_lag_2', 'ma_3', 'ma_5', 'v', 'n', 'vw']
target = 'close'

X = df[features]
y = df[target]

# Train the model
model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X, y)

# ---- Begin Future Prediction ---- #

future_predictions = []
last_known_date = df.index[-1]
last_known_row = df.iloc[-1].copy()

for i in range(30):
    # Create a new date (next day)
    new_date = last_known_date + pd.Timedelta(days=1)

    # Create a copy of last known row for new prediction
    new_row = last_known_row.copy()

    # Shift lag values
    new_row['close_lag_2'] = new_row['close_lag_1']
    new_row['close_lag_1'] = new_row['close']

    # Update moving averages manually
    last_closes = [new_row['close_lag_2'], new_row['close_lag_1'], new_row['close']]
    new_row['ma_3'] = np.mean(last_closes)

    last_5_closes = last_closes + [new_row['close']]
    new_row['ma_5'] = np.mean(last_5_closes[-5:])

    # Predict next close value
    input_row = new_row[features].values.reshape(1, -1)
    predicted_close = model.predict(input_row)[0]

    # Save prediction
    future_predictions.append({
        'date': new_date,
        'predicted_close': predicted_close
    })

    # Update new_row with predicted value
    new_row['close'] = predicted_close

    # Add small variation to other features
    new_row['v'] *= 1 + np.random.normal(0, 0.01)   # ±1% noise
    new_row['n'] *= 1 + np.random.normal(0, 0.01)
    new_row['vw'] *= 1 + np.random.normal(0, 0.005)

    # Set this as the row for the next prediction
    last_known_row = new_row.copy()
    last_known_date = new_date

# Create DataFrame with predictions
pred_df = pd.DataFrame(future_predictions)
print(pred_df)

# Optional: Save predictions to CSV
# pred_df.to_csv("future_aapl_predictions.csv", index=False)


                        date  predicted_close
0  2025-04-04 04:00:00+00:00       203.278793
1  2025-04-05 04:00:00+00:00       203.189865
2  2025-04-06 04:00:00+00:00       203.189865
3  2025-04-07 04:00:00+00:00       205.310440
4  2025-04-08 04:00:00+00:00       203.189865
5  2025-04-09 04:00:00+00:00       203.189865
6  2025-04-10 04:00:00+00:00       203.173355
7  2025-04-11 04:00:00+00:00       203.173355
8  2025-04-12 04:00:00+00:00       203.173355
9  2025-04-13 04:00:00+00:00       203.173355
10 2025-04-14 04:00:00+00:00       203.173355
11 2025-04-15 04:00:00+00:00       203.173355
12 2025-04-16 04:00:00+00:00       203.183334
13 2025-04-17 04:00:00+00:00       203.183334
14 2025-04-18 04:00:00+00:00       203.183334
15 2025-04-19 04:00:00+00:00       203.183334
16 2025-04-20 04:00:00+00:00       203.183334
17 2025-04-21 04:00:00+00:00       203.183334
18 2025-04-22 04:00:00+00:00       203.183334
19 2025-04-23 04:00:00+00:00       203.183334
20 2025-04-24 04:00:00+00:00      

In [6]:
%pip install tensorflow matplotlib

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load data
df = pd.read_csv("AAPL.csv")
df['t'] = pd.to_datetime(df['t'])
df = df.sort_values('t')
df.set_index('t', inplace=True)

# Feature engineering
df['close'] = df['c']
df['close_lag_1'] = df['close'].shift(1)
df['close_lag_2'] = df['close'].shift(2)
df['ma_3'] = df['close'].rolling(window=3).mean()
df['ma_5'] = df['close'].rolling(window=5).mean()

df.dropna(inplace=True)

features = ['close_lag_1', 'close_lag_2', 'ma_3', 'ma_5', 'v', 'n', 'vw']
target = 'close'

# Normalize
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_X.fit_transform(df[features])
y_scaled = scaler_y.fit_transform(df[[target]])

# Reshape for LSTM [samples, timesteps, features]
X_lstm = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# Train/Test split
split_index = int(len(X_lstm) * 0.8)
X_train, X_test = X_lstm[:split_index], X_lstm[split_index:]
y_train, y_test = y_scaled[:split_index], y_scaled[split_index:]

# Build LSTM model
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(1, X_scaled.shape[1])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Train
model.fit(X_train, y_train, epochs=30, batch_size=8, verbose=1)

# Predict on test
predicted = model.predict(X_test)
predicted_close = scaler_y.inverse_transform(predicted)

# ---- Predict Future ---- #
future_predictions = []
last_known_row = df.iloc[-1][features].copy()
last_closes = [df.iloc[-2]['close'], df.iloc[-1]['close']]  # for lag features



Epoch 1/30


  super().__init__(**kwargs)


[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.2729
Epoch 2/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1256
Epoch 3/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0248
Epoch 4/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0078 
Epoch 5/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0033 
Epoch 6/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0015 
Epoch 7/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 9.8539e-04
Epoch 8/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 8.7948e-04
Epoch 9/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 9.7320e-04 
Epoch 10/30
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 9.7845e-04 

In [11]:
for i in range(365):
    # Update features
    close_lag_1 = last_closes[-1]
    close_lag_2 = last_closes[-2]
    ma_3 = np.mean(last_closes[-2:] + [close_lag_1])
    ma_5 = np.mean((last_closes + [close_lag_1])[-5:])

    last_known_row['close_lag_1'] = close_lag_1
    last_known_row['close_lag_2'] = close_lag_2
    last_known_row['ma_3'] = ma_3
    last_known_row['ma_5'] = ma_5

    # Small variations
    last_known_row['v'] *= 1 + np.random.normal(0, 0.01)
    last_known_row['n'] *= 1 + np.random.normal(0, 0.01)
    last_known_row['vw'] *= 1 + np.random.normal(0, 0.005)

    # Scale
    input_features = scaler_X.transform(pd.DataFrame([last_known_row[features]], columns=features))


    input_features = input_features.reshape((1, 1, len(features)))

    # Predict
    scaled_prediction = model.predict(input_features)
    predicted_close = scaler_y.inverse_transform(scaled_prediction)[0][0]

    # Store prediction
    next_date = df.index[-1] + pd.Timedelta(days=i+1)
    future_predictions.append({'date': next_date, 'predicted_close': predicted_close})

    # Update for next iteration
    last_closes.append(predicted_close)
    last_closes = last_closes[-5:]
    last_known_row['close'] = predicted_close

# Show predictions
pred_df = pd.DataFrame(future_predictions)
print(pred_df)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 