In [1]:
import os
from alpaca.data.historical import CryptoHistoricalDataClient
from alpaca.data.requests import CryptoBarsRequest
from alpaca.data.timeframe import TimeFrame
from datetime import datetime  # Importing datetime module
import requests
import pandas as pd
from dotenv import load_dotenv  

load_dotenv()

# No keys required for crypto data
# client = CryptoHistoricalDataClient()

# # Creating request object
# request_params = CryptoBarsRequest(
#     symbol_or_symbols=["BTC/USD"],
#     timeframe=TimeFrame.Day,
#     start=datetime(2022, 9, 1),
#     end=datetime(2022, 9, 20)
# )

# # Retrieve daily bars for Bitcoin in a DataFrame and printing it
# btc_bars = client.get_crypto_bars(request_params)

# df = btc_bars.df
# print(df.head(2))

# Retrieve API keys from environment variables
api_key = os.getenv("APCA_API_KEY_ID")
api_secret = os.getenv("APCA_API_SECRET_KEY")

url = "https://data.alpaca.markets/v2/stocks/bars?symbols=AAPL&timeframe=1Day&start=2024-01-03T00%3A00%3A00Z&end=2025-04-04T00%3A00%3A00Z&limit=10000&adjustment=raw&feed=sip&sort=asc"

headers = {
    "accept": "application/json",
    "APCA-API-KEY-ID": api_key,
    "APCA-API-SECRET-KEY": api_secret
}

response = requests.get(url, headers=headers)

print(response.text)


bars = response.json()["bars"]["AAPL"]
df = pd.DataFrame(bars)
df.to_csv("AAPL.csv")
# Convert the response as DataFrame
#df = pd.DataFrame(response.json()['bars'])
# print(df.head(2))

{"bars":{"AAPL":[{"c":184.25,"h":185.88,"l":183.43,"n":656956,"o":184.22,"t":"2024-01-03T05:00:00Z","v":58418916,"vw":184.319693},{"c":181.91,"h":183.0872,"l":180.88,"n":712850,"o":182.15,"t":"2024-01-04T05:00:00Z","v":71992243,"vw":182.013085},{"c":181.18,"h":182.76,"l":180.17,"n":682335,"o":181.99,"t":"2024-01-05T05:00:00Z","v":62379661,"vw":181.483937},{"c":185.56,"h":185.6,"l":181.5,"n":669304,"o":182.085,"t":"2024-01-08T05:00:00Z","v":59151720,"vw":184.400869},{"c":185.14,"h":185.15,"l":182.73,"n":538297,"o":183.92,"t":"2024-01-09T05:00:00Z","v":42848219,"vw":184.364122},{"c":186.19,"h":186.4,"l":183.92,"n":554884,"o":184.35,"t":"2024-01-10T05:00:00Z","v":46797681,"vw":185.223846},{"c":185.59,"h":187.05,"l":183.62,"n":584114,"o":186.54,"t":"2024-01-11T05:00:00Z","v":49133996,"vw":185.022217},{"c":185.92,"h":186.74,"l":185.19,"n":477050,"o":186.06,"t":"2024-01-12T05:00:00Z","v":40477782,"vw":185.818208},{"c":183.63,"h":184.26,"l":180.934,"n":767431,"o":182.16,"t":"2024-01-16T05:00:

In [2]:
%pip install xgboost scikit-learn

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from datetime import datetime, timedelta


df = pd.read_csv("AAPL.csv")

print(df.head(3))




   Unnamed: 0       c         h       l       n       o                     t  \
0           0  184.25  185.8800  183.43  656956  184.22  2024-01-03T05:00:00Z   
1           1  181.91  183.0872  180.88  712850  182.15  2024-01-04T05:00:00Z   
2           2  181.18  182.7600  180.17  682335  181.99  2024-01-05T05:00:00Z   

          v          vw  
0  58418916  184.319693  
1  71992243  182.013085  
2  62379661  181.483937  


In [None]:
# === Clean & Prepare ===
df = df.rename(columns={
    't': 'timestamp',
    'o': 'open',
    'h': 'high',
    'l': 'low',
    'c': 'close',
    'v': 'volume',
    'n': 'trade_count',
    'vw': 'vwap'
})

df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# === Feature Engineering ===
df['close_lag_1'] = df['close'].shift(1)
df['close_lag_2'] = df['close'].shift(2)
df['ma_3'] = df['close'].rolling(window=3).mean()
df['ma_5'] = df['close'].rolling(window=5).mean()

df.dropna(inplace=True)

# === Features & Target ===
features = ['open', 'high', 'low', 'volume', 'trade_count', 'vwap', 'close_lag_1', 'close_lag_2', 'ma_3', 'ma_5']
X = df[features]
y = df['close']

# === Train Model on Entire Historical Data ===
model = XGBRegressor(n_estimators=200, max_depth=4, learning_rate=0.1)
model.fit(X, y)

# === Forecast Next 30 Days ===
last_row = df.iloc[-1]
future_preds = []
future_dates = []

for i in range(30):
    # Create next row input
    new_date = df.index[-1] + timedelta(days=i + 1)
    future_dates.append(new_date)

    input_data = {
        'open': last_row['open'],
        'high': last_row['high'],
        'low': last_row['low'],
        'volume': last_row['volume'],
        'trade_count': last_row['trade_count'],
        'vwap': last_row['vwap'],
        'close_lag_1': last_row['close'],
        'close_lag_2': last_row['close_lag_1'],
        'ma_3': df['close'][-3:].mean(),
        'ma_5': df['close'][-5:].mean()
    }

    input_df = pd.DataFrame([input_data])
    predicted_close = model.predict(input_df)[0]

    # Save prediction
    future_preds.append(predicted_close)

    # Update last_row for next iteration
    new_row = last_row.copy()
    new_row['close_lag_2'] = new_row['close_lag_1']
    new_row['close_lag_1'] = predicted_close
    new_row['close'] = predicted_close
    df.loc[new_date] = new_row
    last_row = new_row

# === Show Forecast ===
forecast_df = pd.DataFrame({
    'date': future_dates,
    'predicted_close': future_preds
})
print(forecast_df)


                        date  predicted_close
0  2025-04-04 04:00:00+00:00       204.139877
1  2025-04-06 04:00:00+00:00       204.151199
2  2025-04-09 04:00:00+00:00       204.106033
3  2025-04-13 04:00:00+00:00       204.073563
4  2025-04-18 04:00:00+00:00       204.073563
5  2025-04-24 04:00:00+00:00       204.073563
6  2025-05-01 04:00:00+00:00       204.073563
7  2025-05-09 04:00:00+00:00       204.073563
8  2025-05-18 04:00:00+00:00       204.073563
9  2025-05-28 04:00:00+00:00       204.073563
10 2025-06-08 04:00:00+00:00       204.073563
11 2025-06-20 04:00:00+00:00       204.073563
12 2025-07-03 04:00:00+00:00       204.073563
13 2025-07-17 04:00:00+00:00       204.073563
14 2025-08-01 04:00:00+00:00       204.073563
15 2025-08-17 04:00:00+00:00       204.073563
16 2025-09-03 04:00:00+00:00       204.073563
17 2025-09-21 04:00:00+00:00       204.073563
18 2025-10-10 04:00:00+00:00       204.073563
19 2025-10-30 04:00:00+00:00       204.073563
20 2025-11-20 04:00:00+00:00      

In [18]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load original data
df = pd.read_csv("AAPL.csv")

# Convert time column and sort
df['t'] = pd.to_datetime(df['t'])
df = df.sort_values('t')
df.set_index('t', inplace=True)

# Feature engineering
df['close'] = df['c']
df['close_lag_1'] = df['close'].shift(1)
df['close_lag_2'] = df['close'].shift(2)
df['ma_3'] = df['close'].rolling(window=3).mean()
df['ma_5'] = df['close'].rolling(window=5).mean()

# Drop rows with NaN values after feature creation
df.dropna(inplace=True)

# Define features and target
features = ['close_lag_1', 'close_lag_2', 'ma_3', 'ma_5', 'v', 'n', 'vw']
target = 'close'

X = df[features]
y = df[target]

# Train the model
model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X, y)

# ---- Begin Future Prediction ---- #

future_predictions = []
last_known_date = df.index[-1]
last_known_row = df.iloc[-1].copy()

for i in range(30):
    # Create a new date (next day)
    new_date = last_known_date + pd.Timedelta(days=1)

    # Create a copy of last known row for new prediction
    new_row = last_known_row.copy()

    # Shift lag values
    new_row['close_lag_2'] = new_row['close_lag_1']
    new_row['close_lag_1'] = new_row['close']

    # Update moving averages manually
    last_closes = [new_row['close_lag_2'], new_row['close_lag_1'], new_row['close']]
    new_row['ma_3'] = np.mean(last_closes)

    last_5_closes = last_closes + [new_row['close']]
    new_row['ma_5'] = np.mean(last_5_closes[-5:])

    # Predict next close value
    input_row = new_row[features].values.reshape(1, -1)
    predicted_close = model.predict(input_row)[0]

    # Save prediction
    future_predictions.append({
        'date': new_date,
        'predicted_close': predicted_close
    })

    # Update new_row with predicted value
    new_row['close'] = predicted_close

    # Add small variation to other features
    new_row['v'] *= 1 + np.random.normal(0, 0.01)   # ±1% noise
    new_row['n'] *= 1 + np.random.normal(0, 0.01)
    new_row['vw'] *= 1 + np.random.normal(0, 0.005)

    # Set this as the row for the next prediction
    last_known_row = new_row.copy()
    last_known_date = new_date

# Create DataFrame with predictions
pred_df = pd.DataFrame(future_predictions)
print(pred_df)

# Optional: Save predictions to CSV
# pred_df.to_csv("future_aapl_predictions.csv", index=False)


                        date  predicted_close
0  2025-04-04 04:00:00+00:00       203.278793
1  2025-04-05 04:00:00+00:00       203.173355
2  2025-04-06 04:00:00+00:00       203.173355
3  2025-04-07 04:00:00+00:00       203.173355
4  2025-04-08 04:00:00+00:00       203.173355
5  2025-04-09 04:00:00+00:00       203.173355
6  2025-04-10 04:00:00+00:00       203.173355
7  2025-04-11 04:00:00+00:00       203.173355
8  2025-04-12 04:00:00+00:00       203.116364
9  2025-04-13 04:00:00+00:00       203.116364
10 2025-04-14 04:00:00+00:00       205.236938
11 2025-04-15 04:00:00+00:00       203.116364
12 2025-04-16 04:00:00+00:00       207.019577
13 2025-04-17 04:00:00+00:00       207.550781
14 2025-04-18 04:00:00+00:00       207.624283
15 2025-04-19 04:00:00+00:00       208.043381
16 2025-04-20 04:00:00+00:00       208.043381
17 2025-04-21 04:00:00+00:00       208.043381
18 2025-04-22 04:00:00+00:00       208.026871
19 2025-04-23 04:00:00+00:00       208.814621
20 2025-04-24 04:00:00+00:00      