
Model Used: ChatGPT-4o


Prompt: 

Fed the whole CSV to ChatGPT and then the following prompt

Task:
Write Python code that builds a trading strategy to predict stock prices for the next 1 week using historical stock data. Use machine learning techniques to forecast future prices and make buy/sell/hold recommendations.

Instructions:

Use pandas to load and process the historical stock data (assume it's in CSV format).

Engineer useful features (e.g., moving averages, RSI, volume changes).

Split the data into training and testing sets.

Use a suitable regression model (e.g., RandomForestRegressor, XGBoost, or LSTM).

Train the model to predict the stock's price 5 trading days (1 week) into the future.

Include logic to evaluate model performance (e.g., RMSE, MAE).

Build simple logic for a trading strategy:

If predicted price is significantly higher than current → Buy

If predicted price is significantly lower → Sell

Else → Hold

Include a main() function that loads sample data, runs the pipeline, and outputs prediction and recommendation.

Constraints:

Use public Python packages only (sklearn, pandas, numpy, matplotlib, etc.).

Keep it modular with reusable functions.

Include inline comments for clarity.

Assume the CSV has columns: Date, Open, High, Low, Close, Volume.

the csv is stored in OHLCV_AAPL.csv 

Format:
Return only the Python code with effective comments explaining the code blocks 

    

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [2]:
# Function to load and preprocess the data
def load_data(filepath):
    df = pd.read_csv(filepath, parse_dates=['Date'])
    df.sort_values('Date', inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

In [3]:
# Feature engineering: moving averages, RSI, and volume change
def add_features(df):
    df['MA_5'] = df['Close'].rolling(window=5).mean()
    df['MA_10'] = df['Close'].rolling(window=10).mean()
    df['MA_20'] = df['Close'].rolling(window=20).mean()

    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    df['Volume_Change'] = df['Volume'].pct_change()

    # Target: predict the Close price 5 days into the future
    df['Target'] = df['Close'].shift(-5)

    df.dropna(inplace=True)  # Drop rows with NaNs created by rolling/shift
    return df

In [4]:
# Train/test split and scaling
def prepare_data(df):
    features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_5', 'MA_10', 'MA_20', 'RSI', 'Volume_Change']
    X = df[features]
    y = df['Target']

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)
    return X_train, X_test, y_train, y_test, scaler, features

In [5]:
# Train a regression model
def train_model(X_train, y_train):
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    return model

In [6]:
# Evaluate model performance
def evaluate_model(model, X_test, y_test):
    predictions = model.predict(X_test)
    rmse = mean_squared_error(y_test, predictions, squared=False)
    mae = mean_absolute_error(y_test, predictions)
    print(f"RMSE: {rmse:.2f}")
    print(f"MAE: {mae:.2f}")
    return predictions

In [7]:
def generate_recommendation(current_price, predicted_price, threshold=0.02):
    change_pct = (predicted_price - current_price) / current_price
    if change_pct > threshold:
        return "BUY"
    elif change_pct < -threshold:
        return "SELL"
    else:
        return "HOLD"

In [8]:
# Main function to run the full pipeline
def main():
    # Load and preprocess the data
    df = load_data("OHLCV_AAPL.csv")
    df = add_features(df)

    # Prepare data for model training
    X_train, X_test, y_train, y_test, scaler, feature_cols = prepare_data(df)

    # Train the model
    model = train_model(X_train, y_train)

    # Evaluate the model
    predictions = evaluate_model(model, X_test, y_test)

    # Get the latest feature data and current close price
    latest_data = df[feature_cols].iloc[-1:]
    latest_scaled = scaler.transform(latest_data)
    predicted_price = model.predict(latest_scaled)[0]
    current_price = df['Close'].iloc[-1]

    # Print prediction and recommendation
    print(f"Current Price: {current_price:.2f}")
    print(f"Predicted Price in 5 Days: {predicted_price:.2f}")
    recommendation = generate_recommendation(current_price, predicted_price)
    print(f"Trading Recommendation: {recommendation}")

In [9]:
# Run the pipeline
if __name__ == "__main__":
    main()

RMSE: 8.47
MAE: 7.40
Current Price: 130.71
Predicted Price in 5 Days: 137.76
Trading Recommendation: BUY
