In [2]:
pip install yfinance pandas numpy scikit-learn



### **Preprocessing**

In [4]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define function to fetch stock data
def fetch_stock_data(ticker, start_date, end_date):
    stock_data = yf.download(ticker, start=start_date, end=end_date)
    return stock_data

# Fetch historical data for a stock, e.g., Apple (AAPL)
ticker = 'AAPL'
start_date = '2022-01-01'
end_date = '2023-01-01'
data = fetch_stock_data(ticker, start_date, end_date)

[*********************100%***********************]  1 of 1 completed


In [5]:
# Feature Engineering: Add Moving Averages and other indicators
def calculate_rsi(series, period=14):
    delta = series.diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

def add_technical_indicators(df):
    df['SMA_20'] = df['Close'].rolling(window=20).mean()  # 20-day Simple Moving Average
    df['EMA_20'] = df['Close'].ewm(span=20, adjust=False).mean()  # 20-day Exponential Moving Average
    df['RSI_14'] = calculate_rsi(df['Close'], 14)  # 14-day RSI
    df['Volatility'] = df['Close'].rolling(window=20).std()  # 20-day Volatility
    df = df.dropna()  # Drop rows with NaN values resulting from rolling calculations
    return df


In [6]:
# Add technical indicators to data
data = add_technical_indicators(data)

# Define features and target for model training
data['Target'] = data['Close'].shift(-1)  # Predicting the next day's closing price
features = ['SMA_20', 'EMA_20', 'RSI_14', 'Volatility']
X = data[features].iloc[:-1]  # Exclude last row due to target shifting
y = data['Target'].iloc[:-1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Target'] = data['Close'].shift(-1)  # Predicting the next day's closing price


## **Model Creation**

In [7]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize models
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(random_state=42),
    "Support Vector Regressor (SVR)": SVR(kernel='rbf')
}

In [8]:
# Train and evaluate each model
results = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    results[model_name] = {
        "MAE": mean_absolute_error(y_test, predictions),
        "MSE": mean_squared_error(y_test, predictions),
        "R2 Score": r2_score(y_test, predictions)
    }

In [9]:
# Display the results
results_df = pd.DataFrame(results).T
print("Model Evaluation Results:")
print(results_df)

Model Evaluation Results:
                                     MAE        MSE  R2 Score
Linear Regression               2.985451  12.544824  0.878617
Random Forest                   2.627552  12.179282  0.882154
Support Vector Regressor (SVR)  5.814883  52.106504  0.495819
