# ML Stock Predictor

This notebook trains a random-forest regression model to predict a stock's next-day closing price from its historical price data.

In [None]:
# Essential imports
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Configuration -- edit these values to change what the notebook analyses.

# Ticker symbol of the stock to download.
STOCK_SYMBOL = 'AAPL'

# Length of price history to request.
PERIOD = '2y'

# Days to use for prediction.
# NOTE(review): not referenced by any cell visible in this notebook -- confirm
# whether it is still needed.
PREDICTION_DAYS = 30

In [None]:
# Load stock data
# Fetch the price history for the configured symbol and period.
stock = yf.Ticker(STOCK_SYMBOL)
data = stock.history(period=PERIOD)

# Fail fast when nothing came back (e.g. unknown symbol or failed download);
# otherwise the later feature-engineering and split cells fail with errors
# that do not point at the real cause.
if data.empty:
    raise ValueError(
        f"No price data returned for {STOCK_SYMBOL!r} (period={PERIOD!r})"
    )

print(f"Data shape: {data.shape}")
data.head()

In [None]:
# Feature engineering
# Adds moving-average, RSI and daily-return columns plus the regression target
# to `data`, then removes every row that is missing any value.
close = data['Close']

# Simple moving averages of the close over 5 and 20 rows.
data['MA_5'] = close.rolling(window=5).mean()
data['MA_20'] = close.rolling(window=20).mean()

# 14-day RSI, simple-moving-average form.
# Gains and losses are each averaged over the whole window (zero on days that
# moved the other way). RSI = 100 - 100 / (1 + avg_gain / avg_loss), written
# here in the algebraically equal form 100 * avg_gain / (avg_gain + avg_loss)
# so that a window with no losing days yields 100 instead of NaN.
# A completely flat window (no gains and no losses) is still NaN.
delta = close.diff()
avg_gain = delta.clip(lower=0).rolling(window=14).mean()
avg_loss = (-delta.clip(upper=0)).rolling(window=14).mean()
data['RSI'] = 100 * avg_gain / (avg_gain + avg_loss)

data['Price_Change'] = close.pct_change()
data['Target'] = close.shift(-1)  # Next day's closing price

# Drop NaN values: the leading rows where the rolling windows are not yet
# full, and the final row, which has no next-day target.
data = data.dropna()
print(f"Data after preprocessing: {data.shape}")

In [None]:
# Prepare features and target
# `features` lists the predictor columns; the target is the next day's close.
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_5', 'MA_20', 'RSI', 'Price_Change']
X = data[features]
y = data['Target']

# Split data chronologically: the first 80% of rows train the model and the
# last 20% are held out. The rows are a time series, so a shuffled split would
# put days from the future into the training set and inflate the test scores.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
print(f"Training set: {X_train.shape}, Test set: {X_test.shape}")

In [None]:
# Fit a 100-tree random forest on the training split (fixed seed).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X=X_train, y=y_train)

# Predict the held-out split.
y_pred = model.predict(X_test)

# Score the predictions against the true targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

# Report both metrics to four decimal places.
print(f"Mean Squared Error: {mse:.4f}")
print(f"R² Score: {r2:.4f}")

In [None]:
# Scatter of predicted vs. actual prices for the test split.
plt.figure(figsize=(12, 6))
plt.scatter(y_test, y_pred, alpha=0.6)

# Dashed red diagonal spanning the actual-price range: points on this line
# are predictions that exactly match the actual price.
price_range = [y_test.min(), y_test.max()]
plt.plot(price_range, price_range, 'r--', lw=2)

plt.title(f'{STOCK_SYMBOL} Stock Price Prediction')
plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.show()

In [None]:
# Pair each feature name with the importance the fitted model assigned to it,
# then order the table from most to least important.
importance = pd.DataFrame({'feature': features, 'importance': model.feature_importances_})
importance = importance.sort_values(by='importance', ascending=False)

# Horizontal bar chart of the importances.
plt.figure(figsize=(10, 6))
plt.barh(importance['feature'], importance['importance'])
plt.title('Feature Importance')
plt.xlabel('Importance')
plt.show()

# Print the same table as text.
print(importance)