In [None]:
!pip install yfinance scikit-learn pandas numpy matplotlib

In [1]:
import yfinance as yf
import pandas as pd

# Fetch historical data for a stock (e.g., AAPL)
stock_data = yf.download("AAPL", start="2020-01-01", end="2023-01-01")

# Display the first few rows
print(stock_data.head())


[*********************100%%**********************]  1 of 1 completed

                 Open       High        Low      Close  Adj Close     Volume
Date                                                                        
2020-01-02  74.059998  75.150002  73.797501  75.087502  72.876114  135480400
2020-01-03  74.287498  75.144997  74.125000  74.357498  72.167587  146322800
2020-01-06  73.447502  74.989998  73.187500  74.949997  72.742630  118387200
2020-01-07  74.959999  75.224998  74.370003  74.597504  72.400551  108872000
2020-01-08  74.290001  76.110001  74.290001  75.797501  73.565193  132079200





In [2]:
# Create a simple moving average (SMA) and daily return
stock_data['SMA_20'] = stock_data['Close'].rolling(window=20).mean()
stock_data['Daily_Return'] = stock_data['Close'].pct_change()

# Drop rows with NaN values
stock_data.dropna(inplace=True)

# Define the target variable (1 for price increase, 0 for decrease)
stock_data['Target'] = (stock_data['Close'].shift(-1) > stock_data['Close']).astype(int)

# Select features and target
features = ['SMA_20', 'Daily_Return']
X = stock_data[features]
y = stock_data['Target']


In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 0.49


In [5]:
import numpy as np

# Add predictions to the original dataset with correct alignment
stock_data['Prediction'] = np.nan

# Align the predictions with the correct indices
predictions_df = pd.DataFrame(data={'Prediction': y_pred}, index=X_test.index)
stock_data.update(predictions_df)

# Simulate a simple backtest strategy
initial_balance = 10000
balance = initial_balance
shares = 0

for i in range(len(stock_data) - 1):
    if stock_data['Prediction'].iloc[i] == 1:
        # Buy stock
        shares = balance / stock_data['Close'].iloc[i]
        balance = 0
    elif shares > 0:
        # Sell stock
        balance = shares * stock_data['Close'].iloc[i]
        shares = 0

# Final balance
final_balance = balance + (shares * stock_data['Close'].iloc[-1])
print(f"Final Balance: {final_balance:.2f}, Profit: {final_balance - initial_balance:.2f}")


Final Balance: 0.00, Profit: -10000.00
