In [None]:
!pip install numpy==1.23.5 pandas==1.5.3 yfinance --no-cache-dir



In [29]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

In [30]:
# Fetch S&P 500 data from Yahoo Finance
sp500 = yf.Ticker("^GSPC")
df = sp500.history(start="2000-01-01") 
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-03 00:00:00-05:00,1469.250000,1478.000000,1438.359985,1455.219971,931800000,0.0,0.0
2000-01-04 00:00:00-05:00,1455.219971,1455.219971,1397.430054,1399.420044,1009000000,0.0,0.0
2000-01-05 00:00:00-05:00,1399.420044,1413.270020,1377.680054,1402.109985,1085500000,0.0,0.0
2000-01-06 00:00:00-05:00,1402.109985,1411.900024,1392.099976,1403.449951,1092300000,0.0,0.0
2000-01-07 00:00:00-05:00,1403.449951,1441.469971,1400.729980,1441.469971,1225200000,0.0,0.0
...,...,...,...,...,...,...,...
2025-01-23 00:00:00-05:00,6076.319824,6118.729980,6074.669922,6118.709961,4432250000,0.0,0.0
2025-01-24 00:00:00-05:00,6121.430176,6128.180176,6088.740234,6101.240234,4214250000,0.0,0.0
2025-01-27 00:00:00-05:00,5969.040039,6017.169922,5962.919922,6012.279785,5198750000,0.0,0.0
2025-01-28 00:00:00-05:00,6026.970215,6074.540039,5994.629883,6067.700195,4473640000,0.0,0.0


In [31]:
# Define target variable: 1 if price goes up, 0 if it goes down
df["Tomorrow_close"] = df["Close"].shift(-1)
df["Target"] = (df["Tomorrow_close"] > df["Close"]).astype(int)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Tomorrow_close,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2000-01-03 00:00:00-05:00,1469.250000,1478.000000,1438.359985,1455.219971,931800000,0.0,0.0,1399.420044,0
2000-01-04 00:00:00-05:00,1455.219971,1455.219971,1397.430054,1399.420044,1009000000,0.0,0.0,1402.109985,1
2000-01-05 00:00:00-05:00,1399.420044,1413.270020,1377.680054,1402.109985,1085500000,0.0,0.0,1403.449951,1
2000-01-06 00:00:00-05:00,1402.109985,1411.900024,1392.099976,1403.449951,1092300000,0.0,0.0,1441.469971,1
2000-01-07 00:00:00-05:00,1403.449951,1441.469971,1400.729980,1441.469971,1225200000,0.0,0.0,1457.599976,1
...,...,...,...,...,...,...,...,...,...
2025-01-23 00:00:00-05:00,6076.319824,6118.729980,6074.669922,6118.709961,4432250000,0.0,0.0,6101.240234,0
2025-01-24 00:00:00-05:00,6121.430176,6128.180176,6088.740234,6101.240234,4214250000,0.0,0.0,6012.279785,0
2025-01-27 00:00:00-05:00,5969.040039,6017.169922,5962.919922,6012.279785,5198750000,0.0,0.0,6067.700195,1
2025-01-28 00:00:00-05:00,6026.970215,6074.540039,5994.629883,6067.700195,4473640000,0.0,0.0,6039.310059,0


In [32]:
# Feature Engineering - Create Predictors
df["Return"] = df["Close"].pct_change()  # Daily return percentage
df["SMA_5"] = df["Close"].rolling(5).mean()  # 5-day Simple Moving Average
df["SMA_20"] = df["Close"].rolling(20).mean()  # 20-day Simple Moving Average
df["Volatility"] = df["Return"].rolling(10).std()  # Rolling standard deviation (Volatility)


In [33]:
# Drop rows with missing values caused by rolling calculations
df.dropna(inplace=True)


In [34]:
# Define predictors (X) and target (y)
predictors = ["Return", "SMA_5", "SMA_20", "Volatility", "Volume"]
X = df[predictors]
y = df["Target"]


In [35]:
# Split data into training (80%) and test (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [36]:
# Scale the predictor variables
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # Fit on training data
X_test = scaler.transform(X_test)  # Transform test data


In [37]:
# Train a Random Forest model
model = RandomForestClassifier(n_estimators=200, max_depth=5, random_state=42)
model.fit(X_train, y_train)


In [38]:
# Make predictions
y_pred = model.predict(X_test)

# Evaluate Model Performance
accuracy = accuracy_score(y_test, y_pred)
print(f"📊 Model Accuracy: {accuracy:.4f}")  # Print accuracy score
print(classification_report(y_test, y_pred))  # Print detailed performance report


📊 Model Accuracy: 0.5358
              precision    recall  f1-score   support

           0       0.49      0.07      0.13       582
           1       0.54      0.93      0.68       676

    accuracy                           0.54      1258
   macro avg       0.51      0.50      0.40      1258
weighted avg       0.52      0.54      0.43      1258



In [39]:
# Get the latest day's data for prediction
latest_data = X.iloc[-1:].values  # Get the last row (latest data)
latest_data = scaler.transform(latest_data)  # Apply scaling

# Predict tomorrow's movement
tomorrow_prediction = model.predict(latest_data)

# Print prediction result
if tomorrow_prediction[0] == 1:
    print("📈 Prediction: The S&P 500 is likely to go UP tomorrow.")
else:
    print("📉 Prediction: The S&P 500 is likely to go DOWN tomorrow.")


📈 Prediction: The S&P 500 is likely to go UP tomorrow.




In [55]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# 📌 Step 1: Download S&P 500 Data
sp500 = yf.Ticker("^GSPC")
df = sp500.history(start="1990-01-01")   # Get 10 years of data

# 📌 Step 2: Feature Engineering (Predictors)
df["Tomorrow_Close"] = df["Close"].shift(-1)  # Next day's closing price
df["Target"] = (df["Tomorrow_Close"] > df["Close"]).astype(int)  # 1 if price goes up, 0 if down

# Create Technical Indicators
df["Return"] = df["Close"].pct_change()  # Daily return
df["SMA_5"] = df["Close"].rolling(5).mean()  # 5-day Simple Moving Average
df["SMA_20"] = df["Close"].rolling(20).mean()  # 20-day Moving Average
df["Volatility"] = df["Return"].rolling(10).std()  # Rolling volatility

# Drop rows with NaNs
df.dropna(inplace=True)

# 📌 Step 3: Define Features and Target
predictors = ["Return", "SMA_5", "SMA_20", "Volatility", "Volume"]
X = df[predictors]
y = df["Target"]

# 📌 Step 4: Train-Test Split (No Shuffling for Time-Series Data)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# 📌 Step 5: Normalize the Features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 📌 Step 6: Train a Random Forest Model
model = RandomForestClassifier(n_estimators=200, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# 📌 Step 7: Make Predictions & Evaluate Performance
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"📊 Model Accuracy: {accuracy:.4f}")
print(classification_report(y_test, y_pred))

# 📌 Step 8: Predict Tomorrow’s Market Movement
latest_data = X.iloc[-1:].values  # Get today's features
latest_data = scaler.transform(latest_data)
tomorrow_prediction = model.predict(latest_data)

if tomorrow_prediction[0] == 1:
    print("📈 Prediction: The S&P 500 is likely to go UP tomorrow.")
else:
    print("📉 Prediction: The S&P 500 is likely to go DOWN tomorrow.")


📊 Model Accuracy: 0.5437
              precision    recall  f1-score   support

           0       0.54      0.03      0.05       808
           1       0.54      0.98      0.70       956

    accuracy                           0.54      1764
   macro avg       0.54      0.50      0.38      1764
weighted avg       0.54      0.54      0.40      1764

📈 Prediction: The S&P 500 is likely to go UP tomorrow.


In [41]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# Fetch S&P 500 data
sp500 = yf.Ticker("^GSPC")
df = sp500.history(start="1990-01-01")

# Define target variable (1 if price goes up, 0 if it goes down)
df["Tomorrow_Close"] = df["Close"].shift(-1)
df["Target"] = (df["Tomorrow_Close"] > df["Close"]).astype(int)

# Feature Engineering
df["Return"] = df["Close"].pct_change()
df["SMA_5"] = df["Close"].rolling(5).mean()
df["SMA_20"] = df["Close"].rolling(20).mean()
df["Volatility"] = df["Return"].rolling(10).std()

# Add Advanced Indicators
df["RSI"] = df["Close"].rolling(14).apply(lambda x: 100 - (100 / (1 + np.mean(x[x > x.mean()]) / np.mean(x[x <= x.mean()]))))
df["EMA_12"] = df["Close"].ewm(span=12, adjust=False).mean()
df["EMA_26"] = df["Close"].ewm(span=26, adjust=False).mean()
df["MACD"] = df["EMA_12"] - df["EMA_26"]
df["Upper_Band"] = df["SMA_20"] + (df["Close"].rolling(20).std() * 2)
df["Lower_Band"] = df["SMA_20"] - (df["Close"].rolling(20).std() * 2)

df.dropna(inplace=True)

# Define predictors and target
predictors = ["Return", "SMA_5", "SMA_20", "Volatility", "RSI", "MACD", "Upper_Band", "Lower_Band", "Volume"]
X = df[predictors]
y = df["Target"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Normalize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest model
model = RandomForestClassifier(n_estimators=500, max_depth=7, random_state=42)
model.fit(X_train, y_train)

# Make predictions & evaluate
y_pred = model.predict(X_test)
print(f"📊 Model Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(classification_report(y_test, y_pred))

# Predict tomorrow's movement
latest_data = X.iloc[-1:].values
latest_data = scaler.transform(latest_data)
tomorrow_prediction = model.predict(latest_data)

if tomorrow_prediction[0] == 1:
    print("📈 The S&P 500 is likely to go UP tomorrow.")
else:
    print("📉 The S&P 500 is likely to go DOWN tomorrow.")


📊 Model Accuracy: 0.5334
              precision    recall  f1-score   support

           0       0.47      0.18      0.27       806
           1       0.55      0.83      0.66       958

    accuracy                           0.53      1764
   macro avg       0.51      0.51      0.46      1764
weighted avg       0.51      0.53      0.48      1764

📈 The S&P 500 is likely to go UP tomorrow.




In [42]:
!pip install xgboost




In [27]:
import yfinance as yf
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

# Specify the time period 
time_period = "30y" 

# 📌 Step 1: Fetch S&P 500 Data
sp500 = yf.Ticker("^GSPC")
df = sp500.history(period=time_period, interval="1d")  # Get data with daily granularity

# 📌 Step 2: Define Target Variable (1 if price goes up, 0 if down)
df["Tomorrow_Close"] = df["Close"].shift(-1)
df["Target"] = (df["Tomorrow_Close"] > df["Close"]).astype(int)

# 📌 Step 3: Feature Engineering
df["Return"] = df["Close"].pct_change()
df["SMA_5"] = df["Close"].rolling(5).mean()
df["SMA_20"] = df["Close"].rolling(20).mean()
df["Volatility"] = df["Return"].rolling(10).std()

# Drop NaN values (caused by rolling calculations)
df.dropna(inplace=True)

# 📌 Step 4: Define Predictors
predictors = ["Return", "SMA_5", "SMA_20", "Volatility", "Volume"]
X = df[predictors]
y = df["Target"]

# 📌 Step 5: Normalize the Data for LSTM
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# 📌 Step 6: Convert Data into Sequences for LSTM
def create_sequences(data, target, seq_length=10):
    X_seq, y_seq = [], []
    for i in range(len(data) - seq_length):
        X_seq.append(data[i:i+seq_length])
        y_seq.append(target[i+seq_length])
    return np.array(X_seq), np.array(y_seq)

seq_length = 10  # 🔹 Change this to 20 or 30 for longer-term patterns
X_lstm, y_lstm = create_sequences(X_scaled, y, seq_length)

# 📌 Step 7: Train-Test Split
train_size = int(len(X_lstm) * 0.8)
X_train, X_test = X_lstm[:train_size], X_lstm[train_size:]
y_train, y_test = y_lstm[:train_size], y_lstm[train_size:]

# 📌 Step 8: Build LSTM Model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(1, activation="sigmoid")
])

# 📌 Step 9: Compile Model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# 📌 Step 10: Train the Model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# 📌 Step 11: Make Predictions
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)

# 📌 Step 12: Evaluate Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"📊 LSTM Model Accuracy (Time Period: {time_period}): {accuracy:.4f}")

# 📌 Step 13: Predict Tomorrow's Movement
latest_data = X_scaled[-seq_length:].reshape(1, seq_length, X_scaled.shape[1])
tomorrow_prediction = model.predict(latest_data)

if tomorrow_prediction[0][0] > 0.5:
    print("📈 The S&P 500 is likely to go UP tomorrow.")
else:
    print("📉 The S&P 500 is likely to go DOWN tomorrow.")


Epoch 1/20


  super().__init__(**kwargs)


[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.5253 - loss: 0.6924 - val_accuracy: 0.5234 - val_loss: 0.6920
Epoch 2/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5259 - loss: 0.6922 - val_accuracy: 0.4612 - val_loss: 0.6969
Epoch 3/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5342 - loss: 0.6908 - val_accuracy: 0.5035 - val_loss: 0.6941
Epoch 4/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5358 - loss: 0.6911 - val_accuracy: 0.5224 - val_loss: 0.6922
Epoch 5/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5405 - loss: 0.6904 - val_accuracy: 0.5239 - val_loss: 0.6927
Epoch 6/20
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5381 - loss: 0.6902 - val_accuracy: 0.4572 - val_loss: 0.7019
Epoch 7/20
[1m252/252[0m [32m━━━━━━━

In [47]:
pip install ace-tools

Collecting ace-tools
  Obtaining dependency information for ace-tools from https://files.pythonhosted.org/packages/27/c4/402d3ae2ecbfe72fbdcb2769f55580f1c54a3ca110c44e1efc034516a499/ace_tools-0.0-py3-none-any.whl.metadata
  Downloading ace_tools-0.0-py3-none-any.whl.metadata (300 bytes)
Downloading ace_tools-0.0-py3-none-any.whl (1.1 kB)
Installing collected packages: ace-tools
Successfully installed ace-tools-0.0
Note: you may need to restart the kernel to use updated packages.


In [53]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import warnings


warnings.filterwarnings("ignore")  # Suppress warnings for clean output

# 🔹 Choose multiple stable stocks to analyze
ticker_symbols = ["JNJ", "PG", "KO", "MSFT", "SPY", "VIG", "XLV"]  # Add more if needed

# 📌 Store results in a DataFrame
results = []

# 📌 Loop through each stock
for ticker_symbol in ticker_symbols:
    try:
        # 📌 Step 1: Download Stock Data
        stock = yf.Ticker(ticker_symbol)
        df = stock.history(start="1990-01-01", interval="1d")  # Get daily data since 1990
        
        if df.empty:
            print(f"⚠️ No data found for {ticker_symbol}, skipping...")
            continue

        # 📌 Step 2: Feature Engineering (Predictors)
        df["Tomorrow_Close"] = df["Close"].shift(-1)  # Next day's closing price
        df["Target"] = (df["Tomorrow_Close"] > df["Close"]).astype(int)  # 1 if price goes up, 0 if down

        # Create Technical Indicators
        df["Return"] = df["Close"].pct_change()  # Daily return
        df["SMA_5"] = df["Close"].rolling(5).mean()  # 5-day Simple Moving Average
        df["SMA_20"] = df["Close"].rolling(20).mean()  # 20-day Moving Average
        df["Volatility"] = df["Return"].rolling(10).std()  # Rolling volatility

        # Drop rows with NaNs
        df.dropna(inplace=True)

        # 📌 Step 3: Define Features and Target
        predictors = ["Return", "SMA_5", "SMA_20", "Volatility", "Volume"]
        X = df[predictors]
        y = df["Target"]

        # 📌 Step 4: Train-Test Split (No Shuffling for Time-Series Data)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

        # 📌 Step 5: Normalize the Features
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # 📌 Step 6: Train a Random Forest Model
        model = RandomForestClassifier(n_estimators=200, max_depth=5, random_state=42)
        model.fit(X_train, y_train)

        # 📌 Step 7: Make Predictions & Evaluate Performance
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # 📌 Step 8: Predict Tomorrow’s Market Movement
        latest_data = X.iloc[-1:].values  # Get today's features
        latest_data = scaler.transform(latest_data)
        tomorrow_prediction = model.predict(latest_data)
        movement = "📈 UP" if tomorrow_prediction[0] == 1 else "📉 DOWN"

        # 📌 Store results
        results.append({"Stock": ticker_symbol, "Accuracy": accuracy, "Tomorrow's Prediction": movement})

    except Exception as e:
        print(f"⚠️ Error processing {ticker_symbol}: {e}")
        continue

# 📊 Display Results in a Table
results_df = pd.DataFrame(results)

results_df


Unnamed: 0,Stock,Accuracy,Tomorrow's Prediction
0,JNJ,0.512472,📈 UP
1,PG,0.462018,📉 DOWN
2,KO,0.528912,📈 UP
3,MSFT,0.537415,📈 UP
4,SPY,0.483209,📉 DOWN
5,VIG,0.517021,📈 UP
6,XLV,0.517557,📈 UP
