<a href="https://colab.research.google.com/github/csce585-mlsystems/CSCE585ProjectROI/blob/revisedModel/src/Classification_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [49]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [50]:
import yfinance as yf
import datetime

In [51]:
#Pipeline to enter data
sp500 = yf.Ticker("^GSPC")
print(sp500.history(period="1y"))

                                  Open         High          Low        Close  \
Date                                                                            
2024-12-09 00:00:00-05:00  6083.009766  6088.509766  6048.629883  6052.850098   
2024-12-10 00:00:00-05:00  6057.589844  6065.399902  6029.890137  6034.910156   
2024-12-11 00:00:00-05:00  6060.149902  6092.589844  6060.149902  6084.189941   
2024-12-12 00:00:00-05:00  6074.290039  6079.680176  6051.250000  6051.250000   
2024-12-13 00:00:00-05:00  6068.169922  6078.580078  6035.770020  6051.089844   
...                                ...          ...          ...          ...   
2025-12-02 00:00:00-05:00  6830.959961  6851.549805  6806.709961  6829.370117   
2025-12-03 00:00:00-05:00  6815.290039  6862.419922  6810.430176  6849.720215   
2025-12-04 00:00:00-05:00  6866.470215  6866.470215  6827.120117  6857.120117   
2025-12-05 00:00:00-05:00  6866.319824  6895.779785  6858.290039  6870.399902   
2025-12-08 00:00:00-05:00  6

In [52]:
def get_live_price(symbol):
    ticker = yf.Ticker(symbol)
    # Fetch just the last 1 minute of data
    data = ticker.history(period="1d", interval="1m")
    if not data.empty:
        return data['Close'].iloc[-1]
    else:
        return None

print(f"Starting live stream for {sp500.ticker} (Press Ctrl+C to stop)...")

try:
    while True:
        price = get_live_price(sp500.ticker)
        now = datetime.datetime.now().strftime("%H:%M:%S")

        if price:
            print(f"[{now}] {sp500.ticker} Price: ${price:.2f}")
        else:
            print(f"[{now}] Failed to fetch data.")

        # Sleep for 10 seconds to avoid getting rate-limited (blocked)
        import time # Import time module if not already imported
        time.sleep(10)

except KeyboardInterrupt:
    print("\nStream stopped.")

Starting live stream for ^GSPC (Press Ctrl+C to stop)...
[02:47:02] ^GSPC Price: $6847.84
[02:47:12] ^GSPC Price: $6847.84

Stream stopped.


In [53]:
import pandas as pd
import time
# get metrics
def getMetrics(tickers):
    rows = []
    for i, ticker in enumerate(tickers):
        try:
            stock = yf.Ticker(ticker)
            info = stock.info
            sector = info.get("sector")
            # making sure we always have a pe
            pe = info.get("trailingPE")
            if pe is None:
                pe = info.get("forwardPE")
            pb = info.get("priceToBook")
            de = info.get("debtToEquity")
            fcf = info.get("freeCashflow")
            rows.append(
                {
                    "ticker": ticker,
                    "sector": sector,
                    "pe": pe,
                    "pb": pb,
                    "de": de,
                    "fcf": fcf,
                }
            )
        except Exception as e:
            # print [specific ticker], prob: [the problem]
            print(f" {ticker} , prob: {e}")
        # so that i never get a super irritating 429 error msg again...
        time.sleep(0.1)
    metrics = pd.DataFrame(rows)
    return metrics

# Example usage:
tickers_to_check = ["AAPL", "GOOG", "MSFT"]
stock_metrics = getMetrics(tickers_to_check)
print(stock_metrics)

  ticker                  sector         pe         pb       de          fcf
0   AAPL              Technology  37.200806  55.678223  152.411  78862254080
1   GOOG  Communication Services  31.041462   9.816440   11.424  47997751296
2   MSFT              Technology  34.923183  10.053644   33.154  53327376384


In [54]:
# Get historical data from sp500
sp500_history = sp500.history(period="1y")

# Define features (X) and target (y) for a classification task
# For example, let's predict if the close price is higher than the open price
# X will be the open, high, low, and close prices
X = sp500_history[['Open', 'High', 'Low', 'Close']]
# y will be 1 if Close > Open, else 0
y = (sp500_history['Close'] > sp500_history['Open']).astype(int)

# Ensure X and y have the same number of samples after any potential operations
# (e.g., if there were NaNs from shifting, though not directly in this example)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Instantiate and train the model
model = LogisticRegression(max_iter=200) # Increased max_iter for convergence
model.fit(X_train, y_train)

In [55]:
# Make predictions
y_pred = model.predict(X_test)

# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.95


In [56]:
def train_and_evaluate_model(ticker_symbol):
    # Fetch historical data
    ticker = yf.Ticker(ticker_symbol)
    history = ticker.history(period="1y")

    # Define features (X) and target (y)
    X = history[['Open', 'High', 'Low', 'Close']]
    y = (history['Close'] > history['Open']).astype(int)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Instantiate and train the model
    model = LogisticRegression(max_iter=200)
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Evaluate performance
    accuracy = accuracy_score(y_test, y_pred)

    return model, accuracy

print("Defined the 'train_and_evaluate_model' function.")

Defined the 'train_and_evaluate_model' function.


In [57]:
results = {}

for ticker_symbol in tickers_to_check:
    print(f"\nTraining and evaluating model for {ticker_symbol}...")
    model, accuracy = train_and_evaluate_model(ticker_symbol)
    results[ticker_symbol] = accuracy
    print(f"Accuracy for {ticker_symbol}: {accuracy:.2f}")

print("\n--- Overall Results ---")
for ticker, acc in results.items():
    print(f"{ticker}: Accuracy = {acc:.2f}")


Training and evaluating model for AAPL...
Accuracy for AAPL: 0.99

Training and evaluating model for GOOG...
Accuracy for GOOG: 1.00

Training and evaluating model for MSFT...
Accuracy for MSFT: 0.96

--- Overall Results ---
AAPL: Accuracy = 0.99
GOOG: Accuracy = 1.00
MSFT: Accuracy = 0.96


In [58]:
def calculate_custom_score(ticker_symbol):
    score = 0
    max_score = 4 # 1 point for each metric met

    # 1. P/E Ratio: Prefer below 20 (Undervalued)
    if  pb and 0 < pe < 20:
        score += 1

    # 2. P/B Ratio: Prefer below 3 (Good value relative to assets)
    if pb and history['pb_ratio'] < 3:
        score += 1

    # 3. Debt-to-Equity: Prefer below 100 (Low leverage)
    # Note: Yahoo returns this as a percentage (e.g., 50.0 = 0.5)
    if data['debt_to_equity'] and data['debt_to_equity'] < 100:
        score += 1

    # 4. Free Cash Flow: Must be positive
    if data['fcf'] and data['fcf'] > 0:
        score += 1

    # Normalize to a 10-point scale
    final_smart_score = (score / max_score) * 10
    return final_smart_score

In [60]:
def calculate_custom_score(pe, pb, de, fcf): # Modified function signature
    score = 0
    max_score = 4 # 1 point for each metric met

    # 1. P/E Ratio: Prefer below 20 (Undervalued)
    if pe is not None and 0 < pe < 20:
        score += 1

    # 2. P/B Ratio: Prefer below 3 (Good value relative to assets)
    if pb is not None and pb < 3:
        score += 1

    # 3. Debt-to-Equity: Prefer below 100 (Low leverage)
    # Note: Yahoo returns this as a percentage (e.g., 50.0 = 0.5)
    if de is not None and de < 100:
        score += 1

    # 4. Free Cash Flow: Must be positive
    if fcf is not None and fcf > 0:
        score += 1

    # Normalize to a 10-point scale
    final_smart_score = (score / max_score) * 10
    return final_smart_score

In [61]:
print("\n--- Custom Scores ---")
for index, row in stock_metrics.iterrows():
    ticker = row['ticker']
    pe_ratio = row['pe']
    pb_ratio = row['pb']
    de_ratio = row['de']
    fcf_value = row['fcf']

    custom_score = calculate_custom_score(pe_ratio, pb_ratio, de_ratio, fcf_value)
    print(f"{ticker}: Custom Score = {custom_score:.2f}")


--- Custom Scores ---
AAPL: Custom Score = 2.50
GOOG: Custom Score = 5.00
MSFT: Custom Score = 5.00
