<a href="https://colab.research.google.com/github/eltonaguiar/LoanerLab/blob/main/cheapwinners_plus_majorbigfish.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import logging
import time
import argparse

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
from typing import Dict, List, Optional
from scipy import stats
import logging
import yfinance as yf
import requests
from sklearn.linear_model import LinearRegression

# Global settings read by the functions in this script.
CONFIG = {
    # Alpha Vantage credentials and the provider tried first by get_stock_data().
    "ALPHA_VANTAGE_KEY": "your_key_here",
    "STOCK_API_PROVIDER": "alpha_vantage",
    # Default annual risk-free rate used by sharpe_ratio().
    "RISK_FREE_RATE": 0.05,
    # Number of calendar days of history requested per symbol.
    "MAX_HISTORY_DAYS": 90,
    # Drives the sleep in rate_limit_api_calls(): 60 / this value seconds.
    "STOCK_API_CALLS_PER_MINUTE": 5,
    # main() stops collecting once this many eligible stocks are found.
    "TARGET_STOCKS": 15,
    # Stocks whose latest close is above this price are skipped by main().
    "MAX_PRICE_THRESHOLD": 8.0,
    # Look-ahead used by predict_stock_movement(), keyed by horizon name.
    "PREDICTION_HORIZONS": {"hours": 6, "days": 3, "weeks": 2},
}


def rate_limit_api_calls():
    """Block long enough to respect the configured calls-per-minute budget.

    Sleeps for ``60 / CONFIG['STOCK_API_CALLS_PER_MINUTE']`` seconds; callers
    invoke this once before each data request.
    """
    calls_per_minute = CONFIG['STOCK_API_CALLS_PER_MINUTE']
    pause_seconds = 60 / calls_per_minute
    time.sleep(pause_seconds)

def get_us_market_stocks() -> List[str]:
    """Return the de-duplicated list of ticker symbols to analyse.

    The list is a hard-coded fallback set; nothing is fetched over the
    network despite the log message.

    Returns:
        Unique ticker symbols in the order they are declared below. The
        order is stable between runs, which matters because ``main`` stops
        early once it has collected enough eligible stocks.
    """
    logging.info("Fetching list of US market stocks...")
    fallback_stocks = [
        # Existing stocks
        "RVYL", "OTRK", "IDN", "TUYA", "AIRG", "SNDL", "CPRX", "FCEL", "NAK",
        "CIDM", "GSAT", "SOLO", "IDEX", "GEVO", "ONTX",
        # Large caps
        "AAPL", "TSLA", "NVDA", "AMZN", "GOOGL", "MSFT", "META", "AMD", "XOM",
        "JNJ", "JPM", "BLK", "DAL",
        # New additions (deduplicated)
        "COUR", "GHG", "HGBL", "HPE", "DTCK", "IQ", "LFST", "NGD", "PTON", "PAYO",
        "EMX", "TARA", "TGB", "MASS", "BLDE", "BZUN", "CRDF", "PLBY", "PRCH",
        "CVRX", "NEOG", "CRVO", "DYN", "VREX", "HPK", "NTIC", "RGNX", "SUNS",
        "LUNG", "ZYME", "RDFN", "FOLD", "IVR", "BOOM", "MAZE", "ALMS", "DEA",
        "NRIX", "CRGY",
    ]
    # dict.fromkeys removes duplicates while keeping first-seen order; a
    # plain set() would shuffle the symbols differently on every run.
    return list(dict.fromkeys(fallback_stocks))

def get_stock_data(symbol: str, days: int = 90) -> Optional[pd.DataFrame]:
    """Download daily price history for ``symbol`` covering the last ``days`` days.

    Alpha Vantage is queried first when it is the configured provider; on any
    failure (network error, HTTP error, missing payload, no rows in the
    window) the function falls back to Yahoo Finance via ``yfinance``.

    Args:
        symbol: Ticker symbol to look up.
        days: Size of the look-back window in calendar days.

    Returns:
        A DataFrame indexed by date with lower-case column names (including
        ``close``), sorted oldest to newest, or ``None`` when no data could
        be obtained from either source.
    """
    logging.info(f"Fetching data for {symbol} (past {days} days)...")
    rate_limit_api_calls()
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)
    start_str = start_date.strftime('%Y-%m-%d')

    try:
        if CONFIG['STOCK_API_PROVIDER'] == 'alpha_vantage':
            # Pass the query as params so the symbol is URL-encoded, and set a
            # timeout so a stalled connection cannot hang the whole run.
            response = requests.get(
                "https://www.alphavantage.co/query",
                params={
                    'function': 'TIME_SERIES_DAILY',
                    'symbol': symbol,
                    'outputsize': 'full',
                    'apikey': CONFIG['ALPHA_VANTAGE_KEY'],
                },
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()

            if 'Time Series (Daily)' not in data:
                raise ValueError("Invalid or missing data in Alpha Vantage response")

            df = pd.DataFrame(data['Time Series (Daily)']).T
            df.columns = ['open', 'high', 'low', 'close', 'volume']
            df = df.apply(pd.to_numeric)
            df.index = pd.to_datetime(df.index)

            # Sort first, then build the date mask from the *sorted* index.
            # A boolean array is applied positionally, so a mask taken from
            # the unsorted frame would select the wrong rows whenever the
            # response is not already in ascending date order.
            df = df.sort_index()
            recent = df.loc[df.index >= start_str]
            if recent.empty:
                raise ValueError("No Alpha Vantage rows inside the requested window")
            return recent

    except Exception as e:
        # Deliberate best-effort: any Alpha Vantage problem triggers the fallback.
        logging.warning(f"Alpha Vantage failed for {symbol}, falling back to Yahoo: {e}")

    try:
        stock = yf.Ticker(symbol)
        df = stock.history(start=start_str, end=end_date.strftime('%Y-%m-%d'))
        df.columns = [col.lower() for col in df.columns]
        return df if not df.empty else None
    except Exception as e:
        logging.error(f"Failed to fetch {symbol}: {e}")
        return None




def calculate_volatility(stock_data: pd.DataFrame) -> float:
    """Return the annualized volatility of the ``close`` column.

    The standard deviation of day-over-day percentage changes is scaled by
    the square root of 252.
    """
    closes = stock_data['close']
    daily_returns = closes.pct_change()
    daily_sigma = daily_returns.std()
    annualization_factor = np.sqrt(252)
    return daily_sigma * annualization_factor

def sharpe_ratio(stock_data: pd.DataFrame, risk_free_rate: float = CONFIG['RISK_FREE_RATE']) -> float:
    """Return the annualized Sharpe ratio of the ``close`` column.

    Args:
        stock_data: Price history with a ``close`` column.
        risk_free_rate: Annual risk-free rate subtracted from the annualized
            mean return. The default is read from ``CONFIG`` once, when the
            function is defined.

    Returns:
        ``(mean daily return * 252 - risk_free_rate) / annualized volatility``,
        or ``0.0`` when the volatility is zero or not a finite number (for
        example when there are too few rows to compute a standard deviation).
    """
    returns = stock_data['close'].pct_change()
    avg_return = returns.mean() * 252
    volatility = calculate_volatility(stock_data)
    # NaN is truthy, so a plain `if volatility` would let it through and
    # produce a NaN ratio; treat every undefined case like zero volatility.
    if not np.isfinite(volatility) or volatility == 0:
        return 0.0
    return (avg_return - risk_free_rate) / volatility

def max_drawdown(stock_data: pd.DataFrame) -> float:
    """Return the largest peak-to-trough decline of the ``close`` column.

    The result is a non-positive fraction (e.g. ``-0.25`` for a 25% drop)
    computed on the cumulative growth of daily returns.
    """
    daily_returns = stock_data['close'].pct_change().fillna(0)
    # Growth of one unit invested at the first row.
    wealth = daily_returns.add(1).cumprod()
    running_peak = wealth.cummax()
    declines = wealth.sub(running_peak).div(running_peak)
    return declines.min()

def predict_stock_movement(stock_data: pd.DataFrame, horizon: str = 'days') -> Dict:
    """Extrapolate a straight-line fit of closing prices into the future.

    Closing prices are regressed on their row position (0, 1, 2, ...) and the
    fitted line is evaluated a number of rows past the last observation.

    Args:
        stock_data: Price history with a ``close`` column. If the frame has
            a ``name`` attribute it is reported as the symbol.
        horizon: Key into ``CONFIG['PREDICTION_HORIZONS']``; unknown keys fall
            back to 3 steps.
            NOTE(review): the configured value is used as a count of rows
            regardless of the key's unit name -- confirm that is intended
            for 'hours' and 'weeks'.

    Returns:
        Dict with ``symbol``, ``current_price``, ``predicted_price``,
        ``movement_percentage`` and ``direction`` ('UP' when the predicted
        price is above the last close, otherwise 'DOWN').
    """
    n_obs = len(stock_data)
    X = np.arange(n_obs).reshape(-1, 1)
    y = stock_data['close'].values
    model = LinearRegression()
    model.fit(X, y)

    future_steps = CONFIG['PREDICTION_HORIZONS'].get(horizon, 3)
    # The last observation sits at position n_obs - 1, so "future_steps
    # ahead" is n_obs - 1 + future_steps (not n_obs + future_steps, which
    # would look one extra step into the future).
    future_position = n_obs - 1 + future_steps
    future_price = model.predict(np.array([[future_position]]))[0]
    last_price = stock_data['close'].iloc[-1]
    movement_pct = ((future_price - last_price) / last_price) * 100

    return {
        'symbol': getattr(stock_data, 'name', 'Unknown'),
        'current_price': last_price,
        'predicted_price': future_price,
        'movement_percentage': movement_pct,
        'direction': 'UP' if movement_pct > 0 else 'DOWN'
    }

def assess_stock_risk(symbol: str, stock_data: pd.DataFrame) -> Dict:
    """Combine volatility, drawdown and Sharpe ratio into a risk score and label.

    The score is ``volatility * 50 + |max drawdown| * 30 - sharpe * 20``;
    lower means less risky.

    Args:
        symbol: Ticker symbol, copied into the result.
        stock_data: Price history with a ``close`` column.

    Returns:
        Dict with ``symbol``, ``volatility``, ``sharpe_ratio``,
        ``max_drawdown``, ``risk_score`` and ``risk_category``.
    """
    volatility = calculate_volatility(stock_data)
    sharpe = sharpe_ratio(stock_data)
    drawdown = max_drawdown(stock_data)

    score = (volatility * 50) + (abs(drawdown) * 30) - (sharpe * 20)

    # Exclusive upper bound of each band, in ascending order. The first band
    # has no lower bound: a high Sharpe ratio can push the score below zero,
    # and such a stock is the least risky, not "Extreme Risk".
    bands = (
        (10, 'Very Low Risk'),
        (25, 'Low Risk'),
        (50, 'Moderate Risk'),
        (75, 'High Risk'),
        (100, 'Very High Risk'),
    )
    # Scores of 100 or more, and NaN scores (every comparison is False),
    # fall through to the default label.
    risk_category = next((label for upper, label in bands if score < upper), 'Extreme Risk')

    return {
        'symbol': symbol,
        'volatility': volatility,
        'sharpe_ratio': sharpe,
        'max_drawdown': drawdown,
        'risk_score': score,
        'risk_category': risk_category
    }


def explain_prediction(stock):
    """Generate a multi-level explanation of a stock's prediction.

    Args:
        stock: Mapping with ``symbol``, ``sharpe_ratio``, ``max_drawdown``
            and ``risk_category``. An optional ``direction`` entry selects
            the wording: 'DOWN' produces a downward-trend explanation, while
            any other value (or a missing entry) produces the upward-trend
            text.

    Returns:
        A multi-line string starting with two blank lines.
    """
    symbol = stock['symbol']
    # Only describe an upward trend when the prediction is not DOWN;
    # otherwise the text would contradict the numbers printed next to it.
    predicted_up = stock.get('direction', 'UP') != 'DOWN'

    if predicted_up:
        trend_lines = [
            "1. 🧠 High School Level: This stock's price has been going up steadily. Our model thinks it will keep going up, and it's not very risky compared to others.\n",
            f"2. 👨‍💻 Technical (Coders): Linear regression on {symbol}'s closing price showed a positive slope. This trend, extrapolated, suggests a future increase.\n",
            "3. 📊 Statistical Insight: We regressed closing prices (y) on time (t), observed a significant upward trend.\n",
        ]
    else:
        trend_lines = [
            "1. 🧠 High School Level: This stock's price trend points down. Our model thinks the price will be lower soon, so this is not a buy signal.\n",
            f"2. 👨‍💻 Technical (Coders): Linear regression on {symbol}'s closing price projects a price below the latest close. This trend, extrapolated, suggests a future decrease.\n",
            "3. 📊 Statistical Insight: We regressed closing prices (y) on time (t); the extrapolated fit lies below the current price.\n",
        ]

    parts = [
        f"\n\nExplanation for {symbol} Recommendation:\n",
        *trend_lines,
        f"   With a Sharpe ratio of {stock['sharpe_ratio']:.2f} and drawdown of {stock['max_drawdown']:.2f}, this asset shows favorable risk-adjusted return.\n",
        f"   Risk category: {stock['risk_category']}\n",
        "\nPotential Enhancements:\n",
        "- Replace regression with XGBoost using volume, RSI, and news sentiment\n",
        "- Add earnings surprise filter and insider trade flag\n",
    ]
    return "".join(parts)

def display_intro():
    """Print a banner describing what the script does and how it is configured.

    The price cap and history length shown are read from ``CONFIG`` so the
    text stays in step with the values the analysis actually uses.
    """
    # ':g' drops a trailing '.0', so the default 8.0 is shown as "$8".
    price_cap = f"{CONFIG['MAX_PRICE_THRESHOLD']:g}"
    print("\n====================")
    print("📈 STOCK PREDICTOR OVERVIEW")
    print("====================")
    print(f"This script analyzes U.S. stocks (primarily under ${price_cap}, but also top tech picks) using:")
    print("- 📊 Linear Regression to predict short-term price trends")
    print("- ⚖️ Sharpe Ratio, Volatility, Max Drawdown to assess risk")
    print("- 🔎 Filters to select low-risk, high-upside candidates")
    print("Goal: Identify undervalued stocks likely to move up over the next few days.")
    print(f"Data range pulled: Last {CONFIG['MAX_HISTORY_DAYS']} days")
    print("API Priority: Alpha Vantage → Yahoo Finance fallback\n")

def main(verbose: bool = False):
    """Run the full pipeline: fetch symbols, score each one, print a ranking.

    For every symbol the price history is downloaded, symbols whose latest
    close is above ``CONFIG['MAX_PRICE_THRESHOLD']`` are skipped, and the
    rest are given a price prediction and a risk assessment. Collection
    stops once ``CONFIG['TARGET_STOCKS']`` eligible stocks are found. The
    ten best are printed, ordered by ascending risk score and then by
    descending predicted movement.

    Args:
        verbose: When true, additionally log each symbol as it is processed.
    """
    logging.basicConfig(level=logging.INFO)
    print("🔍 Starting stock analysis...")
    display_intro()
    print("📦 Fetching stock list...")

    symbols = get_us_market_stocks()

    # Check for an empty list before announcing a successful retrieval.
    if not symbols:
        print("\nERROR: No stock symbols were retrieved. Please check your internet connection and API key.")
        return

    print(f"✅ Retrieved {len(symbols)} stock symbols. Beginning data collection and prediction...")

    print("🔄 Processing stock symbols, please wait...")
    start_time = time.time()
    total_symbols = len(symbols)
    eligible_stocks = []

    for position, symbol in enumerate(symbols, 1):
        try:
            # Base the estimate on symbols that are actually finished; the
            # one about to be processed has not contributed any time yet.
            completed = position - 1
            if completed:
                elapsed = time.time() - start_time
                est_remaining = (elapsed / completed) * (total_symbols - completed)
                eta_text = f"{int(est_remaining)}s"
            else:
                eta_text = "estimating..."
            print(f"⏳ [{position}/{total_symbols}] Processing {symbol} | Est. Time Left: {eta_text}")
            print(f"   ↪️ Pulling last {CONFIG['MAX_HISTORY_DAYS']} days of price data...")
            print("   📌 Goal: Predict short-term direction and evaluate risk profile")

            if verbose:
                logging.info(f"Processing stock {position}/{total_symbols}: {symbol}")

            stock_data = get_stock_data(symbol, days=CONFIG['MAX_HISTORY_DAYS'])
            if stock_data is None or stock_data.empty:
                continue

            current_price = stock_data['close'].iloc[-1]
            if current_price > CONFIG['MAX_PRICE_THRESHOLD']:
                continue

            # predict_stock_movement reads the symbol from this attribute.
            stock_data.name = symbol
            prediction = predict_stock_movement(stock_data)
            risk = assess_stock_risk(symbol, stock_data)
            eligible_stocks.append({**prediction, **risk})

            if len(eligible_stocks) >= CONFIG['TARGET_STOCKS']:
                break

        except Exception as e:
            # Best-effort per symbol: one bad ticker must not abort the run.
            logging.warning(f"Error processing {symbol}: {e}")

    print("✅ Stock processing complete. Ranking results...")

    if eligible_stocks:
        # Lowest risk score first; ties broken by the largest predicted gain.
        ranked = sorted(eligible_stocks, key=lambda x: (x['risk_score'], -x['movement_percentage']))
        print("\n======================================")
        print(f"Top Low-Price Stock Predictions (under ${CONFIG['MAX_PRICE_THRESHOLD']}):")
        print("======================================")

        for rank, stock in enumerate(ranked[:10], 1):
            print(
                f"\n{rank}. {stock['symbol']}:\n"
                f"   Current Price: ${stock['current_price']:.2f}\n"
                f"   Predicted Price: ${stock['predicted_price']:.2f}\n"
                f"   Movement: {stock['direction']} by {abs(stock['movement_percentage']):.2f}%\n"
                f"   Volatility: {stock['volatility']:.4f}\n"
                f"   Sharpe Ratio: {stock['sharpe_ratio']:.4f}\n"
                f"   Max Drawdown: {stock['max_drawdown']:.4f}\n"
                f"   Risk Category: {stock['risk_category']}"
            )
            print(explain_prediction(stock))
    else:
        print("\nNo stocks matching criteria were found.")
        print("Possible issues:")
        print("1. API rate limits exceeded")
        print("2. Network connection problem")
        print(f"3. No stocks under the price threshold (${CONFIG['MAX_PRICE_THRESHOLD']})")
        print("4. API key issues (check your Alpha Vantage key)")

    print("✅ Analysis complete. Results above.")

if __name__ == "__main__":
    import sys

    # Verbose logging is opt-in via a literal "--verbose" argument; when the
    # flag is absent from sys.argv (e.g. in a notebook) it stays off.
    verbose = any(arg == '--verbose' for arg in sys.argv)
    main(verbose=verbose)


🔍 Starting stock analysis...

📈 STOCK PREDICTOR OVERVIEW
This script analyzes U.S. stocks (primarily under $8, but also top tech picks) using:
- 📊 Linear Regression to predict short-term price trends
- ⚖️ Sharpe Ratio, Volatility, Max Drawdown to assess risk
- 🔎 Filters to select low-risk, high-upside candidates
Goal: Identify undervalued stocks likely to move up over the next few days.
Data range pulled: Last 90 days
API Priority: Alpha Vantage → Yahoo Finance fallback

📦 Fetching stock list...
✅ Retrieved 67 stock symbols. Beginning data collection and prediction...
🔄 Processing stock symbols, please wait...
⏳ [1/67] Processing SNDL | Est. Time Left: 0s
   ↪️ Pulling last 90 days of price data...
   📌 Goal: Predict short-term direction and evaluate risk profile
⏳ [2/67] Processing BLK | Est. Time Left: 399s
   ↪️ Pulling last 90 days of price data...
   📌 Goal: Predict short-term direction and evaluate risk profile
⏳ [3/67] Processing DEA | Est. Time Left: 529s
   ↪️ Pulling last 90 