<a href="https://colab.research.google.com/github/eltonaguiar/LoanerLab/blob/main/cheapwinnersv1ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
from typing import Dict, List, Optional
from scipy import stats
import logging
import yfinance as yf
import requests
from sklearn.linear_model import LinearRegression

# Enhanced Configuration for Low-Price Stock Analysis
CONFIG = {
    'ALPHA_VANTAGE_KEY': "6XN7LYXEYUOIAM7M",
    'STOCK_API_PROVIDER': "alpha_vantage",
    'RISK_FREE_RATE': 0.05,
    'MAX_HISTORY_DAYS': 90,  # Increased for more comprehensive analysis
    'STOCK_API_CALLS_PER_MINUTE': 5,
    'TARGET_STOCKS': 15,
    'MAX_ITERATIONS': 100,
    'MAX_PRICE_THRESHOLD': 8.0,  # Specific to stocks under $8
    'PREDICTION_HORIZONS': {
        'hours': 6,
        'days': 3,
        'weeks': 2
    }
}

# Enhanced Logging Configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('low_price_stock_analysis.log'),
        logging.StreamHandler()  # Added stream handler to show logs in console
    ]
)

# API Rate Limiting
def rate_limit_api_calls():
    """Implement rate limiting for API calls"""
    time.sleep(60 / CONFIG['STOCK_API_CALLS_PER_MINUTE'])

# Stock Data Functions
def get_us_market_stocks() -> List[str]:
    """
    Get a list of stock symbols trading in the US market, focused on lower-price stocks.
    Uses a simple approach with some predefined sectors to avoid exhausting API calls.
    """
    logging.info("Fetching list of US market stocks...")

    # Basic sectors to look for low-price stocks (these often have lower-priced stocks)
    sectors = ['technology', 'mining', 'biotech', 'energy', 'retail']
    all_symbols = []

    try:
        # Use Yahoo Finance to get some basic stock lists
        # This is a simplified approach; in production you might want a more comprehensive list
        for sector in sectors:
            logging.info(f"Searching for stocks in {sector} sector...")

            try:
                # We'll get a sample of stocks per sector to avoid API restrictions
                tickers = yf.Tickers(f"^GSPC")  # S&P 500 as a starting point
                sample_symbols = list(tickers.tickers.keys())[:20]  # Just take a few for demo
                all_symbols.extend(sample_symbols)
                logging.info(f"Found {len(sample_symbols)} symbols in {sector}")
            except Exception as e:
                logging.warning(f"Error fetching {sector} stocks: {e}")

        # Fallback to some known low-price stocks if the above approach doesn't yield results
        if len(all_symbols) < 10:
            logging.warning("Using fallback stock list due to limited results from API")
            fallback_stocks = [
                # Updated fallback stock list with strong buy recommendations under $8
                "RVYL", "OTRK", "IDN", "TUYA", "AIRG",
                # Keeping some of the original stocks for diversity
                "SNDL", "CPRX", "FCEL", "NAK", "CIDM",
                "GSAT", "SOLO", "IDEX", "GEVO", "ONTX"
            ]
            all_symbols.extend(fallback_stocks)

    except Exception as e:
        logging.error(f"Error in get_us_market_stocks: {e}")
        # Emergency fallback
        all_symbols = [
            "RVYL", "OTRK", "IDN", "TUYA", "AIRG",
            "SNDL", "CPRX", "FCEL", "NAK", "CIDM"
        ]
        logging.info(f"Using emergency fallback list with {len(all_symbols)} stocks")

    # Remove duplicates and return
    unique_symbols = list(set(all_symbols))
    logging.info(f"Total unique stock symbols retrieved: {len(unique_symbols)}")
    return unique_symbols

def get_stock_data(symbol: str, days: int = 90) -> Optional[pd.DataFrame]:
    """
    Get historical stock data for analysis.

    Args:
        symbol: Stock ticker symbol
        days: Number of historical days to retrieve

    Returns:
        DataFrame with stock price data or None if an error occurs
    """
    logging.info(f"Fetching data for {symbol} (past {days} days)...")

    try:
        # Apply rate limiting
        rate_limit_api_calls()

        # Calculate date range
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)

        # First try Alpha Vantage if that's the configured provider
        if CONFIG['STOCK_API_PROVIDER'] == 'alpha_vantage':
            try:
                url = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={symbol}&outputsize=full&apikey={CONFIG['ALPHA_VANTAGE_KEY']}"
                response = requests.get(url)

                if response.status_code != 200:
                    logging.warning(f"Alpha Vantage API returned status code {response.status_code} for {symbol}")
                    raise Exception(f"API returned status code {response.status_code}")

                data = response.json()

                # Check for error messages or empty results
                if 'Error Message' in data:
                    logging.warning(f"Alpha Vantage API returned error for {symbol}: {data['Error Message']}")
                    raise Exception(data['Error Message'])

                if 'Time Series (Daily)' not in data:
                    logging.warning(f"No time series data available for {symbol}")
                    return None

                # Convert to DataFrame
                time_series = data['Time Series (Daily)']
                df = pd.DataFrame(time_series).T
                df.columns = ['open', 'high', 'low', 'close', 'volume']

                # Convert types
                for col in df.columns:
                    df[col] = pd.to_numeric(df[col])

                # Sort by date (newest first) and filter to requested time period
                df.index = pd.to_datetime(df.index)
                df = df.sort_index(ascending=False)
                df = df.loc[df.index >= start_date.strftime('%Y-%m-%d')]

                logging.info(f"Successfully retrieved {len(df)} days of data for {symbol} from Alpha Vantage")
                return df

            except Exception as e:
                logging.warning(f"Error fetching {symbol} data from Alpha Vantage: {e}. Falling back to Yahoo Finance.")

        # Fallback to Yahoo Finance
        stock = yf.Ticker(symbol)
        history = stock.history(start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'))

        if history.empty:
            logging.warning(f"No data available for {symbol} from Yahoo Finance")
            return None

        # Convert column names to lowercase for consistency
        history.columns = [col.lower() for col in history.columns]

        logging.info(f"Successfully retrieved {len(history)} days of data for {symbol} from Yahoo Finance")
        return history

    except Exception as e:
        logging.error(f"Error retrieving data for {symbol}: {e}")
        return None

# Risk Assessment Functions
def calculate_volatility(stock_data: pd.DataFrame) -> float:
    """Calculate stock price volatility."""
    returns = stock_data['close'].pct_change()
    volatility = returns.std() * np.sqrt(252)  # Annualized volatility
    return volatility

def sharpe_ratio(stock_data: pd.DataFrame, risk_free_rate: float = CONFIG['RISK_FREE_RATE']) -> float:
    """Calculate Sharpe Ratio for risk-adjusted return."""
    returns = stock_data['close'].pct_change()
    avg_return = returns.mean() * 252  # Annualized return
    volatility = calculate_volatility(stock_data)

    if volatility == 0:
        return 0

    sharpe = (avg_return - risk_free_rate) / volatility
    return sharpe

def max_drawdown(stock_data: pd.DataFrame) -> float:
    """Calculate maximum drawdown as a risk metric."""
    cumulative_returns = (1 + stock_data['close'].pct_change().fillna(0)).cumprod()
    peak = cumulative_returns.cummax()
    drawdown = (cumulative_returns - peak) / peak
    return drawdown.min()

def predict_stock_movement(stock_data: pd.DataFrame, horizon: str = 'days') -> Dict:
    """Predict stock movement for different time horizons."""
    horizons = CONFIG['PREDICTION_HORIZONS']

    # Simple prediction using regression and recent trend
    X = np.arange(len(stock_data)).reshape(-1, 1)
    y = stock_data['close'].values

    model = LinearRegression()
    model.fit(X, y)

    future_steps = horizons.get(horizon, 3)
    future_prediction = model.predict(np.array([[len(stock_data) + future_steps]]))

    last_price = stock_data['close'].iloc[-1]
    predicted_price = future_prediction[0]

    movement_percentage = ((predicted_price - last_price) / last_price) * 100

    return {
        'symbol': stock_data.name if hasattr(stock_data, 'name') else 'Unknown',
        'current_price': last_price,
        'predicted_price': predicted_price,
        'movement_percentage': movement_percentage,
        'direction': 'UP' if movement_percentage > 0 else 'DOWN'
    }

def assess_stock_risk(symbol: str, stock_data: pd.DataFrame) -> Dict:
    """Comprehensive risk assessment for a stock."""
    volatility = calculate_volatility(stock_data)
    sharpe = sharpe_ratio(stock_data)
    drawdown = max_drawdown(stock_data)

    # Risk scoring mechanism
    risk_score = (
        (volatility * 50) +  # Higher volatility increases risk
        (abs(drawdown) * 30) -  # Larger drawdowns increase risk
        (sharpe * 20)  # Higher Sharpe ratio reduces risk
    )

    risk_categories = {
        'Very Low Risk': (0, 10),
        'Low Risk': (10, 25),
        'Moderate Risk': (25, 50),
        'High Risk': (50, 75),
        'Very High Risk': (75, 100)
    }

    risk_category = next(
        (category for category, (lower, upper) in risk_categories.items()
         if lower <= risk_score < upper),
        'Extreme Risk'
    )

    return {
        'symbol': symbol,
        'volatility': volatility,
        'sharpe_ratio': sharpe,
        'max_drawdown': drawdown,
        'risk_score': risk_score,
        'risk_category': risk_category
    }

def main():
    # Main analysis function focusing on low-price stocks
    logging.info("======================================")
    logging.info("Starting low-price stock analysis...")
    logging.info("Target: Stocks priced under $" + str(CONFIG['MAX_PRICE_THRESHOLD']))
    logging.info("======================================")

    # Get list of stocks to analyze
    symbols = get_us_market_stocks()
    logging.info(f"Retrieved {len(symbols)} stock symbols to analyze")

    if not symbols:
        logging.error("No stock symbols found. Exiting.")
        print("\nERROR: No stock symbols were retrieved. Please check your internet connection and API key.")
        return

    # Track progress
    processed = 0
    eligible_stocks = []

    for symbol in symbols:
        try:
            processed += 1
            logging.info(f"Processing stock {processed}/{len(symbols)}: {symbol}")

            # Get stock data
            stock_data = get_stock_data(symbol, days=CONFIG['MAX_HISTORY_DAYS'])

            if stock_data is None or stock_data.empty:
                logging.warning(f"No data available for {symbol}, skipping")
                continue

            # Check if stock meets price threshold criteria
            if len(stock_data) == 0:
                logging.warning(f"Empty dataset for {symbol}, skipping")
                continue

            current_price = stock_data['close'].iloc[-1]
            logging.info(f"{symbol} current price: ${current_price:.2f}")

            if current_price > CONFIG['MAX_PRICE_THRESHOLD']:
                logging.info(f"{symbol} price ${current_price:.2f} exceeds threshold ${CONFIG['MAX_PRICE_THRESHOLD']}, skipping")
                continue

            # Perform analysis on eligible stock
            stock_data.name = symbol  # Add name attribute for reference

            # Make prediction
            logging.info(f"Making prediction for {symbol}...")
            prediction = predict_stock_movement(stock_data)

            # Assess risk
            logging.info(f"Assessing risk for {symbol}...")
            risk_assessment = assess_stock_risk(symbol, stock_data)

            # Combine results
            stock_info = {
                **prediction,
                **risk_assessment
            }

            eligible_stocks.append(stock_info)
            logging.info(f"Successfully analyzed {symbol}: Price ${current_price:.2f}, "
                         f"Predicted {prediction['direction']} by {prediction['movement_percentage']:.2f}%, "
                         f"Risk: {risk_assessment['risk_category']}")

            # Check if we have enough stocks
            if len(eligible_stocks) >= CONFIG['TARGET_STOCKS']:
                logging.info(f"Reached target number of stocks ({CONFIG['TARGET_STOCKS']})")
                break

        except Exception as e:
            logging.error(f"Error processing {symbol}: {e}")

    # Sort and rank stocks
    if eligible_stocks:
        logging.info(f"Ranking {len(eligible_stocks)} eligible stocks...")
        ranked_stocks = sorted(eligible_stocks, key=lambda x: (x['risk_score'], -x['movement_percentage']))

        print("\n======================================")
        print(f"Top Low-Price Stock Predictions (under ${CONFIG['MAX_PRICE_THRESHOLD']}):")
        print("======================================")

        if not ranked_stocks:
            print(f"No eligible stocks found under ${CONFIG['MAX_PRICE_THRESHOLD']}.")
            print("Try increasing the price threshold in the CONFIG.")
            logging.warning("No eligible stocks found after processing.")
        else:
            for i, stock in enumerate(ranked_stocks[:10]):
                print(f"{i+1}. {stock['symbol']}: Current ${stock['current_price']:.2f}, "
                    f"Predicted ${stock['predicted_price']:.2f} ({stock['direction']} by {abs(stock['movement_percentage']):.2f}%), "
                    f"Risk: {stock['risk_category']}")

            print("\nDetailed Analysis:")
            for i, stock in enumerate(ranked_stocks[:10]):
                print(f"\n{i+1}. {stock['symbol']}:")
                print(f"   Current Price: ${stock['current_price']:.2f}")
                print(f"   Predicted Price: ${stock['predicted_price']:.2f}")
                print(f"   Movement: {stock['direction']} by {abs(stock['movement_percentage']):.2f}%")
                print(f"   Volatility: {stock['volatility']:.4f}")
                print(f"   Sharpe Ratio: {stock['sharpe_ratio']:.4f}")
                print(f"   Max Drawdown: {stock['max_drawdown']:.4f}")
                print(f"   Risk Category: {stock['risk_category']}")
    else:
        print("\nNo stocks matching criteria were found.")
        print("Possible issues:")
        print("1. API rate limits exceeded")
        print("2. Network connection problem")
        print("3. No stocks under the price threshold ($" + str(CONFIG['MAX_PRICE_THRESHOLD']) + ")")
        print("4. API key issues (check your Alpha Vantage key)")

        logging.warning("No eligible stocks found after processing all symbols.")

    logging.info("Stock analysis complete.")

if __name__ == "__main__":
    main()


ERROR:yfinance:$ONTX: possibly delisted; no timezone found



Top Low-Price Stock Predictions (under $8.0):
1. GEVO: Current $2.15, Predicted $2.13 (DOWN by 1.03%), Risk: Extreme Risk
2. SNDL: Current $1.80, Predicted $1.91 (UP by 6.03%), Risk: Extreme Risk
3. AIRG: Current $6.00, Predicted $8.43 (UP by 40.48%), Risk: Extreme Risk
4. CIDM: Current $3.42, Predicted $4.15 (UP by 21.29%), Risk: Low Risk
5. OTRK: Current $1.79, Predicted $1.73 (DOWN by 3.00%), Risk: Low Risk
6. IDN: Current $2.62, Predicted $2.74 (UP by 4.59%), Risk: Low Risk
7. RVYL: Current $1.33, Predicted $1.37 (UP by 3.23%), Risk: Low Risk
8. TUYA: Current $1.60, Predicted $2.51 (UP by 56.91%), Risk: Very High Risk
9. NAK: Current $0.67, Predicted $0.56 (DOWN by 16.39%), Risk: Very High Risk
10. GSAT: Current $1.80, Predicted $1.60 (DOWN by 11.10%), Risk: Extreme Risk

Detailed Analysis:

1. GEVO:
   Current Price: $2.15
   Predicted Price: $2.13
   Movement: DOWN by 1.03%
   Volatility: 0.7187
   Sharpe Ratio: 3.8874
   Max Drawdown: -0.1620
   Risk Category: Extreme Risk

2. 