<a href="https://colab.research.google.com/github/YangChaoChung/US-stock/blob/main/find_stocks_that_have_increased_over_15_in_the_last_5_trading_days.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance



In [None]:
!pip install pandas



In [9]:
import ftplib
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import logging
import time
from google.colab import files

# Suppress yfinance error messages
logging.getLogger('yfinance').setLevel(logging.INFO)

In [2]:
def download_recent_stock_data(tickers, start_date):
    """Download recent stock data for given tickers starting from start_date."""
    try:
        data = yf.download(
            tickers,
            start=start_date,
            end=datetime.now(),
            progress=False,
            threads=False  # Remove or set group_by='column' here
        )
        if data.empty:
            print("No recent data available for the given tickers.")
            return pd.DataFrame()
        if isinstance(tickers, str):
            close_prices = pd.DataFrame({tickers: data['Close']})
        else:
            close_prices = data['Close']
        return close_prices
    except Exception as e:
        print(f"Failed to download recent data: {e}")
        return pd.DataFrame()



In [3]:
def connect_to_ftp_server(server, encoding="utf-8"):
    ftp = ftplib.FTP(server)
    ftp.login()
    ftp.encoding = encoding
    return ftp

def download_file(ftp, remote_filename, local_filename):
    with open(local_filename, "wb") as local_file:
        ftp.retrbinary(f"RETR {remote_filename}", local_file.write)

def extract_symbols(filename, limit=30):
    symbols = []
    with open(filename, "r") as file:
        for line in file:
            if not line.startswith("Symbol"):  # Skip header line
                symbol = line.split("|")[0].strip()  # Extract stock symbol
                symbols.append(symbol)
                if len(symbols) == limit:
                    break
    return symbols

def is_valid_ticker(ticker):
    """Check if the ticker is valid in yfinance."""
    try:
        info = yf.Ticker(ticker).info
        return 'regularMarketPrice' in info
    except:
        return False

def download_latest_data(tickers):
    """Download the latest data for a list of symbols."""
    try:
        # period=(1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max)
        data = yf.download(
            tickers,
            period="5d",
            progress=False,
            threads=False,
            group_by='ticker',  # Ensure data is grouped by ticker
            auto_adjust=True    # Adjust data for splits and dividends
        )
        if data.empty:
            print("No latest data available for the given tickers.")
            return pd.DataFrame()
        return data  # Return the full data
    except Exception as e:
        print(f"Error downloading latest data for tickers: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on failure

def batch_symbols(symbols, batch_size=50):
    """Yield successive batches from symbols."""
    for i in range(0, len(symbols), batch_size):
        yield symbols[i:i + batch_size]

def adjust_symbol(symbol):
    # Handle preferred stocks
    if '$' in symbol:
        base, preferred = symbol.split('$')
        return f"{base}-P{preferred}"
    # Replace '.' with '-' for share classes
    elif '.' in symbol:
        return symbol.replace('.', '-')
    else:
        return symbol


In [4]:
def analyze_recent_stock_data(recent_data):
    """Analyze stock data to find stocks with more than 15% gain over the last 3 trading days."""
    significant_gains = []

    if recent_data.empty:
        print("Recent data is missing or empty.")
        return significant_gains

    # Ensure consistent data format
    if isinstance(recent_data.columns, pd.MultiIndex):
        symbols = recent_data.columns.levels[0]
    else:
        symbols = recent_data.columns

    for symbol in symbols:
        try:
            prices = recent_data[symbol]['Close'].dropna()
            print(f"Processing {symbol}...")
            # print(f"Prices: {prices}")
            if len(prices) >= 3:
                start_price = prices.iloc[0]
                end_price = prices.iloc[-1]
                if start_price != 0:
                    percentage_change = ((end_price - start_price) / start_price) * 100
                    if percentage_change > 15:
                        # Append results
                        significant_gains.append({
                            'Symbol': symbol,
                            'Total Week Change (%)': percentage_change,
                        })
            else:
                print(f"Not enough data for {symbol} to calculate 3-day change.")
        except Exception as e:
            print(f"Error processing {symbol}: {e}")
            continue
    return significant_gains

In [10]:
def analyze_recent_stock_data_multi_metrics(recent_data,
                                            min_data_points=5,
                                            pct_change_threshold=15,
                                            obv_change_threshold=1,
                                            volume_surge_multiplier=1.5):
    """
    Analyze recent stock data (preferably for the last week) to extract various metrics:
    - Weekly total % change
    - Average daily returns
    - Volatility
    - Consistency of growth (all daily returns positive)
    - On-Balance Volume (OBV) change
    - Volume surge

    Parameters
    ----------
    recent_data : pd.DataFrame
        A DataFrame containing stock data. If MultiIndex columns, the top level should be symbol tickers.
        Must contain at least the columns "Close" and "Volume".
    min_data_points : int, optional
        Minimum number of data points required for analysis, by default 5.
    pct_change_threshold : float, optional
        Minimum total percentage change threshold to flag a potential bullish signal, by default 15.
    obv_change_threshold : float, optional
        Minimum OBV change percentage threshold to flag a potential bullish signal, by default 1.
    volume_surge_multiplier : float, optional
        Multiplier for average volume to determine if a volume surge occurred, by default 1.5.

    Returns
    -------
    list of dict
        A list of dictionaries, each containing analysis results for symbols that meet
        the bullish criteria (pct_change_threshold, obv_change_threshold, volume_surge).
    """

    analysis_results = []

    # Basic checks
    if recent_data is None or recent_data.empty:
        logging.warning("Recent data is missing or empty.")
        return analysis_results

    # Identify the symbol columns
    if isinstance(recent_data.columns, pd.MultiIndex):
        symbols = recent_data.columns.levels[0]
    else:
        symbols = recent_data.columns.unique()

    # Iterate over each symbol in the data
    for symbol in symbols:
        try:
            # Handle both simple columns (Close, Volume) or multi-level columns ([symbol, 'Close'], etc.)
            if isinstance(recent_data.columns, pd.MultiIndex):
                stock_data = recent_data[symbol].dropna(how='all')
            else:
                # If single-level columns, we need to ensure columns exist for the symbol
                # But in that case, usually the "symbol" is "Close" or "Volume" if the data is not multi-level.
                # So you may need to adjust the logic below depending on how your DataFrame is structured.
                stock_data = recent_data[[col for col in recent_data.columns if symbol in col]].dropna(how='all')

            if stock_data.empty or len(stock_data) < min_data_points:
                logging.info(f"Not enough data for {symbol} to perform weekly analysis.")
                continue

            # If the columns are labeled as 'Close' and 'Volume' in the second level of a MultiIndex
            # or just as single-level columns:
            if 'Close' not in stock_data.columns or 'Volume' not in stock_data.columns:
                logging.warning(f"Missing 'Close' or 'Volume' columns for {symbol}. Skipping.")
                continue

            # Extract series
            close_prices = stock_data['Close'].dropna()
            volumes = stock_data['Volume'].dropna()

            # We need to confirm we still have enough data after dropna
            if len(close_prices) < min_data_points or len(volumes) < min_data_points:
                logging.info(f"Not enough valid data for {symbol} after dropping NAs.")
                continue

            # Calculate the total weekly percentage change
            start_price = close_prices.iloc[0]
            end_price = close_prices.iloc[-1]
            if start_price == 0:
                logging.warning(f"Start price is zero for {symbol}; cannot compute % change.")
                continue

            total_pct_change = ((end_price - start_price) / start_price) * 100

            # Calculate daily returns
            daily_returns = close_prices.pct_change().dropna()
            average_daily_return = daily_returns.mean() * 100
            volatility = daily_returns.std() * 100

            # Check for consistent growth (all daily returns > 0)
            positive_days = (daily_returns > 0).sum()
            consistent_growth = (positive_days == len(daily_returns))

            # Calculate On-Balance Volume (OBV)
            obv = [volumes.iloc[0]]
            for i in range(1, len(close_prices)):
                if close_prices.iloc[i] > close_prices.iloc[i - 1]:
                    obv.append(obv[-1] + volumes.iloc[i])
                elif close_prices.iloc[i] < close_prices.iloc[i - 1]:
                    obv.append(obv[-1] - volumes.iloc[i])
                else:
                    obv.append(obv[-1])
            obv_change = ((obv[-1] - obv[0]) / obv[0]) * 100 if obv[0] != 0 else 0

            # Check for volume surge (last day volume vs average volume)
            average_volume = volumes.mean()
            last_day_volume = volumes.iloc[-1]
            volume_surge = (last_day_volume > (average_volume * volume_surge_multiplier))

            # Decide if a short-term bullish signal is triggered
            # Conditions: strong weekly gain, OBV increase, recent volume surge
            if (total_pct_change >= pct_change_threshold and
                    obv_change >= obv_change_threshold and
                    volume_surge):
                result = {
                    'Symbol': str(symbol),
                    'Total Week Change (%)': round(total_pct_change, 2),
                    'Average Daily Return (%)': round(average_daily_return, 2),
                    'Volatility (%)': round(volatility, 2),
                    'Consistent Growth': consistent_growth,
                    'OBV Change (%)': round(obv_change, 2),
                    'Last Day Volume': int(last_day_volume),
                    'Average Volume': int(average_volume),
                    'Volume Surge': volume_surge
                }
                analysis_results.append(result)

        except KeyError as e:
            logging.error(f"KeyError for symbol: {symbol}. Missing columns? Error: {e}")
        except Exception as e:
            logging.error(f"Unexpected error processing symbol: {symbol}. Error: {e}")

    return analysis_results


In [11]:
def main():
    ftp_server = "ftp.nasdaqtrader.com"
    nasdaq_filename = "nasdaqlisted.txt"
    other_filename = "otherlisted.txt"

    ftp = connect_to_ftp_server(ftp_server)
    ftp.cwd('Symboldirectory')

    download_file(ftp, nasdaq_filename, nasdaq_filename)
    download_file(ftp, other_filename, other_filename)
    ftp.quit()

    # Extract symbols
    nasdaq_symbols = extract_symbols(nasdaq_filename, limit=10)  # Increase limit as needed, 0 indicate all items
    other_symbols = extract_symbols(other_filename, limit=10)

    all_symbols = nasdaq_symbols + other_symbols
    all_symbols = [adjust_symbol(symbol) for symbol in all_symbols]

    # Prepare to collect significant gains
    all_gains = []

    batch_size = 10  # Adjust as needed
    for batch in batch_symbols(all_symbols, batch_size):
        recent_data = download_latest_data(batch)
        gains = analyze_recent_stock_data_multi_metrics(recent_data)
        # gains = analyze_recent_stock_data(recent_data)

        if gains:
            all_gains.extend(gains)
        time.sleep(3)  # Sleep to avoid rate limiting

    if all_gains:
        # Convert results to DataFrame
        gains_df = pd.DataFrame(all_gains)

        # Sort by higher potential (Total Week Change %) in descending order
        gains_df.sort_values(by='Total Week Change (%)', ascending=False, inplace=True)

        # Generate a timestamped filename
        current_date = datetime.now().strftime("%Y-%m-%d")
        filename = f'weekly_stock_analysis_{current_date}.csv'

        # Save DataFrame to CSV
        gains_df.to_csv(filename, index=False)

        logging.info(f"Analysis complete. Results saved to '{filename}'")

        # Download the CSV file
        files.download(filename)
    else:
        print("No stocks found with more than 10% gain over the last 3 trading days.")

In [12]:
if __name__ == "__main__":
    main()

ERROR:yfinance:
4 Failed downloads:
ERROR:yfinance:['ACT SYMBOL', 'AACT-W', 'AAM-U', 'AACT-U']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")')


No stocks found with more than 10% gain over the last 3 trading days.
