<a href="https://colab.research.google.com/github/YangChaoChung/US-stock/blob/main/find_stocks_that_have_increased_over_15_in_the_last_5_trading_days.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance



In [None]:
!pip install pandas



In [1]:
import ftplib
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import logging
import time
from google.colab import files

# Suppress yfinance error messages
logging.getLogger('yfinance').setLevel(logging.CRITICAL)

In [None]:
def download_recent_stock_data(tickers, start_date):
    """Fetch closing prices for ``tickers`` from ``start_date`` until now.

    Args:
        tickers: A single ticker string or a collection of tickers, passed
            straight to ``yf.download``.
        start_date: Start of the download window.

    Returns:
        The ``'Close'`` data of the download. When ``tickers`` is a string
        the closes are wrapped in a DataFrame keyed by that ticker. An empty
        DataFrame is returned when nothing was downloaded or on any error.
    """
    try:
        history = yf.download(
            tickers,
            start=start_date,
            end=datetime.now(),
            progress=False,
            threads=False  # Remove or set group_by='column' here
        )
        if history.empty:
            print("No recent data available for the given tickers.")
            return pd.DataFrame()

        closes = history['Close']
        # A lone ticker gets its closes wrapped so the column carries its name.
        return pd.DataFrame({tickers: closes}) if isinstance(tickers, str) else closes
    except Exception as err:
        print(f"Failed to download recent data: {err}")
        return pd.DataFrame()



In [None]:
def connect_to_ftp_server(server, encoding="utf-8"):
    """Open an FTP connection to ``server`` and log in without credentials.

    The requested ``encoding`` is applied to the connection after the login
    has completed, and the connected ``ftplib.FTP`` object is returned.
    """
    connection = ftplib.FTP(host=server)
    connection.login()
    connection.encoding = encoding
    return connection

def download_file(ftp, remote_filename, local_filename):
    """Retrieve ``remote_filename`` over ``ftp`` in binary mode.

    The received bytes are written to ``local_filename``, which is created
    or overwritten.
    """
    retrieve_command = f"RETR {remote_filename}"
    with open(local_filename, "wb") as destination:
        ftp.retrbinary(retrieve_command, destination.write)

def extract_symbols(filename, limit=30):
    """Read ticker symbols from a pipe-delimited symbol directory file.

    The symbol is the first ``|``-separated field of each line. Lines that
    are not listings are skipped: the header row (whose first field is
    ``Symbol`` or ``ACT Symbol``), the ``File Creation Time`` footer and
    blank lines.

    Args:
        filename: Path of the text file to read.
        limit: Maximum number of symbols to return. ``0``, ``None`` or a
            negative value means "no limit".

    Returns:
        A list of symbol strings in file order.
    """
    # First-field values that mark a header row rather than a listing.
    header_fields = ("Symbol", "ACT Symbol")
    unlimited = not limit or limit < 0

    symbols = []
    with open(filename, "r", encoding="utf-8") as file:
        for line in file:
            symbol = line.split("|", 1)[0].strip()
            if not symbol or symbol in header_fields:
                continue
            if symbol.startswith("File Creation Time"):
                # Footer line appended to the directory files; not a ticker.
                continue
            symbols.append(symbol)
            if not unlimited and len(symbols) == limit:
                break
    return symbols

def is_valid_ticker(ticker):
    """Report whether yfinance exposes a ``regularMarketPrice`` for ``ticker``.

    Returns:
        True when the ticker's ``info`` contains the ``'regularMarketPrice'``
        key; False when it does not or when the lookup fails for any reason.
    """
    try:
        info = yf.Ticker(ticker).info
        return 'regularMarketPrice' in info
    except Exception:
        # Best-effort probe: a failed lookup means "not valid". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # still stop a long-running scan.
        return False

def download_latest_data(tickers):
    """Fetch the last five days of price data for ``tickers``.

    The download is grouped by ticker and adjusted for splits and dividends.

    Returns:
        The full downloaded DataFrame, or an empty DataFrame when nothing
        came back or the download raised.
    """
    try:
        # period=(1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max)
        download_options = {
            "period": "5d",
            "progress": False,
            "threads": False,
            "group_by": 'ticker',  # Ensure data is grouped by ticker
            "auto_adjust": True,   # Adjust data for splits and dividends
        }
        latest = yf.download(tickers, **download_options)
        if not latest.empty:
            return latest  # Return the full data
        print("No latest data available for the given tickers.")
        return pd.DataFrame()
    except Exception as err:
        print(f"Error downloading latest data for tickers: {err}")
        return pd.DataFrame()  # Return an empty DataFrame on failure

def batch_symbols(symbols, batch_size=50):
    """Generate consecutive slices of ``symbols``, each at most ``batch_size`` long.

    The final slice holds whatever is left over and may be shorter.
    """
    total = len(symbols)
    yield from (
        symbols[start:start + batch_size]
        for start in range(0, total, batch_size)
    )

def adjust_symbol(symbol):
    """Rewrite a directory symbol into the dash-separated form used for downloads.

    ``BASE$X`` (preferred stock) becomes ``BASE-PX``; otherwise every ``.``
    (share class separator) is replaced by ``-``. Symbols containing neither
    character are returned unchanged.
    """
    # Preferred stocks take priority over the share-class rule.
    if '$' in symbol:
        issuer, series = symbol.split('$')
        return issuer + "-P" + series

    # Share classes: a symbol without '.' passes through untouched.
    return symbol.replace('.', '-')


In [None]:
def analyze_recent_stock_data(recent_data, threshold=15, min_days=3):
    """Find symbols whose close rose by more than ``threshold`` percent.

    The gain is measured from the first to the last non-missing close in
    ``recent_data``, i.e. over whatever window the caller downloaded, not
    over a fixed number of days.

    Args:
        recent_data: DataFrame whose columns are grouped by symbol, each
            symbol providing a ``'Close'`` column.
        threshold: Minimum percentage gain (exclusive) for a symbol to be
            reported. Defaults to 15.
        min_days: Minimum number of non-missing closes a symbol needs to be
            evaluated. Defaults to 3.

    Returns:
        A list of dicts with keys ``'Symbol'`` and
        ``'Total Week Change (%)'``; empty when nothing qualifies.
    """
    significant_gains = []

    if recent_data.empty:
        print("Recent data is missing or empty.")
        return significant_gains

    # Ensure consistent data format
    if isinstance(recent_data.columns, pd.MultiIndex):
        symbols = recent_data.columns.levels[0]
    else:
        symbols = recent_data.columns

    for symbol in symbols:
        try:
            prices = recent_data[symbol]['Close'].dropna()
            print(f"Processing {symbol}...")
            if len(prices) < min_days:
                print(f"Not enough data for {symbol} to calculate {min_days}-day change.")
                continue

            start_price = prices.iloc[0]
            end_price = prices.iloc[-1]
            if start_price == 0:
                # A percentage change from a zero price is undefined.
                continue

            percentage_change = ((end_price - start_price) / start_price) * 100
            if percentage_change > threshold:
                significant_gains.append({
                    'Symbol': symbol,
                    'Total Week Change (%)': percentage_change,
                })
        except Exception as e:
            # One malformed symbol must not abort the rest of the batch.
            print(f"Error processing {symbol}: {e}")
            continue
    return significant_gains

In [None]:
def _obv_change_pct(close_prices, volumes):
    """Return the percentage change of On-Balance Volume over the window.

    OBV starts at the first day's volume; each later day's volume is added
    when the close rose, subtracted when it fell and ignored when unchanged.
    Returns None when the first day's volume is zero, because a percentage
    change from a zero base is undefined.
    """
    closes = close_prices.tolist()
    day_volumes = volumes.tolist()
    base = day_volumes[0]
    if base == 0:
        return None

    running = base
    for previous_close, close, volume in zip(closes, closes[1:], day_volumes[1:]):
        if close > previous_close:
            running += volume
        elif close < previous_close:
            running -= volume
    return ((running - base) / base) * 100


def analyze_recent_stock_data_multi_metrics(recent_data, min_gain_pct=15,
                                            min_obv_change_pct=1, min_days=5):
    """Report symbols with a strong price gain backed by rising OBV.

    For each symbol with at least ``min_days`` complete rows, the function
    computes the first-to-last close change, the mean and standard deviation
    of daily returns, whether every daily return was positive, and the
    percentage change of On-Balance Volume. A symbol is reported when the
    price change is at least ``min_gain_pct`` and the OBV change is at least
    ``min_obv_change_pct``.

    Args:
        recent_data: DataFrame whose columns are grouped by symbol, each
            symbol providing ``'Close'`` and ``'Volume'`` columns.
        min_gain_pct: Minimum first-to-last close gain in percent (inclusive).
        min_obv_change_pct: Minimum OBV change in percent (inclusive).
        min_days: Minimum number of complete rows required per symbol.

    Returns:
        A list of dicts, one per qualifying symbol, with the rounded metrics.
    """
    analysis_results = []

    if recent_data.empty:
        print("Recent data is missing or empty.")
        return analysis_results

    # Ensure consistent data format
    if isinstance(recent_data.columns, pd.MultiIndex):
        symbols = recent_data.columns.levels[0]
    else:
        symbols = recent_data.columns

    for symbol in symbols:
        print(f"Processing {symbol}...")
        try:
            stock_data = recent_data[symbol].dropna()
            if len(stock_data) < min_days:
                print(f"Not enough data for {symbol} to perform weekly analysis.")
                logging.info(f"Not enough data for {symbol} to perform weekly analysis.")
                continue

            close_prices = stock_data['Close']
            volumes = stock_data['Volume']

            # Total percentage change over the window
            start_price = close_prices.iloc[0]
            end_price = close_prices.iloc[-1]
            if start_price == 0:
                # A percentage change from a zero price is undefined.
                continue
            total_pct_change = ((end_price - start_price) / start_price) * 100

            # Daily return statistics, expressed in percent
            daily_returns = close_prices.pct_change().dropna()
            average_daily_return = daily_returns.mean() * 100
            volatility = daily_returns.std() * 100

            # Consistent growth means every daily return was positive
            positive_days = (daily_returns > 0).sum()
            consistent_growth = positive_days == len(daily_returns)

            obv_change = _obv_change_pct(close_prices, volumes)
            if obv_change is None:
                # Previously this divided by zero and produced inf/NaN,
                # and an inf value slipped through the OBV filter.
                print(f"Skipping {symbol}: first-day volume is zero, OBV change is undefined.")
                continue
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # One malformed symbol must not abort the rest of the batch.
            print(f"Error processing {symbol}: {e}")
            continue

        if total_pct_change >= min_gain_pct and obv_change >= min_obv_change_pct:
            analysis_results.append({
                'Symbol': symbol,
                'Total Week Change (%)': round(total_pct_change, 2),
                'Average Daily Return (%)': round(average_daily_return, 2),
                'Volatility (%)': round(volatility, 2),
                'Consistent Growth': consistent_growth,
                'OBV Change (%)': round(obv_change, 2),
            })
    return analysis_results


In [None]:
def main():
    """Download the symbol directories, scan the symbols and export the hits.

    Steps:
      1. Fetch ``nasdaqlisted.txt`` and ``otherlisted.txt`` from the
         ``Symboldirectory`` folder of the NASDAQ Trader FTP server.
      2. Extract a limited number of symbols from each file and convert them
         with ``adjust_symbol``.
      3. Download the latest data in batches and analyse each batch with
         ``analyze_recent_stock_data_multi_metrics``.
      4. If anything qualified, write ``weekly_stock_analysis.csv`` sorted by
         total change and trigger a Colab file download; otherwise print a
         message.
    """
    ftp_server = "ftp.nasdaqtrader.com"
    nasdaq_filename = "nasdaqlisted.txt"
    other_filename = "otherlisted.txt"

    # ftplib.FTP is a context manager: the session is shut down on exit even
    # when one of the downloads raises, so the connection is never leaked.
    with connect_to_ftp_server(ftp_server) as ftp:
        ftp.cwd('Symboldirectory')
        download_file(ftp, nasdaq_filename, nasdaq_filename)
        download_file(ftp, other_filename, other_filename)

    # Extract symbols. Increase the limits as needed; 0 means all entries.
    nasdaq_symbols = extract_symbols(nasdaq_filename, limit=10)
    other_symbols = extract_symbols(other_filename, limit=5)

    all_symbols = [
        adjust_symbol(symbol) for symbol in nasdaq_symbols + other_symbols
    ]

    # Collect qualifying stocks across all batches
    all_gains = []

    batch_size = 10  # Adjust as needed
    for batch in batch_symbols(all_symbols, batch_size):
        recent_data = download_latest_data(batch)
        # analyze_recent_stock_data(recent_data) is the price-only alternative.
        gains = analyze_recent_stock_data_multi_metrics(recent_data)

        if gains:
            all_gains.extend(gains)
        time.sleep(3)  # Sleep to avoid rate limiting

    if all_gains:
        # Convert to DataFrame and save to CSV
        gains_df = pd.DataFrame(all_gains)
        gains_df.sort_values(by='Total Week Change (%)', ascending=False, inplace=True)
        gains_df.to_csv('weekly_stock_analysis.csv', index=False)
        logging.info("Analysis complete. Results saved to 'weekly_stock_analysis.csv'")
        files.download('weekly_stock_analysis.csv')
    else:
        # The message now matches the criteria actually applied by the analysis.
        print("No stocks found with at least a 15% gain and rising OBV over the last 5 trading days.")

In [None]:
# Entry point: run the scan only when this code is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()