<a href="https://colab.research.google.com/github/YangChaoChung/US-stock/blob/main/find_stocks_that_have_decreased.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install yfinance pandas_market_calendars

Collecting pandas_market_calendars
  Downloading pandas_market_calendars-4.5.1-py3-none-any.whl.metadata (9.1 kB)
Collecting exchange-calendars>=3.3 (from pandas_market_calendars)
  Downloading exchange_calendars-4.7-py3-none-any.whl.metadata (37 kB)
Collecting pyluach (from exchange-calendars>=3.3->pandas_market_calendars)
  Downloading pyluach-2.2.0-py3-none-any.whl.metadata (4.3 kB)
Collecting korean_lunar_calendar (from exchange-calendars>=3.3->pandas_market_calendars)
  Downloading korean_lunar_calendar-0.3.1-py3-none-any.whl.metadata (2.8 kB)
Downloading pandas_market_calendars-4.5.1-py3-none-any.whl (108 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.1/108.1 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading exchange_calendars-4.7-py3-none-any.whl (196 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m196.9/196.9 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading korean_lunar_calendar-0.3.1-py3-none-any.whl (9.0 k

In [3]:
!pip install pandas



In [4]:
import ftplib
import yfinance as yf
import pandas as pd
import pandas_market_calendars as mcal
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import logging
import time
import pandas as pd
from google.colab import files

# Suppress yfinance error messages
logging.getLogger('yfinance').setLevel(logging.CRITICAL)

def connect_to_ftp_server(server, encoding="utf-8"):
    ftp = ftplib.FTP(server)
    ftp.login()
    ftp.encoding = encoding
    return ftp

def download_file(ftp, remote_filename, local_filename):
    with open(local_filename, "wb") as local_file:
        ftp.retrbinary(f"RETR {remote_filename}", local_file.write)

def extract_symbols(filename, limit=30):
    symbols = []
    with open(filename, "r") as file:
        for line in file:
            if not line.startswith("Symbol"):  # Skip header line
                symbol = line.split("|")[0]  # Extract stock symbol
                symbols.append(symbol)
                if len(symbols) == limit:
                    break
    return symbols

def adjust_symbol(symbol):
    # Handle preferred stocks
    if '$' in symbol:
        base, preferred = symbol.split('$')
        return f"{base}-P{preferred}"
    # Replace '.' with '-' for share classes
    elif '.' in symbol:
        return symbol.replace('.', '-')
    else:
        return symbol

def get_trading_dates():
    nyse = mcal.get_calendar('NYSE')
    # Calculate the date three years ago
    three_years_ago = datetime.now() - relativedelta(years=3)
    # Get the trading calendar
    try:
        # Fetch the trading schedule from three years ago to today
        schedule = nyse.schedule(start_date=three_years_ago, end_date=datetime.now())
        if not schedule.empty:
            # Find the closest trading day to three years ago
            first_trading_day = schedule.iloc[0].name.date()
            print(f"The closest trading day three years ago: {first_trading_day}")

            # Extend the date range to one week around the target day
            start_date = first_trading_day - timedelta(days=3)  # Three days before the first trading day
            end_date = first_trading_day + timedelta(days=3)    # Three days after the first trading day
            return start_date, end_date
    except Exception as e:
        print(f"Error occurred: {e}")
        return None, None
    else:
        print("No trading days available in the specified period.")
        return None, None

def download_stock_data(tickers, start_date, end_date):
    """Download historical stock data for given tickers within a date range."""
    try:
        data = yf.download(tickers, start=start_date, end=end_date, progress=False)
        # print("download_stock_data:\n", data)
        if data.empty:
            print("No historical data available for the given date range and tickers.")
        return data['Close']  # Return only the 'Close' prices
    except Exception as e:
        print(f"Failed to download historical data: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on failure

def download_latest_data(tickers):
    """Download the latest data for a list of symbols."""
    try:
        data = yf.download(tickers, period="1d", progress=False)
        # print("download_latest_data:\n", data)
        if data.empty:
            print("No latest data available for the given tickers.")
        return data['Close'].iloc[-1]  # Return the latest 'Close' prices
    except Exception as e:
        print(f"Error downloading latest data for tickers: {e}")
        return pd.Series()  # Return an empty Series on failure

def batch_symbols(symbols, batch_size=50):
    """Yield successive batches from symbols."""
    for i in range(0, len(symbols), batch_size):
        yield symbols[i:i + batch_size]

def analyze_stock_data(symbol, historical_data, latest_data):
    if historical_data.empty or latest_data.empty:
        return symbol, None
    try:
        start_price = historical_data.iloc[0]['Close']  # First available close price from history
        end_price = latest_data.iloc[0]['Close']  # Close price from the latest trading day
        percentage_change = ((end_price - start_price) / start_price) * 100
        return symbol, percentage_change
    except Exception as e:
        print(f"Error processing data for {symbol}: {e}")
        return symbol, None

def analyze_stock_data(historical_data, latest_data, find_gainers=False):
    """Analyze stock data to find stocks with more than 80% loss or 100% gain."""
    significant_changes = []

    # Check that data is in the expected format and not empty
    if not isinstance(historical_data, pd.DataFrame) or historical_data.empty:
        print("Historical data is missing or empty.")
        return significant_changes

    # Analysis of stock data
    for symbol in historical_data.columns:
        if symbol in latest_data and pd.notna(historical_data[symbol].iloc[0]) and pd.notna(latest_data[symbol]):
            start_price = historical_data[symbol].iloc[0]
            end_price = latest_data[symbol]
            if start_price != 0:  # Prevent division by zero
                percentage_change = ((end_price - start_price) / start_price) * 100
                if percentage_change < -50:
                    significant_changes.append((symbol, percentage_change, 'Loss'))
                elif find_gainers and percentage_change > 100:
                    significant_changes.append((symbol, percentage_change, 'Gain'))
        else:
            print(f"Data missing for {symbol} in historical or latest data.")

    return significant_changes

In [8]:
def main(find_gainers=False):
    ftp_server = "ftp.nasdaqtrader.com"
    nasdaq_filename = "nasdaqlisted.txt"
    other_filename = "otherlisted.txt"

    ftp = connect_to_ftp_server(ftp_server)
    ftp.cwd('Symboldirectory')

    download_file(ftp, nasdaq_filename, nasdaq_filename)
    download_file(ftp, other_filename, other_filename)
    ftp.quit()

    # Extract symbols
    nasdaq_symbols = extract_symbols(nasdaq_filename, limit=0)  # Increase limit as needed, 0 indicate all items
    other_symbols = extract_symbols(other_filename, limit=0)

    start_date, end_date = get_trading_dates()
    if start_date and end_date:
        losses_data = []
        gains_data = []
        all_symbols = nasdaq_symbols[:5] + other_symbols[:5]
        # all_symbols = nasdaq_symbols + other_symbols
        all_symbols = [adjust_symbol(symbol) for symbol in all_symbols]

        for batch in batch_symbols(all_symbols, 5):
            historical_data = download_stock_data(batch, start_date, end_date)
            latest_data = download_latest_data(batch)
            changes = analyze_stock_data(historical_data, latest_data, find_gainers)
            if changes:
                print("Significant stock changes:")
                for stock, change, change_type in changes:
                    print(f"{stock}: {change:.2f}% ({change_type})")
                    if change_type == 'Loss':
                        losses_data.append([stock, change])
                    elif change_type == 'Gain':
                        gains_data.append([stock, change])
            time.sleep(3)
    else:
        print("No trading dates found.")

    # Save losses to a CSV file
    if losses_data:
        losses_df = pd.DataFrame(losses_data, columns=['Symbol', 'Percentage Loss'])
        losses_df.to_csv('losses.csv', index=False)
        files.download('losses.csv')
    else:
        print("No significant losses found.")

    # Save gains to a separate CSV file
    if gains_data:
        gains_df = pd.DataFrame(gains_data, columns=['Symbol', 'Percentage Gain'])
        gains_df.to_csv('gains.csv', index=False)
        files.download('gains.csv')
    else:
        print("No significant gains found.")

In [9]:
if __name__ == "__main__":
    # Set to True to find stocks with more than 100% gain
    main(find_gainers=True)

The closest trading day three years ago: 2022-01-11
Significant stock changes:
AACG: -56.62% (Loss)
AADI: -83.55% (Loss)
Data missing for ACT SYMBOL in historical or latest data.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

No significant gains found.
