In [71]:
# This module helps to select stocks to look at
# It writes raw stock data to disk

import sys
sys.path.append('..')

import os
from os.path import exists

import time
from glob import glob
import json

from etl import download_data, delete_data_dir

import yfinance as yf # type: ignore
import yahooquery as yq # type: ignore
from pandas import DataFrame


In [50]:
# A price table: a list of rows. The code in this notebook reads row[0] as the
# ticker symbol and compares row[1] numerically as the price.
type Prices = list[list[str | int]]

# Price bucket labels in the form "<low>_<high>"; split_by_range splits each
# label on "_" and converts both halves with float().
ranges = ["0_50", "50_100", "100_150", "150_200", "200_10000"]

def split_by_range(
    price_dict: dict[str, Prices],
    price_list: Prices,
    price_ranges: list[str] | None = None,
):
    """Bucket the rows of ``price_list`` by price and store them in ``price_dict``.

    Each label has the form ``"<low>_<high>"``. A row is placed in a bucket when
    ``low < row[1] <= high``. The upper bound is inclusive so that a price
    sitting exactly on a boundary (e.g. 50) is kept in one bucket instead of
    being dropped by both neighbours; the lower bound stays exclusive, so
    prices of zero or below are still left out of the first bucket.

    Args:
        price_dict: Mapping that is filled in place, one entry per label. Any
            existing entry for a label is replaced.
        price_list: Rows whose element 0 is the ticker and element 1 the price.
        price_ranges: Labels to bucket by. Defaults to the notebook-level
            ``ranges`` list.

    Returns:
        None. The result is written into ``price_dict``.
    """
    if price_ranges is None:
        price_ranges = ranges

    for label in price_ranges:
        low_text, high_text = label.split("_")
        # Parse the bounds once per bucket rather than once per row.
        low, high = float(low_text), float(high_text)
        price_dict[label] = [row for row in price_list if low < row[1] <= high]


# Bucketed price tables, keyed by range label, one dict per market-cap group.
large: dict[str, Prices] = {}
mid: dict[str, Prices] = {}

# Every file matching the pattern is loaded; paths containing "large" feed the
# large-cap table and every other match feeds the mid-cap table.
for price_file in glob(f'../prices/*cap*'):
    bucket_table = large if "large" in price_file else mid
    with open(price_file, "r") as handle:
        split_by_range(bucket_table, json.load(handle))

In [51]:
# Report how many stocks fall in each price bucket for both groups.
# The loop variable is not called `range`: a top-level loop in a notebook binds
# its variable globally, which would replace the builtin for every later cell.
print("Large")
for bucket in ranges:
    print(f"{bucket}: {len(large[bucket])}")

print()

print("Mid")
for bucket in ranges:
    print(f"{bucket}: {len(mid[bucket])}")

Large
0_50: 97
50_100: 126
100_150: 87
150_200: 49
200_10000: 142

Mid
0_50: 151
50_100: 126
100_150: 54
150_200: 25
200_10000: 44


In [57]:
# Date stamp (YYYY-MM-DD) used below as the per-run subdirectory name
# in "{dir_name}/{ticker}/{today}".
today = time.strftime("%Y-%m-%d")

In [73]:
# Remove previously stored data using the project's etl helper.
# NOTE(review): the arguments here are "mid" and "large", while the cells below
# write to "mid_0_50" and "large_0_50" — confirm delete_data_dir targets the
# intended directories.
delete_data_dir("mid")
delete_data_dir("large")

In [69]:
# Collect the ticker symbols that do not yet have a directory for today's date

def get_tickers(price_dict: dict[str, Prices], range: str, dir_name: str) -> list[str]:
    """Return the tickers in one price bucket that still need fetching today.

    A ticker is skipped as soon as the path ``{dir_name}/{ticker}/{today}``
    exists, regardless of which files that directory contains.

    Args:
        price_dict: Bucketed price table keyed by range label.
        range: Label of the bucket to read, e.g. ``"0_50"``.
        dir_name: Root directory under which per-ticker data is stored.

    Returns:
        Ticker symbols (element 0 of each row), in bucket order.
    """
    pending: list[str] = []
    for row in price_dict[range]:
        symbol = row[0]
        if exists(f"{dir_name}/{symbol}/{today}"):
            continue
        pending.append(symbol)
    return pending

# Tickers from the "0_50" bucket of each group that have no directory for today yet.
large_tickers: list[str] = get_tickers(price_dict=large, range="0_50", dir_name="large_0_50")
mid_tickers: list[str] = get_tickers(price_dict=mid, range="0_50", dir_name="mid_0_50")

In [64]:
# History window and sampling step handed to download_data
# (presumably yfinance-style period/interval strings — confirm in etl).
period = '1y'
interval = '1d'

# Report both groups: both ticker lists are downloaded below.
print(f'Getting {len(large_tickers)} large-cap and {len(mid_tickers)} mid-cap stocks')

large_cap_dfs = download_data(large_tickers, period, interval)
mid_cap_dfs = download_data(mid_tickers, period, interval)

Getting 3 stocks
[*********************100%%**********************]  3 of 3 completed


In [65]:
def dfs_to_disk(dfs: dict[str, DataFrame], tickers: list[str], dir_name: str):
    """Write each ticker's dataframe to ``{dir_name}/{ticker}/{today}/stats.csv``.

    A ticker that has no entry in ``dfs`` is reported and skipped. Its directory
    is not created, because an empty dated directory would make the ticker look
    already fetched to the existence check in ``get_tickers``, and one missing
    ticker should not stop the remaining ones from being written.

    Args:
        dfs: Dataframes keyed by ticker symbol.
        tickers: Ticker symbols to write.
        dir_name: Root directory under which per-ticker data is stored.
    """
    for ticker in tickers:
        # Look the frame up before touching the filesystem.
        frame = dfs.get(ticker)
        if frame is None:
            print(f"No data for {ticker}; skipping")
            continue

        stock_dir = f"{dir_name}/{ticker}/{today}"
        os.makedirs(stock_dir, exist_ok=True)

        frame.to_csv(f"{stock_dir}/stats.csv")

# Persist the downloaded price history for both groups.
dfs_to_disk(dfs=large_cap_dfs, tickers=large_tickers, dir_name="large_0_50")
dfs_to_disk(dfs=mid_cap_dfs, tickers=mid_tickers, dir_name="mid_0_50")

In [66]:
def get_ticker_info(tickers: list[str], dir_name: str):
    """Fetch per-ticker metadata and store it under ``{dir_name}/{ticker}/{today}``.

    Two files are written per ticker, each only if it does not already exist:

    * ``recs.csv``  - ``recommendation_trend`` from yahooquery
    * ``info.json`` - ``Ticker.info`` from yfinance

    The two downloads are handled independently, so a failure of one does not
    prevent the other from being attempted. Errors are printed and never
    raised, and processing continues with the next ticker.

    Args:
        tickers: Ticker symbols to fetch.
        dir_name: Root directory under which per-ticker data is stored.
    """
    for ticker in tickers:
        stock_dir = f"{dir_name}/{ticker}/{today}"
        try:
            os.makedirs(stock_dir, exist_ok=True)
        except Exception as e:
            print(f"Error getting info for {ticker}: {e}")
            continue

        # Tracks whether a remote request was attempted for this ticker.
        requested = False

        csv_path = f"{stock_dir}/recs.csv"
        if not exists(csv_path):
            requested = True
            try:
                recs: DataFrame = yq.Ticker(ticker).recommendation_trend
                recs.to_csv(csv_path)
            except Exception as e:
                print(f"Error getting info for {ticker}: {e}")

        json_path = f"{stock_dir}/info.json"
        if not exists(json_path):
            requested = True
            try:
                info = yf.Ticker(ticker).info
                # Serialise before opening the file: a serialisation error must
                # not leave an empty info.json that later runs treat as complete.
                payload = json.dumps(info)
                with open(json_path, "w") as f:
                    f.write(payload)
            except Exception as e:
                print(f"Error getting info for {ticker}: {e}")

        # Pause between tickers only when a request was actually made;
        # fully cached tickers need no delay.
        if requested:
            time.sleep(3)

# Fetch recommendations and company info for both groups.
get_ticker_info(tickers=large_tickers, dir_name="large_0_50")
get_ticker_info(tickers=mid_tickers, dir_name="mid_0_50")