In [31]:
import sys
sys.path.append('..')

import os
from os.path import exists
from glob import glob
import json

import pandas as pd
from pandas import DataFrame

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import seaborn as sns
# Global seaborn theme; set once here, before any figure is created.
sns.set(style='darkgrid', context='talk', palette='Dark2')

# Shared date formatter that renders dates as month/two-digit-year (e.g. 03/24); used by plot_ticker for the x axis.
my_year_month_fmt = mdates.DateFormatter('%m/%y')

from calc import calc_line_params


In [32]:
def get_tickers_from_dir(dir: str) -> list[str]:
    """
    Lists the ticker names found under `../select/{dir}*/`.

    Every path matched by the glob `../select/{dir}*/*` is treated as one ticker, and
    the final component of that path is used as the ticker name.

    Args:
        dir: Prefix of the group directory name(s) under `../select` (e.g. `'large'`).

    Returns:
        The ticker names in the order `glob` yields the matches (not sorted). Empty
        when nothing matches.
    """
    # `basename` instead of `split("/")`: `glob` joins matched components with the OS
    # separator, so on Windows splitting on "/" returns a partial path, not the ticker.
    return [os.path.basename(match) for match in glob(f'../select/{dir}*/*')]

# Ticker names discovered on disk for each group (`../select/large*/*` and `../select/mid*/*`).
large_tickers = get_tickers_from_dir('large')        
mid_tickers = get_tickers_from_dir('mid')       

In [33]:
# Alias for a mapping of ticker name -> DataFrame loaded for that ticker.
type DataFrameDict = dict[str, DataFrame]

def get_data_from_dir(dir: str):
    """
    Returns `(prices, earnings dates, recommendations, info)`.

    They are in separate dicts because I need better typing for later but Python typing is a pain.
    """

    dfs: DataFrameDict = {}
    er_dfs: DataFrameDict = {}
    recs_dfs: DataFrameDict = {}
    info: dict[str, dict[str, int | str]] = {}

    for match in glob(f'../select/{dir}*/*'):
        ticker = match.split("/")[-1]

        prices_path = f"{match}/prices.csv"
        if exists(prices_path):
            dfs[ticker] = pd.read_csv(prices_path, index_col=['Date'], parse_dates=['Date']) # type: ignore
        
        recs_path = f"{match}/recs.csv"
        if exists(recs_path):
            recs_dfs[ticker] = pd.read_csv(recs_path) # type: ignore
        
        earnings_dates_path = f"{match}/earnings_dates.csv"
        if exists(earnings_dates_path):
            er_dfs[ticker] = pd.read_csv(earnings_dates_path, index_col=['Earnings Date'], parse_dates=['Earnings Date']) # type: ignore
    
        info_path = f"{match}/info.json"
        if exists(info_path):
            with open(info_path) as f:
                file = f.read()
                if len(file) > 0:
                    info[ticker] = json.loads(file)
            

    return dfs, er_dfs, recs_dfs, info
      
# Load prices, earnings dates, recommendations and info for each group, keyed by ticker name.
large_dfs, large_er_dfs, large_recs_dfs, large_info = get_data_from_dir('large')
mid_dfs, mid_er_dfs, mid_recs_dfs, mid_info = get_data_from_dir('mid')


In [34]:
def get_rising_trend(ticker: str, df: DataFrame):
    """
    Reports whether the line fitted to `df` by `calc_line_params` is rising.

    Args:
        ticker: Ticker name; only used in the error message.
        df: Frame handed unchanged to `calc_line_params`.

    Returns:
        `(is_rising, coeff, intercept, slope)`: the three values returned by
        `calc_line_params`, preceded by `True` when `coeff > 0` and `False` otherwise.

    Raises:
        Exception: Any error raised while fitting or comparing is printed, appended to
            the `log` file in the current working directory, and re-raised unchanged.
    """
    try:
        coeff, intercept, slope = calc_line_params(df)

        return bool(coeff > 0), coeff, intercept, slope

    except Exception as e:
        err = f"Error finding rising trend {ticker}: {e}"
        print(err)

        # Append one line per failure: opening with "w" would wipe the diagnostics of
        # every earlier failure each time a new one is recorded.
        with open("log", "a") as f:
            f.write(f"{err}\n")

        # Bare `raise` re-raises the active exception with its original traceback.
        raise

def plot_ticker(ticker: str, df: DataFrame):
    """
    Draws the close price and the `Slope` series of one ticker on a new small figure.

    Args:
        ticker: Ticker name shown in the plot title.
        df: Frame with `Close` and `Slope` columns. Its index is used as the x axis and
            its min/max are formatted with `strftime`, so it must be datetime-like.

    Returns:
        The `matplotlib.pyplot` module itself (not a `Figure`). The figure created here
        is left as pyplot's current figure, so callers act on it through the returned
        module (`.savefig(...)`, `.close()`).
    """
    _, ax = plt.subplots(figsize=(4,2), layout='constrained')

    # These change pyplot's process-wide defaults and persist after this call.
    plt.rc('axes', titlesize=8, labelsize=8)
    plt.rc('legend', fontsize=8)

    ax.plot(df.index, df["Close"], label='Close price', color='skyblue', linewidth=1)

    ax.plot(df.index, df["Slope"], label='Slope', color='orange', linewidth=1)

    # Named so that the builtins `min` / `max` are not shadowed.
    first_day: str = df.index.min().strftime("%Y-%m-%d")
    last_day: str = df.index.max().strftime("%Y-%m-%d")

    ax.legend(loc='best')
    ax.set_ylabel('Price in $', fontsize=8)
    ax.set_title(f'{ticker}, {first_day} to {last_day}')
    ax.xaxis.set_major_formatter(my_year_month_fmt)

    # `tick_params` sets the label size for existing and future ticks on both axes.
    # `plt.xticks(fontsize=8)` only restyles the tick labels that exist when it is
    # called, which here was before any data had been plotted.
    ax.tick_params(axis='both', labelsize=8)

    return plt

In [35]:
def get_suitable_tickers(
        tickers: list[str],
        dfs: dict[str, DataFrame],
        info_dict: dict[str, dict[str, int | str]],
        er_dfs: dict[str, DataFrame],
        recs_dfs: dict[str, DataFrame],
        dir_name: str,
        ):
    for ticker in tickers:
        df = dfs.get(ticker)
        info = info_dict.get(ticker)
        er_df = er_dfs.get(ticker)
        recs_df = recs_dfs.get(ticker)

        rising_trend, coeff, intercept, slope = get_rising_trend(ticker, df) if df is not None else (False, None, None, None)

        if rising_trend == False:
            continue

        stock_dir = f"data/{dir_name}/{ticker}"
        os.makedirs(stock_dir, exist_ok=True)
        
        if df is not None:
            df["Slope"] = slope
            fig = plot_ticker(ticker, df)
            # fig.show()
            fig.savefig(f"{stock_dir}/graph.png")
            fig.close()
        
        if er_df is not None and len(er_df) > 0:
            today = pd.Timestamp.today('US/Eastern')
            month_later = today + pd.DateOffset(months=1)
            
            new_er_df = er_df.loc[(er_df.index >= today) & (er_df.index <= month_later)]
            # new_er_df.to_csv(f"{stock_dir}/er_dates.csv")
            new_er_df.to_json(f"{stock_dir}/er_dates.json", orient="table")
            # display(new_er_df)

        
        if recs_df is not None and len(recs_df) > 0:
            new_recs_df = recs_df.iloc[[0]]
            # new_recs_df.to_csv(f"{stock_dir}/recs.csv")
            new_recs_df.to_json(f"{stock_dir}/recs.json", orient="records")
            # display(new_recs_df)
        
        basic_info = {
            "companyName": info.get("shortName", "NA"),
            "symbol": info.get("symbol", "NA"),
            "latestPrice": df['Close'].iloc[-1] if df is not None else "NA",
            "trailingPE": info.get("trailingPE", "NA"),
            "forwardPE": info.get("forwardPE", "NA"),
            "pegRatio": info.get("pegRatio", "NA"),
            "trailingPegRatio": info.get("trailingPegRatio", "NA"),
            "beta": info.get("beta", "NA"),
        } if info else {}

        info_path = f"{stock_dir}/info.json"
        with open(info_path, "w") as f:
            f.write(json.dumps(basic_info))

        # for key, value in basic_info.items():
        #     print(f"{key}: {value}")

# Export the reports for both groups; output goes to data/large/ and data/mid/.
get_suitable_tickers(large_tickers, large_dfs, large_info, large_er_dfs, large_recs_dfs, "large")
get_suitable_tickers(mid_tickers, mid_dfs, mid_info, mid_er_dfs, mid_recs_dfs, "mid")
    