# Generate Pystocks Notebooks and TOC file

In [1]:
import nbformat as nbf
import pickle
import os
from pandas_datareader.data import DataReader
from dateutil.relativedelta import relativedelta
import numpy as np
import pandas as pd 
from tqdm import tqdm

## Settings

In [2]:
# Number of largest companies (ranked by market cap across both exchange
# files) that the "Overall Market Cap" cell is meant to keep.
top_n_by_market_cap = 50
# Number of companies kept per Sector in the "By industry" cell
# (passed to groupby(['Sector']).head(...)).
top_n_industries_by_market_cap = 10

## Import Files

### Overall Market Cap

In [3]:
# Build the overall "top N by market cap" symbol list.
# Input: the NASDAQ and NYSE CSV exports from the stock screener at
# https://www.nasdaq.com/market-activity/stocks/screener, saved in the data
# folder as NASDAQ.csv and NYSE.csv.

try:
    df_nasdaq = pd.read_csv('../data/NASDAQ.csv')
    df_nyse = pd.read_csv('../data/NYSE.csv')
except FileNotFoundError as err:
    # Fail loudly with an actionable message instead of printing a vague
    # note and letting later cells die on undefined (or stale) variables.
    raise FileNotFoundError(
        "Screener export not found: {}. Export the NASDAQ and NYSE CSV files "
        "and place them in ../data/ as NASDAQ.csv and NYSE.csv.".format(err.filename)
    ) from err

df_concat = (
    pd.concat([df_nasdaq, df_nyse])[["Symbol", "Name", "Market Cap", 'Sector']]
    .sort_values(by=["Market Cap"], ascending=False)
    .dropna()
)
# Rescale market cap by 1e9 and keep only rows above 10 on that scale.
df_concat['Market Cap'] = df_concat['Market Cap'] / 1000000000
df_concat = df_concat[df_concat['Market Cap'] > 10]

# head(n) keeps exactly n rows; the previous slice [0:(n-1)] kept n-1.
df_concat_marketcap = df_concat.head(top_n_by_market_cap)
df_concat_marketcap.to_pickle("../data/symbols.pkl")

### By industry

In [4]:
# Build the per-sector symbol list: the largest companies of each Sector.
# Input: the NASDAQ and NYSE CSV exports from the stock screener at
# https://www.nasdaq.com/market-activity/stocks/screener, saved in the data
# folder as NASDAQ.csv and NYSE.csv.

try:
    df_nasdaq = pd.read_csv('../data/NASDAQ.csv')
    df_nyse = pd.read_csv('../data/NYSE.csv')
except FileNotFoundError as err:
    # Fail loudly with an actionable message instead of printing a vague
    # note and letting later cells die on undefined (or stale) variables.
    raise FileNotFoundError(
        "Screener export not found: {}. Export the NASDAQ and NYSE CSV files "
        "and place them in ../data/ as NASDAQ.csv and NYSE.csv.".format(err.filename)
    ) from err

df_concat = (
    pd.concat([df_nasdaq, df_nyse])[["Symbol", "Name", "Market Cap", 'Sector']]
    .sort_values(by=["Market Cap"], ascending=False)
    .dropna()
)
# Rescale market cap by 1e9 and keep only rows above 10 on that scale.
df_concat['Market Cap'] = df_concat['Market Cap'] / 1000000000
df_concat = df_concat[df_concat['Market Cap'] > 10]

# Sort so each sector's rows are in descending market-cap order, then take
# the first N rows of every sector group.
df_concat_industry = (
    df_concat
    .sort_values(['Sector', 'Market Cap'], ascending=False)
    .groupby(['Sector'])
    .head(top_n_industries_by_market_cap)
)
df_concat_industry.to_pickle("../data/symbols_industry.pkl")

### Combined, de-duplicated dataframe

In [5]:
# Stack the overall and per-sector selections, keeping a single copy of any
# row that appears in both.
df_concat = pd.concat(
    objs=[df_concat_marketcap, df_concat_industry],
).drop_duplicates(keep='first')

## Generate notebooks

In [6]:
def generate_notebook(symbol,name):
    """Write a two-cell notebook for one ticker.

    The notebook contains a markdown title cell ("# (<symbol>): <name>") and a
    code cell that imports everything from ``functions`` and calls
    ``collect_tune_and_predict`` for the ticker with ``n_ahead = 365``.

    Parameters
    ----------
    symbol : str
        Ticker symbol; used in the title, the generated code and the file name.
    name : str
        Company name shown in the title cell.

    Returns
    -------
    bool
        Always ``True`` once the file has been written to
        ``stocks/fbprophet/<symbol>.ipynb`` (relative to the working directory).
    """
    nb = nbf.v4.new_notebook()

    # Title cell
    text = "# ({ticker}): {name}".format(ticker = symbol, name = name)

    # Code cell. Built line by line so the generated source starts at column 0
    # rather than inheriting this function's indentation.
    code = "\n".join([
        "from functions import *",
        "collect_tune_and_predict('{ticker}',n_ahead = 365)".format(ticker = symbol),
    ])

    nb['cells'] = [nbf.v4.new_markdown_cell(text),
                   nbf.v4.new_code_cell(code)]

    fname = 'stocks/fbprophet/{ticker}.ipynb'.format(ticker = symbol)

    # Create the target directory on first use so a fresh checkout works.
    os.makedirs(os.path.dirname(fname), exist_ok=True)

    # Notebooks are JSON; write them as UTF-8 regardless of the platform default.
    with open(fname, 'w', encoding='utf-8') as f:
        nbf.write(nb, f)
    return True

In [7]:
# Generate one notebook per company in the de-duplicated union of the
# overall and per-sector selections.
selected_companies = pd.concat([df_concat_marketcap, df_concat_industry]).drop_duplicates()
for row in tqdm(selected_companies.itertuples()):
    generate_notebook(symbol = row.Symbol, name = row.Name)

124it [00:00, 737.76it/s]


# Generate YAML list

In [8]:
# Alphabetically ordered list of the distinct sectors among the selected companies.
sectors = sorted(df_concat['Sector'].unique())

In [9]:
# Build the table-of-contents text: the intro file first, then one "part"
# per sector listing that sector's generated notebooks as chapters.
toc_lines = ["- file: pystocks/intro.ipynb\n"]
for sector in sectors:
    toc_lines.append("- part: " + sector + "\n  chapters:\n")
    # A boolean mask instead of a concatenated query() string, so sector names
    # containing quote characters cannot break the expression.
    sector_symbols = df_concat.loc[df_concat['Sector'] == sector, 'Symbol']
    for symbol in sector_symbols:
        toc_lines.append("  - file: pystocks/stocks/fbprophet/{ticker}.ipynb\n".format(ticker=symbol))
text = "".join(toc_lines)

In [10]:
# Overwrite the table-of-contents file with the generated text.
# Mode 'w' truncates an existing file and also creates it when missing
# (the previous 'r+' mode raised if the file did not exist yet).
with open("../_toc.yml", 'w', encoding='utf-8') as f:
    f.write(text)