In [1]:
import os
import requests
import pandas as pd
import numpy as np
import json

from pathlib import Path
import csv

# from dotenv import load_dotenv
# load_dotenv()

# import pandas_datareader as web
# import datetime as dt

#pip install yfinance 
import yfinance as yf

# import panel as pn
# from panel.interact import interact
# import plotly.express as px
# pn.extension("plotly")

%matplotlib inline

In [2]:
## Start date as an ISO 'YYYY-MM-DD' string. create_df() slices every DataFrame with
## df.loc[start_date:], so only rows dated on or after this value are kept.
start_date = '2020-09-14'

In [3]:
def import_df(csv_path):
    """Load a CSV file into a DataFrame indexed by its ``Date`` column.

    Parameters
    ----------
    csv_path : str or os.PathLike
        Location of the CSV file. The file must contain a ``Date`` column.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``Date`` as the index. ``parse_dates=True``
        asks pandas to parse that index as datetimes.
    """
    # ``infer_datetime_format`` is intentionally not passed: pandas 2.0
    # deprecated it in read_csv (it only emits a FutureWarning there), and
    # ``parse_dates=True`` on its own already parses the index column.
    return pd.read_csv(
        Path(csv_path),
        parse_dates=True,
        index_col="Date",
    )

def get_ticker_info(symbol):
    """Look up share counts for ``symbol`` through yfinance.

    Reads the ``'sharesOutstanding'`` and ``'floatShares'`` entries of
    ``yf.Ticker(symbol).info`` and returns them in a dict under the keys
    ``'SharesOutstanding'`` and ``'FloatShares'``. A missing entry is not
    handled here; the lookup error propagates to the caller.
    """
    info = yf.Ticker(symbol).info
    shares_outstanding = info['sharesOutstanding']
    float_shares = info['floatShares']
    return {
        'SharesOutstanding': shares_outstanding,
        'FloatShares': float_shares,
    }

In [4]:
def create_df(symbol):
    """Build the per-ticker analysis DataFrame for ``symbol``.

    ``symbol`` must be a string. The function reads
    ``../FilesExport/<symbol>_all_data.csv`` via ``import_df`` and expects the
    columns ``open``, ``high``, ``low``, ``close``, ``volume`` and
    ``<symbol>_QUANTITY_FAILS``. It then:

    * adds ``price_range`` (high - low) and ``daily_change`` (open - close),
    * drops ``open``, ``low`` and ``high``,
    * adds ``fail_volume_pct`` (fails / volume * 100) and ``fail_float_pct``
      (fails / float shares from ``get_ticker_info`` * 100),
    * keeps only rows from the module-level ``start_date`` onwards.
    """
    fails_col = symbol + '_QUANTITY_FAILS'
    source_csv = '../FilesExport/' + symbol + '_all_data.csv'

    prices = import_df(source_csv)
    float_shares = get_ticker_info(symbol)['FloatShares']

    # Derive the spread columns first, then discard the raw OHLC inputs.
    enriched = prices.assign(
        price_range=prices['high'] - prices['low'],
        daily_change=prices['open'] - prices['close'],
    ).drop(columns=['open', 'low', 'high'])

    enriched['fail_volume_pct'] = enriched[fails_col] / enriched['volume'] * 100
    enriched['fail_float_pct'] = enriched[fails_col] / float_shares * 100

    return enriched.loc[start_date:]

In [5]:
def _min_max_scale(df, columns):
    """Return a new frame holding only ``columns``, each min-max scaled.

    Every selected column is rescaled independently as
    ``(value - min) / (max - min)`` using that column's own min and max.
    ``df`` itself is not modified. A column whose min equals its max is
    divided by zero, so pandas fills it with NaN rather than raising.
    """
    columns = list(columns)
    scaled = df[columns].copy()
    # dict.fromkeys keeps the order and visits each distinct column once.
    for name in dict.fromkeys(columns):
        values = df[name]
        low, high = values.min(), values.max()
        scaled[name] = (values - low) / (high - low)
    return scaled


def normalize_ftd(df,symbol):
    """Min-max scale ``<symbol>_QUANTITY_FAILS`` and ``volume``.

    Returns a new two-column DataFrame (fails column first, then ``volume``)
    sharing ``df``'s index; ``df`` is left untouched.
    """
    quantity_title = symbol+'_QUANTITY_FAILS'
    return _min_max_scale(df, [quantity_title, 'volume'])


def normalize_two_columns(df,column1,column2):
    """Min-max scale two arbitrary columns of ``df``.

    Returns a new DataFrame with ``column1`` and ``column2`` (in that order)
    sharing ``df``'s index; ``df`` is left untouched.
    """
    return _min_max_scale(df, [column1, column2])


def normalize_ftd_plus_two_columns(df,symbol,column1,column2):
    """Min-max scale ``<symbol>_QUANTITY_FAILS`` plus two other columns.

    Returns a new DataFrame with the fails column, ``column1`` and
    ``column2`` (in that order); ``df`` is left untouched.
    """
    quantity_title = symbol+'_QUANTITY_FAILS'
    return _min_max_scale(df, [quantity_title, column1, column2])


def normalize_ftd_plus_three_columns(df,symbol,column1,column2,column3):
    """Min-max scale ``<symbol>_QUANTITY_FAILS`` plus three other columns.

    Returns a new DataFrame with the fails column, ``column1``, ``column2``
    and ``column3`` (in that order); ``df`` is left untouched.
    """
    quantity_title = symbol+'_QUANTITY_FAILS'
    return _min_max_scale(df, [quantity_title, column1, column2, column3])

# Line chart of normalized fails alongside normalized volume and close price
def make_nfvc_plot(df,symbol):
    """Plot min-max scaled fails, ``volume`` and ``close`` as one line chart.

    The three columns are scaled by ``normalize_ftd_plus_two_columns`` and the
    result is handed to ``px.line``; the figure is returned, not shown.

    Relies on a module-level ``px`` (plotly.express). The notebook's import
    cell has that import commented out, so it must be enabled before calling.
    """
    scaled = normalize_ftd_plus_two_columns(df, symbol, 'volume', 'close')
    return px.line(scaled)

def make_ftd_compare_plot(df,symbol,column1):
    """Plot min-max scaled fails against one other min-max scaled column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``<symbol>_QUANTITY_FAILS`` and ``column1``.
    symbol : str
        Ticker symbol used to build the fails column name.
    column1 : str
        Name of the column to compare the fails against.

    Returns
    -------
    The figure produced by ``px.line`` (returned, not shown).

    Relies on a module-level ``px`` (plotly.express). The notebook's import
    cell has that import commented out, so it must be enabled before calling.
    """
    quantity_title = symbol+'_QUANTITY_FAILS'
    # Reuse the shared two-column scaler instead of repeating the min-max
    # arithmetic inside a loop that recomputed it once per DataFrame column.
    normal_df = normalize_two_columns(df, quantity_title, column1)
    return px.line(normal_df)

In [6]:
def do_correlation_ftds(df,column):
    """Correlate the first column of ``df`` with ``column``.

    The first column (by position) and ``column`` are joined side by side,
    drawn as a scatter plot (first column on x, ``column`` on y), and their
    correlation matrix is printed. The plot is shown as a side effect.

    Returns the correlation matrix from ``DataFrame.corr()``.

    Relies on a module-level ``px`` (plotly.express). The notebook's import
    cell has that import commented out, so it must be enabled before calling.
    """
    first_col = df.iloc[:, 0]
    pair = pd.concat([first_col, df[[column]]], join='inner', axis=1)

    fig = px.scatter(pair, x=pair.iloc[:, 0], y=column)
    corr_matrix = pair.corr()
    print(corr_matrix)
    fig.show()

    return corr_matrix

def do_correlation_custom(df,column1,column2):
    """Correlate two named columns of ``df``.

    Draws a scatter plot of ``column1`` (x) against ``column2`` (y), prints
    their correlation matrix and shows the plot as a side effect.

    Returns the correlation matrix from ``DataFrame.corr()``, matching
    ``do_correlation_ftds`` (previously the matrix was computed but the
    function returned ``None``).

    Relies on a module-level ``px`` (plotly.express). The notebook's import
    cell has that import commented out, so it must be enabled before calling.
    """
    combined_df = df[[column1,column2]]

    plot = px.scatter(combined_df,x=column1,y=column2)
    corr = combined_df.corr()
    print(corr)
    plot.show()

    return corr

In [7]:
def sort_ftd_fail_float_1pct(df):
    """Keep only the rows whose ``fail_float_pct`` is greater than 1.

    Despite the name, no sorting is done: the rows are filtered, the number
    of matching rows is printed, and the filtered frame is returned.
    """
    above_one_pct = df[df.fail_float_pct.gt(1)]
    print(len(above_one_pct.index))
    return above_one_pct

def count_ftd_fail_float_1pct(df):
    """Count the rows whose ``fail_float_pct`` is greater than 1.

    Returns the count as a plain Python ``int``.
    """
    exceeds_one_pct = df.fail_float_pct > 1
    return int(exceeds_one_pct.sum())

In [8]:
# Build the GME frame with create_df (which reads ../FilesExport/GME_all_data.csv
# and keeps rows from start_date onwards), then display it as the cell output.
gme_df = create_df('GME')
gme_df

Unnamed: 0_level_0,GME_QUANTITY_FAILS,close,volume,price_range,daily_change,fail_volume_pct,fail_float_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-09-14,398676,6.91,10373189,0.4100,0.090,3.843331,0.644776
2020-09-15,267777,7.39,5806696,0.6980,-0.340,4.611521,0.433074
2020-09-16,884485,8.71,19943923,2.1600,-1.340,4.434860,1.430471
2020-09-17,598737,9.20,17777853,1.6200,-0.520,3.367881,0.968333
2020-09-18,847632,9.73,17437164,1.0000,-0.230,4.861066,1.370869
...,...,...,...,...,...,...,...
2021-09-08,96,207.70,5325598,22.3377,2.080,0.001803,0.000155
2021-09-09,544,204.09,7569339,25.0900,-15.258,0.007187,0.000880
2021-09-10,4836,194.87,3088838,13.3900,5.963,0.156564,0.007821
2021-09-13,3751,207.70,4474886,19.6400,-10.980,0.083823,0.006066


In [9]:
# Build one DataFrame per ticker with create_df, grouped by category.
# gme_df is not rebuilt in this cell; it is only referenced in the list below.

# remaining "meme" stocks
amc_df, bb_df, nok_df = [create_df(ticker) for ticker in ('AMC', 'BB', 'NOK')]

# "volatile" stocks
tsla_df, shop_df, qsr_df, spce_df = [
    create_df(ticker) for ticker in ('TSLA', 'SHOP', 'QSR', 'SPCE')
]

# "stable" stocks
aapl_df, msft_df, jpm_df, gs_df = [
    create_df(ticker) for ticker in ('AAPL', 'MSFT', 'JPM', 'GS')
]

DataFrame_List = [
    gme_df, amc_df, bb_df, nok_df,        # "meme" stocks
    tsla_df, shop_df, qsr_df, spce_df,    # "volatile" stocks
    aapl_df, msft_df, jpm_df, gs_df,      # "stable" stocks
]

In [13]:
## Create and save the df_info DataFrame. This cell is slow to run, so run it once, export the result to CSV, and load that CSV afterwards.
## Keep the code commented out once the CSV exists, so the Yahoo API is not queried again on every run.


# empty_list = []
# ticker_list = ['GME','AMC','BB','NOK','TSLA','SHOP','QSR','SPCE','AAPL','MSFT','JPM','GS']
# float_list = []
# sum_list = []
# for i in DataFrame_List:
#     count = count_ftd_fail_float_1pct(i)
#     empty_list.append(count)
    
# for i in ticker_list:
#     ticker_info = get_ticker_info(i)
#     float_list.append(ticker_info['FloatShares'])

# for i in DataFrame_List:
#     sum_variable = i.iloc[:,0].sum()
#     sum_list.append(sum_variable)
    
# df_info = pd.DataFrame(
#     {'Symbols': ticker_list,
#      'DaysAbove1pct': empty_list,
#      'FloatShares': float_list,
#      'TotalFTDsPastYear': sum_list
#     }
# )
# df_info['pct_float_Total_FTD_ytd'] = df_info['TotalFTDsPastYear'] / df_info['FloatShares'] * 100

# # Export and Save df_info 
# df_info.to_csv('../FilesExport/all_ftd_info.csv')

In [16]:
# Import and load df_info from the CSV export.
# index_col=0 uses the file's first column as the DataFrame index.
df_info = pd.read_csv(Path('../FilesExport/all_ftd_info.csv'),index_col=0)
df_info

Unnamed: 0,Symbols,DaysAbove1pct,FloatShares,TotalFTDsPastYear,pct_float_Total_FTD_ytd
0,GME,35,61831728,55465688,89.70425
1,AMC,5,511466851,277004450,54.158828
2,BB,0,512112701,32895332,6.423456
3,NOK,0,4643057032,112441625,2.421715
4,TSLA,0,801734075,10201621,1.272444
5,SHOP,0,113048153,603560,0.533896
6,QSR,0,279856954,5256245,1.87819
7,SPCE,3,181386891,36960919,20.376841
8,AAPL,0,16513305231,21670257,0.131229
9,MSFT,0,7506925463,3465817,0.046168
