In [3]:
pip install -q stockstats

In [4]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from stockstats import StockDataFrame

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.svm import SVR

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models

from math import floor

import pandas_datareader.data as web
import pickle
import time

In [5]:
# Sector name -> ticker symbols analysed for that sector.
# A ticker may be listed under more than one sector (e.g. "NVDA", "FB");
# it is then counted once per sector it appears in.
stock_list = {
    "BlockChain": ["COIN", "NVDA", "FB"],
    "Airline": ["BA", "GD", "LMT"],
    "Traveling": ["UBER", "ABNB", "MAR", "BKNG"],
    "Semiconductors": ["INTC", "NVDA", "QCOM", "MU", "AMD"],
    "Cloud Computing": ["IBM", "AMZN", "GOOG", "CRM"],
    "Social Media": ["TWTR", "SNAP", "PINS", "FB"],
    "Entertainment": ["DIS", "NFLX", "FB"],
    "Retail": ["WMT", "COST", "TGT", "BBY", "HD"],
    "Franchise": ["MCD", "YUM", "SBUX", "DPZ"],
    "Real Estate": ["HST", "EQR", "AVB", "PLD", "SPG"],
    "Telecommunication": ["T", "TMUS", "VZ", "CMCSA", "CHTR"],
    "Energy & Resources": ["DOW", "DD"],
    "Luxury goods": ["RACE", "EL", "PVH"],
}

In [6]:
def get_bollinger_band_width(df, window=20, num_std=2):
    """Return a copy of ``df`` extended with Bollinger band columns.

    The rolling statistics are computed over the ``'Close'`` column in the
    row order of ``df``; sort the frame beforehand if a specific
    chronological direction is required.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data containing a ``'Close'`` column. It is NOT modified.
    window : int, default 20
        Number of rows in the rolling window (``min_periods=1`` is used, so
        shorter windows at the start still produce a mean).
    num_std : float, default 2
        Number of rolling standard deviations between the mean and each band.

    Returns
    -------
    pandas.DataFrame
        A new frame with the added columns ``bollinger_mean``,
        ``bollinger_std``, ``BOL_UP``, ``BOL_DOWN`` and ``bollinger_gap``
        (``BOL_UP - BOL_DOWN``). Rows containing a NaN in any column are
        dropped; this always removes the first row, whose rolling standard
        deviation is undefined for a single observation.

    Raises
    ------
    KeyError
        If ``df`` has no ``'Close'`` column.
    """
    # Work on a copy so repeated calls (or re-running the cell) never alter
    # the caller's frame.
    result = df.copy()

    rolling_close = result['Close'].rolling(window, min_periods=1)
    result['bollinger_mean'] = rolling_close.mean()
    result['bollinger_std'] = rolling_close.std()

    result['BOL_UP'] = result['bollinger_mean'] + (num_std * result['bollinger_std'])
    result['BOL_DOWN'] = result['bollinger_mean'] - (num_std * result['bollinger_std'])
    result['bollinger_gap'] = result['BOL_UP'] - result['BOL_DOWN']

    return result.dropna()

In [7]:
# Analysis window: `year` calendar years ending in the year of `end_date`,
# beginning on 1 January of the first of those years.
end_date = "2021-12-31"
year = 22
start_date = f"{int(end_date[:4]) - year + 1}-01-01"

In [10]:
# Download each stock's price history and record its mean Bollinger band width.
# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row is quadratic anyway.
summary_records = []
count = 0
for sector, stocks in stock_list.items():
    for stock in stocks:
        count = count + 1
        # Pause before every fifth request so the data source does not drop
        # the connection.
        if count % 5 == 0:
            print("Waiting to avoid connection error...")
            time.sleep(15)
        print("Calculating Bollinger Band Width for ["+sector+":"+stock+"]")
        df = web.DataReader(stock, 'stooq', start=start_date, end=end_date)
        boll_width_df = get_bollinger_band_width(df)
        mean_boll_width = boll_width_df["bollinger_gap"].mean()
        summary_records.append({"Sector": sector,
                                "Stock": stock,
                                "mean_bollinger_gap": mean_boll_width})

# One row per (sector, stock) pair, in the iteration order of `stock_list`.
mean_bollinger_gap_summary = pd.DataFrame(
    summary_records, columns=["Sector", "Stock", "mean_bollinger_gap"]
)

Calculating Bollinger Band Width for [BlockChain:COIN]
Calculating Bollinger Band Width for [BlockChain:NVDA]
Calculating Bollinger Band Width for [BlockChain:FB]
Calculating Bollinger Band Width for [Airline:BA]
Waiting to avoid connection error...
Calculating Bollinger Band Width for [Airline:GD]
Calculating Bollinger Band Width for [Airline:LMT]
Calculating Bollinger Band Width for [Traveling:UBER]
Calculating Bollinger Band Width for [Traveling:ABNB]
Calculating Bollinger Band Width for [Traveling:MAR]
Waiting to avoid connection error...
Calculating Bollinger Band Width for [Traveling:BKNG]
Calculating Bollinger Band Width for [Semiconductors:INTC]
Calculating Bollinger Band Width for [Semiconductors:NVDA]
Calculating Bollinger Band Width for [Semiconductors:QCOM]
Calculating Bollinger Band Width for [Semiconductors:MU]
Waiting to avoid connection error...
Calculating Bollinger Band Width for [Semiconductors:AMD]
Calculating Bollinger Band Width for [Cloud Computing:IBM]
Calculati

## Aggregate the results by sector

In [11]:
# Average the per-stock mean band width within each sector.
# numeric_only=True skips the string column "Stock"; without it pandas >= 2.0
# raises a TypeError when it tries to average ticker symbols.
bg_sector = mean_bollinger_gap_summary.groupby("Sector").mean(numeric_only=True)

# Rank sectors from widest to narrowest average band width.
x = bg_sector.sort_values(by="mean_bollinger_gap", ascending=False)

In [13]:
# Display the per-sector averages, sorted by mean_bollinger_gap in descending order.
x

Unnamed: 0_level_0,mean_bollinger_gap
Sector,Unnamed: 1_level_1
Cloud Computing,51.472271
Traveling,41.780777
BlockChain,28.852095
Entertainment,14.684937
Luxury goods,11.770115
Airline,10.901414
Social Media,10.864089
Telecommunication,7.951963
Retail,6.811624
Energy & Resources,6.352364


In [14]:
# Split the ranked sectors into three consecutive, near-equal tiers.
# The split is done on row positions and applied with .iloc, because calling
# np.array_split directly on a DataFrame relies on the deprecated
# DataFrame.swapaxes in recent pandas releases.
tier_positions = np.array_split(np.arange(len(x)), 3)
bg_sectors = [x.iloc[positions] for positions in tier_positions]
risk_high = bg_sectors[0]    # widest average band width
risk_middle = bg_sectors[1]
risk_low = bg_sectors[2]     # narrowest average band width

#Find the list of sectors
print(f'This is the list of high risk sectors{list(risk_high.index)}')
print(f'This is the list of middle risk sectors{list(risk_middle.index)}')
print(f'This is the list of low risk sectors{list(risk_low.index)}')

This is the list of high risk sectors['Cloud Computing', 'Traveling', 'BlockChain', 'Entertainment', 'Luxury goods']
This is the list of middle risk sectors['Airline', 'Social Media', 'Telecommunication', 'Retail']
This is the list of low risk sectors['Energy & Resources', 'Franchise', 'Real Estate', 'Semiconductors']


In [None]:
# Write each risk tier to its own CSV file.
# The sector names live in the index, so it is turned into a regular column
# first; writing with index=False alone would drop the names and leave only
# the unlabeled mean_bollinger_gap values in the files.
risk_high.reset_index().to_csv("High_Risk.csv", index=False)
risk_middle.reset_index().to_csv("Middle_Risk.csv", index=False)
risk_low.reset_index().to_csv("Low_Risk.csv", index=False)

In [None]:
# Save the per-stock summary as CSV; the file name records the number of
# years covered, and the positional row index is not written.
mean_bollinger_gap_summary.to_csv(f"boll_gap_for_{year}_years.csv", index=False)