Preprocessing Data -> cleaning + normalize


In [15]:
import pandas as pd
import numpy as np
import yfinance as yf

In [16]:
def calculate_volatility(symbol, per):
    # historical data from Yahoo Finance
    stock_data = symbol.history(period = per)
    
    stock_data['Returns'] = stock_data['Close'].pct_change()

    # volatility -> standard deviation
    volatility = np.std(stock_data['Returns'])

    return volatility

In [17]:
def calculate_bollinger_bands(symbol, per, window=20, num_std=2):

    stock_data = symbol.history(period=per)

    stock_data['Returns'] = stock_data['Close'].pct_change()

    #also get volatility
    stock_data['Volatility'] = stock_data['Returns'].rolling(window=window).std()

    # calculate the rolling mean and standard deviation for Bollinger Bands
    stock_data['SMA'] = stock_data['Close'].rolling(window=window).mean()
    stock_data['Upper Band'] = stock_data['SMA'] + (num_std * stock_data['Close'].rolling(window=window).std())
    stock_data['Lower Band'] = stock_data['SMA'] - (num_std * stock_data['Close'].rolling(window=window).std())

    return stock_data[['Close', 'SMA', 'Upper Band', 'Lower Band', 'Volatility']]

#returns a dataframe of things

In [18]:
def macd_calc(symbol, per):
    stock_data = symbol.history(period=per)
    stock_data.get('Volume')
    stock_data.get('Close')

    stock_data['EMA12'] = stock_data['Close'].ewm(span=12, min_periods=0, adjust=False).mean()
    stock_data['EMA26'] = stock_data['Close'].ewm(span=26, min_periods=0, adjust=False).mean()
    stock_data['MACD'] = stock_data['EMA12'] - stock_data['EMA26']
    stock_data['Signal'] = stock_data['MACD'].ewm(span=9, min_periods=0, adjust=False).mean()
    stock_data['Histogram'] = stock_data['MACD'] - stock_data['Signal']

    return stock_data[['EMA12', 'EMA26', 'MACD', 'Signal', 'Histogram']]

In [19]:
def sma_calc(symbol, per):
    stock_data = symbol.history(period=per)
    stock_data['SMA50'] = stock_data['Close'].rolling(50).mean()
    stock_data['SMA200'] = stock_data['Close'].rolling(200).mean()

    return stock_data [['SMA50', 'SMA200']]


In [20]:
def rsi_cal(symbol, per):
    stock_data = symbol.history(period=per)
    stock_data['RSI'] = 100 - 100 / (
            1 + (stock_data['Close'].diff() / stock_data['Close'].shift(1)).rolling(14).mean())

    return stock_data ['RSI']

In [21]:
def volume_calc(symbol, per):
    stock_data = symbol.history(period = per)

    return  stock_data['Volume']


In [27]:
def combine_all (symbol, per):

    volatility = calculate_volatility(symbol, per)
    bollinger_bands_data = calculate_bollinger_bands(symbol, per)
    macd = macd_calc(symbol, per)
    sma = sma_calc(symbol, per)
    rsi = rsi_cal(symbol, per)
    volume = volume_calc(symbol, per)

    return bollinger_bands_data.join([macd, sma, rsi, volume])

In [28]:
symbol = yf.Ticker('YUM')
per = '10y'

# volatility = calculate_volatility(symbol, per)
# bollinger_bands_data = calculate_bollinger_bands(symbol, per)
# macd = macd_calc(symbol, per)
# sma = sma_calc(symbol, per)
# rsi = rsi_cal(symbol, per)
# volume = volume_calc(symbol, per)

# stock_data_df = bollinger_bands_data.join([macd, sma, rsi, volume])

stock_data_df = combine_all(symbol, per)

In [30]:
stock_data_df = stock_data_df.dropna()

In [31]:
stock_data_df

Unnamed: 0_level_0,Close,SMA,Upper Band,Lower Band,Volatility,EMA12,EMA26,MACD,Signal,Histogram,SMA50,SMA200,RSI,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2014-11-14 00:00:00-05:00,45.366333,43.437146,45.939320,40.934972,0.008831,44.359955,43.625597,0.734358,0.446468,0.287890,43.095145,44.804837,0.511272,3379017
2014-11-17 00:00:00-05:00,44.887070,43.584565,46.062314,41.106817,0.009298,44.441049,43.719039,0.722010,0.501577,0.220433,43.120471,44.831660,0.378539,3078700
2014-11-18 00:00:00-05:00,45.008404,43.717728,46.201270,41.234186,0.009181,44.528332,43.814547,0.713786,0.544018,0.169767,43.151480,44.841468,0.458216,3598239
2014-11-19 00:00:00-05:00,44.874928,43.864540,46.250505,41.478575,0.008811,44.581656,43.893094,0.688562,0.572927,0.115635,43.173549,44.855149,0.399577,2242988
2014-11-20 00:00:00-05:00,45.809193,44.054426,46.423866,41.684986,0.009615,44.770509,44.035028,0.735481,0.605438,0.130043,43.214302,44.871619,0.359755,6258248
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-29 00:00:00-05:00,130.550003,129.688501,131.945433,127.431569,0.008232,130.075836,129.536652,0.539184,0.635001,-0.095817,128.562644,129.602927,0.109113,1482100
2024-01-30 00:00:00-05:00,130.619995,129.686501,131.939875,127.433126,0.008229,130.159553,129.616900,0.542653,0.616531,-0.073879,128.654566,129.592119,0.136197,1421500
2024-01-31 00:00:00-05:00,129.490005,129.709501,131.943973,127.475029,0.007964,130.056545,129.607500,0.449045,0.583034,-0.133989,128.699802,129.574577,0.019993,2154200
2024-02-01 00:00:00-05:00,130.449997,129.777000,132.015507,127.538494,0.008126,130.117076,129.669907,0.447169,0.555861,-0.108692,128.767621,129.550890,0.083938,1440700


KeyError: "None of [Index([''], dtype='object')] are in the [columns]"