# Stock price prediction

- Author: Andreas Persson
- Email: apers@chalmers.se
- github: https://github.com/andreaspersson01
- linkedin: https://www.linkedin.com/in/andreas-persson-2011291a7/

## Introduction

In this project, my goal is first to predict if the stock price for Investor B on the Stockholm Exchange, will move up or down. This will be done by using different technical indicators as input variables. This is a work in progress as I learn more about Time Series Anlysis using python. Enjoy! :D



In [86]:
# Imports

import pandas as pd
import numpy as np

# Vizulasation
import matplotlib.pyplot as plt

# Stock data
#!pip install yfinance
#!pip install exchange_calendars
import yfinance as yf
import exchange_calendars as xcals

# Technical Analysis package
#!pip install ta
#!pip install pandas_ta
import ta
import pandas_ta



In [117]:
# Fetch data for Investor B on the Stockholm Exchange
data = yf.download("INVE-B.ST", start="2006-01-01", end="2023-12-31")

# Get the Stockholm Stock Exchange calendar
stockholm_calendar = xcals.get_calendar("XSTO")

# Get the trading days
trading_days = stockholm_calendar.sessions_in_range("2006-01-01", "2023-12-31")

# Filter the data for trading days
data = data[data.index.isin(trading_days)]

# Display the tail of the filtered data
display(data.tail())

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-12-21,232.0,232.25,229.75,231.149994,231.149994,2213157
2023-12-22,231.0,232.5,230.100006,231.949997,231.949997,2107061
2023-12-27,232.0,233.100006,231.449997,232.100006,232.100006,2610513
2023-12-28,232.600006,232.949997,231.5,231.949997,231.949997,2084982
2023-12-29,232.0,233.699997,231.850006,233.5,233.5,2445449


In [118]:
# Create target value
target = pd.DataFrame({'y': (data['Close'] > data['Close'].shift(1)).astype(int)})


display(data.head())
display(target.head())

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006-01-02,34.875,34.875,34.25,34.375,20.299772,3210420
2006-01-03,34.5,34.875,34.5,34.5,20.37359,7382236
2006-01-04,34.875,35.0,34.5,34.875,20.595045,10157744
2006-01-05,35.0,35.125,34.75,35.125,20.742676,2712732
2006-01-09,35.375,36.0,35.375,35.875,21.185577,13009748


Unnamed: 0_level_0,y
Date,Unnamed: 1_level_1
2006-01-02,0
2006-01-03,1
2006-01-04,1
2006-01-05,1
2006-01-09,1


## TODO Read about and understand these technical indicators
## Technical Indicators
- Overlap Indicators:
    - Moving Average
    - Exponential Moving Average
    - Double Exponential Moving Average
    - Kaufman's Adaptive Moving Average
    - Parabolic SAR
- Momentum indicators:
    - Average Directional Movement Index
    - Price Oscillator - Absolute
    - Commodity Channel Index
    - Moving Average Convergence/Divergence
    - Money Flow Index
    - Momentum
    - Relative Strength Index
- Volume Indicators:
    - Chaikin A/D Line (AD)
    - Chaikin Oscillator
    - On Balance Volume
- Volatility Indicators
    - True Range
    - Average True Range


In [119]:

# Time frame definitions
time_frames = {
    '1_year': 252,
    'half_year': 126,
    'quarter': 63,
    'month': 21,
    'week': 5,
}

# Create a copy of the DataFrame
data = data.copy()

# Loop through each time frame and calculate indicators
for period_name, window in time_frames.items():
    # Calculate Simple Moving Average (SMA)
    data[f'SMA_{period_name}'] = ta.trend.sma_indicator(data['Close'], window=window)

    # Calculate Exponential Moving Average (EMA)
    data[f'EMA_{period_name}'] = ta.trend.ema_indicator(data['Close'], window=window)

    # Calculate Double Exponential Moving Average (DEMA)
    data[f'DEMA_{period_name}'] = calculate_dema(data['Close'], window)

    # Calculate Kaufman's Adaptive Moving Average (KAMA)
    data[f'KAMA_{period_name}'] = ta.momentum.kama(data['Close'], window=window, pow1=2, pow2=30)

    # Calculate Parabolic SAR
    psar_result = pandas_ta.psar(data['High'], data['Low'], data['Close'], acceleration=0.02, maximum=0.2)
    data[f'Parabolic_SAR_{period_name}'] = psar_result[['PSARl_0.02_0.2', 'PSARs_0.02_0.2']].max(axis=1)

    # Momentum
    data[f'Momentum_{period_name}'] = data['Close'] - data['Close'].shift(window)

    # Average Directional Movement Index (ADX)
    data[f'ADX_{period_name}'] = ta.trend.adx(data['High'], data['Low'], data['Close'], window=window)

    # Price Oscillator - Absolute (PO)
    data[f'PO_{period_name}'] = ta.trend.sma_indicator(data['Close'], window=int(window/2)) - ta.trend.sma_indicator(data['Close'], window=window)

    # Commodity Channel Index (CCI)
    data[f'CCI_{period_name}'] = ta.trend.cci(data['High'], data['Low'], data['Close'], window=window)

    # Moving Average Convergence/Divergence (MACD)
    macd = ta.trend.MACD(data['Close'], window_slow=window, window_fast=int(window/2), window_sign=int(window/3))
    data[f'MACD_{period_name}'] = macd.macd()

    # Money Flow Index (MFI)
    data[f'MFI_{period_name}'] = ta.volume.money_flow_index(data['High'], data['Low'], data['Close'], data['Volume'], window=window)

    # Relative Strength Index (RSI)
    data[f'RSI_{period_name}'] = ta.momentum.rsi(data['Close'], window=window)
    
    # Average True Range (ATR)
    data[f'ATR_{period_name}'] = ta.volatility.average_true_range(data['High'], data['Low'], data['Close'], window=window)

# Calculate Chaikin A/D Line (AD)
data['AD'] = ta.volume.chaikin_money_flow(data['High'], data['Low'], data['Close'], data['Volume'])

# Calculate Chaikin Oscillator (manually)
ad_3_ema = data['AD'].ewm(span=3, adjust=False).mean()
ad_10_ema = data['AD'].ewm(span=10, adjust=False).mean()
data['Chaikin_Oscillator'] = ad_3_ema - ad_10_ema

# Calculate On Balance Volume (OBV)
data['OBV'] = ta.volume.on_balance_volume(data['Close'], data['Volume'])

# True Range (TR)
data[f'TR'] = ta.volatility.average_true_range(data['High'], data['Low'], data['Close'], window=1)

# Function to calculate DEMA
def calculate_dema(series, window):
    ema = series.ewm(span=window, adjust=False).mean()
    dema = 2 * ema - ema.ewm(span=window, adjust=False).mean()
    return dema

print(max(data.isnull().sum()))


252


In [121]:
# Drop 252 first rows:
data = data.iloc[252:]
target = target.iloc[252:]
# Show the DataFrame with the new indicators
print(data.isnull().sum())
# Drop specified columns
data = data.drop(columns=['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'])
display(data)
display(target)

Open                  0
High                  0
Low                   0
Close                 0
Adj Close             0
                     ..
ATR_week              0
AD                    0
Chaikin_Oscillator    0
OBV                   0
TR                    0
Length: 75, dtype: int64


Unnamed: 0_level_0,SMA_1_year,EMA_1_year,DEMA_1_year,KAMA_1_year,Parabolic_SAR_1_year,Momentum_1_year,ADX_1_year,PO_1_year,CCI_1_year,MACD_1_year,...,PO_week,CCI_week,MACD_week,MFI_week,RSI_week,ATR_week,AD,Chaikin_Oscillator,OBV,TR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-01-03,35.698413,36.615161,38.438105,42.369437,41.118918,7.500000,0.000000,1.627976,161.816317,1.467394,...,0.375000,60.344828,0.231210,54.863592,64.762697,0.716858,0.471781,0.087653,330380932,1.000000
2007-01-04,35.725198,36.651800,38.496683,42.357969,42.625000,6.750000,0.000000,1.668651,139.940654,1.480636,...,-0.187500,-72.829132,-0.086044,40.813966,47.226385,0.798486,0.435149,0.063314,319542336,1.125000
2007-01-05,35.750496,36.688149,38.554511,42.347045,42.625000,6.375000,0.000000,1.707837,132.066221,1.493382,...,-0.500000,-102.864583,-0.137424,39.910400,47.226385,0.788789,0.430396,0.046088,324090004,0.750000
2007-01-08,35.774306,36.723223,38.609627,42.335479,42.540000,6.000000,0.000000,1.754464,133.606470,1.504662,...,-0.387500,-55.555556,-0.159970,36.488377,43.541948,0.731031,0.393021,0.022960,318556432,0.500000
2007-01-09,35.794147,36.756044,38.660096,42.323131,42.458400,5.000000,0.000000,1.806052,128.923036,1.513528,...,-0.275000,-62.271062,-0.212765,12.640476,36.435518,0.709825,0.326614,-0.009719,310999708,0.625000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-21,210.058056,208.654590,218.080347,212.329459,234.350006,42.829987,4.610415,2.722420,224.858408,5.088828,...,0.324998,-56.052504,0.400925,71.864108,65.787038,2.625887,0.252343,-0.030339,2037614363,3.050003
2023-12-22,210.226746,208.838743,218.372686,212.636607,234.350006,42.509995,4.624064,2.708095,228.119706,5.191392,...,-0.230003,-18.311459,0.396750,65.507092,70.663020,2.580708,0.241794,-0.030015,2039721424,2.399994
2023-12-27,210.390079,209.022627,218.663632,212.932896,234.258006,41.160004,4.638218,2.675317,233.525061,5.292074,...,0.055000,25.193981,0.357659,83.825074,71.611335,2.394568,0.222358,-0.033339,2042331937,1.650009
2023-12-28,210.557421,209.203871,218.948474,213.229898,234.167846,42.169998,4.652316,2.641706,230.622393,5.388551,...,0.035002,17.856741,0.219489,66.563865,68.830175,2.205654,0.217139,-0.033328,2040246955,1.449997


Unnamed: 0_level_0,y
Date,Unnamed: 1_level_1
2007-01-03,0
2007-01-04,0
2007-01-05,0
2007-01-08,0
2007-01-09,0
...,...
2023-12-21,0
2023-12-22,1
2023-12-27,1
2023-12-28,0


In [131]:
data.to_csv("data.csv")
target.to_csv("target.csv")

## ML part

In [133]:
data = pd.read_csv("data.csv")
target = pd.read_csv("target.csv")
display(data)
display(target)

Unnamed: 0,Date,SMA_1_year,EMA_1_year,DEMA_1_year,KAMA_1_year,Parabolic_SAR_1_year,Momentum_1_year,ADX_1_year,PO_1_year,CCI_1_year,...,PO_week,CCI_week,MACD_week,MFI_week,RSI_week,ATR_week,AD,Chaikin_Oscillator,OBV,TR
0,2007-01-03,35.698413,36.615161,38.438105,42.369437,41.118918,7.500000,0.000000,1.627976,161.816317,...,0.375000,60.344828,0.231210,54.863592,64.762697,0.716858,0.471781,0.087653,330380932,1.000000
1,2007-01-04,35.725198,36.651800,38.496683,42.357969,42.625000,6.750000,0.000000,1.668651,139.940654,...,-0.187500,-72.829132,-0.086044,40.813966,47.226385,0.798486,0.435149,0.063314,319542336,1.125000
2,2007-01-05,35.750496,36.688149,38.554511,42.347045,42.625000,6.375000,0.000000,1.707837,132.066221,...,-0.500000,-102.864583,-0.137424,39.910400,47.226385,0.788789,0.430396,0.046088,324090004,0.750000
3,2007-01-08,35.774306,36.723223,38.609627,42.335479,42.540000,6.000000,0.000000,1.754464,133.606470,...,-0.387500,-55.555556,-0.159970,36.488377,43.541948,0.731031,0.393021,0.022960,318556432,0.500000
4,2007-01-09,35.794147,36.756044,38.660096,42.323131,42.458400,5.000000,0.000000,1.806052,128.923036,...,-0.275000,-62.271062,-0.212765,12.640476,36.435518,0.709825,0.326614,-0.009719,310999708,0.625000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4262,2023-12-21,210.058056,208.654590,218.080347,212.329459,234.350006,42.829987,4.610415,2.722420,224.858408,...,0.324998,-56.052504,0.400925,71.864108,65.787038,2.625887,0.252343,-0.030339,2037614363,3.050003
4263,2023-12-22,210.226746,208.838743,218.372686,212.636607,234.350006,42.509995,4.624064,2.708095,228.119706,...,-0.230003,-18.311459,0.396750,65.507092,70.663020,2.580708,0.241794,-0.030015,2039721424,2.399994
4264,2023-12-27,210.390079,209.022627,218.663632,212.932896,234.258006,41.160004,4.638218,2.675317,233.525061,...,0.055000,25.193981,0.357659,83.825074,71.611335,2.394568,0.222358,-0.033339,2042331937,1.650009
4265,2023-12-28,210.557421,209.203871,218.948474,213.229898,234.167846,42.169998,4.652316,2.641706,230.622393,...,0.035002,17.856741,0.219489,66.563865,68.830175,2.205654,0.217139,-0.033328,2040246955,1.449997


Unnamed: 0,Date,y
0,2007-01-03,0
1,2007-01-04,0
2,2007-01-05,0
3,2007-01-08,0
4,2007-01-09,0
...,...,...
4262,2023-12-21,0
4263,2023-12-22,1
4264,2023-12-27,1
4265,2023-12-28,0
