# Tesla Stock Price Prediction - SYS5185 Project
Aahil Jivani (8729441)

Alex Rodriguez (300452235)

In [None]:
# change the numpy version to ensure compatibility with pandas_ta library
!pip install numpy==1.23.5



In [None]:
# Install required packages if needed
!pip install yfinance pandas_ta scikit-learn shap

import numpy as np
import pandas as pd
import yfinance as yf
import pandas_ta as ta
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler


%matplotlib inline

Collecting pandas_ta
  Using cached pandas_ta-0.3.14b.tar.gz (115 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pandas_ta
  Building wheel for pandas_ta (setup.py) ... [?25l[?25hdone
  Created wheel for pandas_ta: filename=pandas_ta-0.3.14b0-py3-none-any.whl size=218909 sha256=b4b557678baf8a37853ca5d27c54b39f3286d7be58e634ca9b8bf27a80068a71
  Stored in directory: /root/.cache/pip/wheels/7f/33/8b/50b245c5c65433cd8f5cb24ac15d97e5a3db2d41a8b6ae957d
Successfully built pandas_ta
Installing collected packages: pandas_ta
Successfully installed pandas_ta-0.3.14b0


In [None]:
# Download Tesla historical daily data from yfinance
ticker = "TSLA"
start_date = "2018-01-01"
end_date = "2025-01-01"  # Adjust as needed

# auto_adjust is passed explicitly so the result does not depend on the
# library's default (which has changed between yfinance releases).
df = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)
assert not df.empty, f"yfinance returned no rows for {ticker}"

# Depending on the yfinance version, the columns may be a MultiIndex of
# (price field, ticker) even for a single ticker. Keep only the price-field
# level so every column name is a plain string.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Move the date index into a regular column (no in-place mutation, so the cell
# can be re-run safely), then normalise names by label rather than by position.
df = df.reset_index()
df.columns = [str(col).strip().lower() for col in df.columns]

# Fail early if the downloaded schema is not what the rest of the notebook expects.
expected_cols = {'date', 'close', 'high', 'low', 'open', 'volume'}
missing_cols = expected_cols - set(df.columns)
assert not missing_cols, f"missing expected columns: {sorted(missing_cols)}"

df.head()

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,date,close,high,low,open,volume
0,2018-01-02,21.368668,21.474001,20.733334,20.799999,65283000
1,2018-01-03,21.15,21.683332,21.036667,21.4,67822500
2,2018-01-04,20.974667,21.236668,20.378668,20.858,149194500
3,2018-01-05,21.105333,21.149332,20.799999,21.108,68868000
4,2018-01-08,22.427334,22.468,21.033333,21.066668,147891000


In [None]:
# Summarise the frame: index range, column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1761 entries, 0 to 1760
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1761 non-null   datetime64[ns]
 1   close   1761 non-null   float64       
 2   high    1761 non-null   float64       
 3   low     1761 non-null   float64       
 4   open    1761 non-null   float64       
 5   volume  1761 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 82.7 KB


# 1 Data Preprocessing

## 1.1 Handling Missing Data

In [None]:
# Count missing values per column
df.isnull().sum()

Unnamed: 0,0
date,0
close,0
high,0
low,0
open,0
volume,0


No missing values were found in the downloaded data, so no action is needed for this step.

## 1.2 Data Cleaning

## 1.3 Feature Engineering

In [None]:
# Generate technical indicators using pandas_ta
# Produce a wide set of candidate features - 38 features
#
# Columns assigned directly (df['...'] = ...) get lowercase names chosen here;
# columns added through the df.ta accessor with append=True get the names that
# pandas_ta generates (e.g. MACD_12_26_9, ADX_14, AROONU_14).
# The order of the statements fixes the column order of df, so keep it stable.

# 1. Momentum Indicators: RSI over two look-back windows
for length in (14, 7):
    df[f'rsi_{length}'] = ta.rsi(df['close'], length=length)

# 2. MACD (adds three columns: MACD, MACD_signal, MACD_hist)
df.ta.macd(fast=12, slow=26, signal=9, append=True)

# 3. Simple Moving Averages (SMA)
for length in (10, 20, 50, 100, 200):
    df[f'sma_{length}'] = ta.sma(df['close'], length=length)

# 4. Exponential Moving Averages (EMA)
for length in (10, 20, 50, 100):
    df[f'ema_{length}'] = ta.ema(df['close'], length=length)

# 5. Bollinger Bands (appends multiple columns: lower, middle, upper, etc.)
df.ta.bbands(close='close', length=20, std=2, append=True)

# 6. Volatility: ATR (Average True Range)
df['atr_14'] = ta.atr(df['high'], df['low'], df['close'], length=14)

# 7. Commodity Channel Index (CCI)
df['cci_20'] = ta.cci(df['high'], df['low'], df['close'], length=20)

# 8. Stochastic Oscillator (adds STOCHk and STOCHd columns)
df.ta.stoch(high='high', low='low', close='close', k=14, d=3, append=True)

# 9. Rate of Change (ROC)
df['roc_10'] = ta.roc(df['close'], length=10)

# 10. Trend Strength: ADX and Directional Indicators
# The adx() method appends the columns "ADX_14", "DMP_14", and "DMN_14"
df.ta.adx(length=14, append=True)

# 11. Volume-Based Indicator: OBV (On Balance Volume)
df['obv'] = ta.obv(df['close'], df['volume'])

# 12. Oscillator: Williams %R
df['willr'] = ta.willr(df['high'], df['low'], df['close'], length=14)

# 13. Aroon Indicator (adds AROONup, AROONdown, and AROONosc)
# The call returns the Aroon sub-frame; the trailing semicolon keeps the
# notebook from rendering it as this cell's output.
df.ta.aroon(high='high', low='low', length=14, append=True);

Unnamed: 0,AROOND_14,AROONU_14,AROONOSC_14
0,,,
1,,,
2,,,
3,,,
4,,,
...,...,...,...
1756,0.0,71.428571,71.428571
1757,0.0,64.285714,64.285714
1758,0.0,57.142857,57.142857
1759,0.0,50.000000,50.000000


In [None]:
# Preview the first rows with the indicator columns included; the leading rows
# show NaN for most indicators (presumably until each look-back window has
# enough history - confirm against the pandas_ta docs).
df.head()

Unnamed: 0,date,close,high,low,open,volume,rsi_14,rsi_7,MACD_12_26_9,MACDh_12_26_9,...,STOCHd_14_3_3,roc_10,ADX_14,DMP_14,DMN_14,obv,willr,AROOND_14,AROONU_14,AROONOSC_14
0,2018-01-02,21.368668,21.474001,20.733334,20.799999,65283000,,,,,...,,,,,,65283000.0,,,,
1,2018-01-03,21.15,21.683332,21.036667,21.4,67822500,,,,,...,,,,,,-2539500.0,,,,
2,2018-01-04,20.974667,21.236668,20.378668,20.858,149194500,,,,,...,,,,,,-151734000.0,,,,
3,2018-01-05,21.105333,21.149332,20.799999,21.108,68868000,,,,,...,,,,,,-82866000.0,,,,
4,2018-01-08,22.427334,22.468,21.033333,21.066668,147891000,,,,,...,,,,,,65025000.0,,,,




## Export data set

In [None]:
# Write the frame to CSV; index=False leaves the integer row index out of the file
df.to_csv('tsla_2018_2025.csv', index=False)