# cs setting features for the v.00 (big balz) model

In [1]:
# import main libs
import numpy as np
import pandas as pd

In [2]:
# df = pd.read_csv(r'../tsla_stock_data.csv').drop([0,1]) # 1 month of data
# df = pd.read_csv(r'../tsla_stock_data_1y.csv').drop([0,1]) # 1 year of data
df = pd.read_csv(r'../tsla_stock_data_2y.csv').drop([0,1]) # 2 years of data

In [3]:
df.columns = ['Date', 'Close', 'High', 'Low', 'Open', 'Volume']

In [4]:
# Convert Date column to datetime if not already
df['Date'] = pd.to_datetime(df['Date']).dt.tz_localize(None)
numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

In [5]:
df = df.sort_values(by="Date")

In [6]:
# 1️⃣ Daily Return
df['Daily_Return'] = (df['Close'] - df['Open']) / df['Open']

In [7]:
# 2️⃣ Price Momentum (Close Price Difference)
df['Price_Momentum'] = df['Close'].diff()

In [8]:
# 3️⃣ Volatility (High - Low range)
df['Volatility'] = (df['High'] - df['Low']) / df['Close']

In [9]:
# 4️⃣ Percentage Change from Previous Close
df['Pct_Change'] = df['Close'].pct_change() * 100

In [10]:
# 5️⃣ Simple Moving Averages (SMA)
df['SMA_5'] = df['Close'].rolling(window=5).mean()
df['SMA_10'] = df['Close'].rolling(window=10).mean()

In [11]:
# 6️⃣ Exponential Moving Averages (EMA)
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

In [12]:
# 7️⃣ Volume Change
df['Volume_Change'] = df['Volume'].diff()

In [13]:
# 8️⃣ Relative Strength Index (RSI)
def compute_rsi(series, period=14):
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

df['RSI_14'] = compute_rsi(df['Close'])

In [14]:
# Drop NaN values created by rolling calculations
df = df.dropna()

In [15]:
# Time-Based Features
# ----------------------------
df['Hour'] = df['Date'].dt.hour
df['DayOfWeek'] = df['Date'].dt.dayofweek  # Monday=0, Sunday=6

In [16]:
# MACD: Moving Average Convergence Divergence
# ----------------------------
ema12 = df['Close'].ewm(span=12, adjust=False).mean()
ema26 = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = ema12 - ema26
df['MACD_Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

In [17]:
# Bollinger Bands (using a 20-period SMA)
# ----------------------------
df['SMA20'] = df['Close'].rolling(window=20).mean()
df['STD20'] = df['Close'].rolling(window=20).std()
df['Bollinger_Upper'] = df['SMA20'] + (2 * df['STD20'])
df['Bollinger_Lower'] = df['SMA20'] - (2 * df['STD20'])

In [18]:
# ATR: Average True Range (using a 14-period rolling mean)
# ----------------------------
# Calculate previous close
df['Previous_Close'] = df['Close'].shift(1)
# True Range (TR) is the maximum of:
#   a) High - Low,
#   b) abs(High - Previous Close),
#   c) abs(Low - Previous Close)
df['High_Low'] = df['High'] - df['Low']
df['High_PC'] = (df['High'] - df['Previous_Close']).abs()
df['Low_PC'] = (df['Low'] - df['Previous_Close']).abs()
df['TR'] = df[['High_Low', 'High_PC', 'Low_PC']].max(axis=1)
df['ATR'] = df['TR'].rolling(window=14).mean()

In [19]:
# OBV: On-Balance Volume

# Initialize OBV starting at zero
obv = [0]
for i in range(1, len(df)):
    if df['Close'].iloc[i] > df['Close'].iloc[i-1]:
        obv.append(obv[-1] + df['Volume'].iloc[i])
    elif df['Close'].iloc[i] < df['Close'].iloc[i-1]:
        obv.append(obv[-1] - df['Volume'].iloc[i])
    else:
        obv.append(obv[-1])
df['OBV'] = obv

In [20]:
# Stochastic Oscillator
# ----------------------------
# Use a 14-period window
period = 14
df['Lowest_Low'] = df['Low'].rolling(window=period).min()
df['Highest_High'] = df['High'].rolling(window=period).max()
df['Stochastic_%K'] = ((df['Close'] - df['Lowest_Low']) / (df['Highest_High'] - df['Lowest_Low'])) * 100
# %D is a 3-period SMA of %K
df['Stochastic_%D'] = df['Stochastic_%K'].rolling(window=3).mean()

In [21]:
# Create lagged features for the closing price from t-9 to t-1
for lag in range(1, 10):  # This creates columns: Lag_Close_1, ..., Lag_Close_9
    df[f'Lag_Close_{lag}'] = df['Close'].shift(lag)

In [22]:
# Cleanup: Drop intermediate columns and any resulting NaNs
# ----------------------------
cols_to_drop = ['Previous_Close', 'High_Low', 'High_PC', 'Low_PC', 'TR', 'STD20', 'Lowest_Low', 'Highest_High']
df.drop(columns=cols_to_drop, inplace=True)
df.dropna(inplace=True)

In [23]:
# df.to_csv("tsla_stock_data_with_features.csv", index=False)
# df.to_csv("tsla_stock_data_with_features_1y.csv", index=False)
df.to_csv("tsla_stock_data_with_features_2y.csv", index=False)

In [24]:
df.columns

Index(['Date', 'Close', 'High', 'Low', 'Open', 'Volume', 'Daily_Return',
       'Price_Momentum', 'Volatility', 'Pct_Change', 'SMA_5', 'SMA_10',
       'EMA_5', 'EMA_10', 'Volume_Change', 'RSI_14', 'Hour', 'DayOfWeek',
       'MACD', 'MACD_Signal', 'SMA20', 'Bollinger_Upper', 'Bollinger_Lower',
       'ATR', 'OBV', 'Stochastic_%K', 'Stochastic_%D', 'Lag_Close_1',
       'Lag_Close_2', 'Lag_Close_3', 'Lag_Close_4', 'Lag_Close_5',
       'Lag_Close_6', 'Lag_Close_7', 'Lag_Close_8', 'Lag_Close_9'],
      dtype='object')

In [25]:
df.head()

Unnamed: 0,Date,Close,High,Low,Open,Volume,Daily_Return,Price_Momentum,Volatility,Pct_Change,...,Stochastic_%D,Lag_Close_1,Lag_Close_2,Lag_Close_3,Lag_Close_4,Lag_Close_5,Lag_Close_6,Lag_Close_7,Lag_Close_8,Lag_Close_9
34,2023-02-27 18:30:00,209.119995,209.419998,206.559998,207.169998,19739423,0.009413,1.9599,0.013676,0.94608,...,90.881423,207.160095,205.339996,205.554794,207.029999,196.862,196.199997,195.729904,195.522003,195.925003
35,2023-02-27 19:30:00,208.068695,209.179993,207.240005,209.125,18528196,-0.005051,-1.0513,0.009324,-0.502726,...,94.39692,209.119995,207.160095,205.339996,205.554794,207.029999,196.862,196.199997,195.729904,195.522003
36,2023-02-27 20:30:00,207.660004,208.139999,206.350006,208.067001,12161446,-0.001956,-0.408691,0.00862,-0.196421,...,93.15824,208.068695,209.119995,207.160095,205.339996,205.554794,207.029999,196.862,196.199997,195.729904
37,2023-02-28 14:30:00,205.132095,211.229996,204.820007,210.589996,46960347,-0.025917,-2.527908,0.031248,-1.21733,...,82.24359,207.660004,208.068695,209.119995,207.160095,205.339996,205.554794,207.029999,196.862,196.199997
38,2023-02-28 15:30:00,205.275696,206.330002,203.75,205.119995,24086375,0.000759,0.1436,0.012568,0.070004,...,73.434533,205.132095,207.660004,208.068695,209.119995,207.160095,205.339996,205.554794,207.029999,196.862
