In [199]:
import pandas as pd
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

class Generator():
    """Technical-indicator helpers built on pandas rolling / exponentially weighted windows.

    Every method expects pandas Series-like inputs (objects exposing ``rolling``,
    ``ewm`` and ``diff``) and returns results aligned to the input index.
    Rolling-window results are NaN until a full window of observations exists
    (pandas' default ``min_periods`` for an integer window).
    """

    def __init__(self):
        # The generator is stateless; nothing to initialise.
        pass

    def SMA(self, data, windows):
        """Return the simple moving average of ``data`` over ``windows`` rows."""
        return data.rolling(window=windows).mean()

    def EMA(self, data, windows):
        """Return the exponential moving average of ``data`` with ``span=windows``."""
        return data.ewm(span=windows).mean()

    def MACD(self, data, long, short, windows):
        """Return the MACD signal line of ``data``.

        The MACD line is ``EMA(fast span) - EMA(slow span)``; the value returned is
        that line smoothed again with an EMA of span ``windows`` (i.e. the signal
        line, not the raw MACD line).

        Args:
            data: Price series.
            long: Span of the slow EMA.
            short: Span of the fast EMA.
            windows: Span of the smoothing EMA applied to the MACD line.

        The smaller of ``long``/``short`` is always used as the fast span, so an
        accidentally swapped pair no longer flips the sign of the result.
        """
        fast_span, slow_span = sorted((short, long))
        fast_ema = data.ewm(span=fast_span).mean()
        slow_ema = data.ewm(span=slow_span).mean()
        macd_line = fast_ema - slow_ema
        return macd_line.ewm(span=windows).mean()

    def RSI(self, data, windows):
        """Return the relative strength index of ``data`` over ``windows`` rows.

        Average gains and average losses are simple rolling means of the
        one-step differences. Losses are taken as positive magnitudes so that
        ``rs = avg_gain / avg_loss`` is non-negative and the result stays within
        0-100. A window with no losses yields 100; a window with neither gains
        nor losses yields NaN (0/0).
        """
        delta = data.diff(1)
        gains = delta.clip(lower=0)
        # Magnitude of the down moves. Keeping them negative (as before) made rs
        # negative and pushed the RSI outside 0-100, up to +/-inf when 1 + rs == 0.
        losses = delta.clip(upper=0).abs()
        avg_up = gains.rolling(window=windows).mean()
        avg_down = losses.rolling(window=windows).mean()
        rs = avg_up / avg_down
        rsi = 100. - (100. / (1. + rs))
        return rsi

    def atr(self, data_high, data_low, windows):
        """Return the rolling mean of the high-low range over ``windows`` rows.

        Only ``data_high - data_low`` is averaged; the previous close is not an
        input, so gaps between sessions are not reflected in the result.
        """
        range_ = data_high - data_low
        return range_.rolling(window=windows).mean()

    def bollinger_band(self, data, windows):
        """Return ``(upper, lower)`` bands: rolling mean +/- 2 rolling standard deviations."""
        rolling = data.rolling(window=windows)
        sma = rolling.mean()
        std = rolling.std()
        upper = sma + 2 * std
        lower = sma - 2 * std
        return upper, lower

    def rsv(self, data, windows):
        """Return the position of ``data`` within its rolling min-max range, scaled to 0-100.

        When the rolling max equals the rolling min the division is 0/0 and the
        result for that row is NaN.
        """
        rolling = data.rolling(window=windows)
        min_ = rolling.min()
        max_ = rolling.max()
        return (data - min_) / (max_ - min_) * 100

In [200]:
# Load the raw data from "stock_data.csv" (path is relative to the working directory).
# No index column is specified, so the frame gets pandas' default integer row index.
data = pd.read_csv("stock_data.csv")

In [201]:
# Columns kept for inspection. Dividends and Stock Splits are listed here too;
# per the original note they need no normalization because they are already normalized.
columns = [
    "Open", "High", "Low", "Close", "Volume",
    "Dividends", "Stock Splits",
]
data_subset = data.loc[:, columns]

data_subset.head()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,88.7005,89.454498,88.011002,88.0345,53402000,0.0,0.0
1,88.175003,88.175003,87.0,87.024002,56476000,0.0,0.0
2,87.559998,87.720001,87.0065,87.580002,62348000,0.0,0.0
3,87.532997,88.344498,87.280502,87.475502,48856000,0.0,0.0
4,87.370003,87.533501,86.75,86.960503,50286000,0.0,0.0


In [202]:
# Per-column flag: True when the column holds at least one missing value
data.isna().any()

Open            False
High            False
Low             False
Close           False
Volume          False
Dividends       False
Stock Splits    False
dtype: bool

In [203]:
# Initialize Technical Indicator Generator.
# The instance gets its own name: rebinding `Generator` would shadow the class, and
# running this cell a second time would then fail with
# "TypeError: 'Generator' object is not callable".
generator = Generator()

# Add Percentage and Logarithmic Changes
data['pct_change'] = data['Close'].pct_change()
data['log_change'] = np.log(data['Close'] / data['Close'].shift(1))

# Technical Indicators (column insertion order is kept: ma, macd, rsi, atr, bands)
INDICATOR_WINDOWS = (7, 14, 21)

for window in INDICATOR_WINDOWS:
    data[f'{window}ma'] = generator.EMA(data['Close'], window)

# MACD's signature is (data, long, short, windows). The spans are passed by keyword
# so that 3/7 are the short (fast) spans and 11/21 the long (slow) ones; passed
# positionally, the first span binds to `long`.
data['7macd'] = generator.MACD(data['Close'], long=11, short=3, windows=7)
data['14macd'] = generator.MACD(data['Close'], long=21, short=7, windows=14)

for window in INDICATOR_WINDOWS:
    data[f'{window}rsi'] = generator.RSI(data['Close'], window)

for window in INDICATOR_WINDOWS:
    data[f'{window}atr'] = generator.atr(data['High'], data['Low'], window)

for window in INDICATOR_WINDOWS:
    data[f'{window}upper'], data[f'{window}lower'] = generator.bollinger_band(data['Close'], window)

# Normalize Selected Columns
columns_to_normalize = ['Open', 'High', 'Low', 'Close', 'Volume', 'pct_change', 'log_change',
                        '7ma', '14ma', '21ma', '7macd', '14macd', '7rsi', '14rsi', '21rsi',
                        '7atr', '14atr', '21atr', '7upper', '7lower', '14upper', '14lower', '21upper', '21lower']

# NOTE(review): the scaler is fitted on every row of `data`, so each column's min/max
# come from the whole series. If part of this data is later held out for evaluation,
# that is look-ahead leakage -- confirm this is intended.
# The scaled values overwrite the raw columns in place, so re-running this cell
# without reloading `data` computes the indicators from already-scaled prices.
scaler = MinMaxScaler(feature_range=(0, 1))
data[columns_to_normalize] = scaler.fit_transform(data[columns_to_normalize])

In [204]:
# # normalize the data
# scaler = MinMaxScaler(feature_range=(0,1))
# scaled_data = scaler.fit_transform(data_subset)

# scaled_data

# Fourier Transform Features
# Spectrum of the Close column as it stands in `data` at this point
# (the previous cell min-max scales Close in place).
close_fft = np.fft.fft(data['Close'].to_numpy())
fft_df = pd.DataFrame({'fft': close_fft})
fft_df['absolute'] = np.abs(fft_df['fft'])    # magnitude of each coefficient
fft_df['angle'] = np.angle(fft_df['fft'])     # phase of each coefficient

# Low-pass reconstructions: keep only the first and last `num_components`
# coefficients, zero everything in between, and invert back to the time domain.
for num_components in [3, 6, 9, 27, 81]:
    low_pass = close_fft.copy()
    low_pass[num_components:-num_components] = 0
    reconstructed = np.fft.ifft(low_pass)
    data[f'FT_{num_components}components'] = reconstructed.real

# %% Drop Rows with NaNs
# Every row with a NaN in any column is removed.
data = data.dropna()

# NOTE(review): the CSV was read without an index column, so this labels the default
# integer row index as 'Date' -- confirm that is the intended header.
data.index.name = 'Date'

# %% Save Preprocessed Data
data.to_csv("preprocessed_stock_data.csv")

In [205]:
# Convert numpy array to pandas DataFrame
# df_scaled = pd.DataFrame(scaled_data, columns=columns)

# # Save DataFrame to CSV file
# df_scaled.to_csv('normalized_stock_data.csv', index=False)