In [1]:
from sklearn.preprocessing import MinMaxScaler



In [2]:
import numpy as np
import pandas as pd

In [3]:
# Min-max scaler shared by the notebook; set_output(transform="pandas") makes
# its transform methods return DataFrames rather than bare NumPy arrays.
mm_scaler = MinMaxScaler().set_output(transform="pandas")

In [4]:
# Raw AAPL table; create_x_y() later passes it through rename_columns(),
# which labels its columns datetime/open/high/low/close/volume by position.
df_data = pd.read_csv("raw_data/AAPL.csv")

In [5]:
# Technical-analysis table for AAPL; rename_technical() expects its timestamp
# column to have been read under the name "Unnamed: 0".
df_technical = pd.read_csv("raw_data/AAPL_technical_analysis.csv")

# Supporting functions

In [7]:
def rename_columns(df):
    """Return a copy of ``df`` with its columns renamed to the OHLCV schema.

    The names are applied by position, in this order:
    ``datetime, open, high, low, close, volume``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with exactly six columns, already in the order listed above.

    Returns
    -------
    pandas.DataFrame
        A new frame with the renamed columns. The input frame is left as-is.

    Raises
    ------
    ValueError
        Raised by pandas when ``df`` does not have exactly six columns.
    """
    columns = ["datetime", "open", "high", "low", "close", "volume"]
    # Rename on a copy: assigning to ``df.columns`` directly would also rename
    # the caller's frame, which makes notebook cells depend on run history.
    renamed = df.copy()
    renamed.columns = columns
    return renamed

In [8]:
def rename_technical(df_technical):
    """Return ``df_technical`` with the ``"Unnamed: 0"`` column renamed to ``"datetime"``.

    All other columns keep their names; the input frame is not modified.
    """
    column_map = {"Unnamed: 0": "datetime"}
    return df_technical.rename(columns=column_map)

In [9]:
def convert_datetime(df):
    """Return a copy of ``df`` whose ``"datetime"`` column is parsed by ``pd.to_datetime``.

    The input frame is left untouched; the column keeps its position.
    """
    parsed_stamps = pd.to_datetime(df["datetime"])
    return df.assign(datetime=parsed_stamps)

In [10]:
def merge_columns(df_values, df_technical):
    """Inner-join the two frames on their shared ``"datetime"`` column.

    Only rows whose ``datetime`` value occurs in both frames are kept;
    ``df_values`` is the left side of the join.
    """
    return pd.merge(left=df_values, right=df_technical, on="datetime", how="inner")

In [11]:
def clean_data(df):
    """Linearly interpolate numeric gaps, then drop rows that still contain NaN.

    Only numeric columns are interpolated. Non-numeric columns (for example a
    timestamp column that is still stored as strings) are passed through
    unchanged: recent pandas releases deprecate calling ``interpolate`` on
    object-dtype columns, and there is nothing meaningful to interpolate there.

    With pandas' default forward direction, interior and trailing gaps are
    filled while leading gaps stay NaN, so those leading rows are removed by
    the final ``dropna``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that may contain missing values.

    Returns
    -------
    pandas.DataFrame
        A new frame without missing values. The input frame is not modified.
    """
    numeric_cols = df.select_dtypes(include="number").columns
    df_inter = df.copy()
    if len(numeric_cols) > 0:
        df_inter[numeric_cols] = df[numeric_cols].interpolate(method='linear')
    df_clean = df_inter.dropna()
    return df_clean

In [12]:
def create_target(df, column_name, new_column_name='Target'):
    """Add a binary "next row is higher" label derived from ``column_name``.

    For every row except the last, the label is ``1`` when the value in the
    following row is strictly greater than the value in the current row and
    ``0`` otherwise. The last row has no successor, so its label is NaN.
    Because of that NaN the label column is float-valued.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; rows are compared in their existing order.
    column_name : str
        Column whose consecutive values are compared.
    new_column_name : str, default 'Target'
        Name of the label column to add.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the label column added. The input frame is not
        modified. An empty input yields an empty frame with the label column.
    """
    values = df[column_name]
    # shift(-1) lines each row up with its successor, so one vectorised
    # comparison replaces a Python-level loop over every row.
    target = (values.shift(-1) > values).astype(float)
    if len(target) > 0:
        # The final row compares against the NaN introduced by the shift;
        # mark it as undefined instead of 0.
        target.iloc[-1] = float('nan')
    labelled = df.copy()
    labelled[new_column_name] = target
    return labelled

In [13]:
def target_drop(df_final):
    """Return ``df_final`` without any row that contains a missing value."""
    return df_final.dropna(axis=0, how="any")

In [14]:
def convert_time_sin_cos(df):
    """Replace ``"datetime"`` with sine/cosine encodings of time of day and day of year.

    Time of day is expressed as seconds since midnight divided by 86400;
    day of year is divided by 365. Each fraction is mapped onto the unit
    circle, producing ``cos_time_of_day``, ``sin_time_of_day``,
    ``cos_day_of_year`` and ``sin_day_of_year``. The ``datetime`` column and
    the intermediate helper columns are dropped from the returned frame; the
    input frame is not modified.
    """
    helper_columns = [
        "day_of_year",
        "time_of_day",
        "day_of_year_norm",
        "time_of_day_norm",
        "datetime",
    ]
    # assign() evaluates its keyword arguments in order, so later entries can
    # read the helper columns created by earlier ones.
    encoded = df.assign(
        day_of_year=lambda d: d["datetime"].dt.dayofyear,
        time_of_day=lambda d: (
            d["datetime"].dt.hour * 3600
            + d["datetime"].dt.minute * 60
            + d["datetime"].dt.second
        ),
        day_of_year_norm=lambda d: d["day_of_year"] / 365.0,
        time_of_day_norm=lambda d: d["time_of_day"] / 86400.0,
        cos_time_of_day=lambda d: np.cos(2 * np.pi * d["time_of_day_norm"]),
        sin_time_of_day=lambda d: np.sin(2 * np.pi * d["time_of_day_norm"]),
        cos_day_of_year=lambda d: np.cos(2 * np.pi * d["day_of_year_norm"]),
        sin_day_of_year=lambda d: np.sin(2 * np.pi * d["day_of_year_norm"]),
    )
    return encoded.drop(columns=helper_columns)

# Final function that prepares all the data

In [15]:
def create_x_y(df_data, df_technical):
    """Run the full preparation pipeline and split the result into features and label.

    Steps, in order: name the timestamp column of the technical table, apply
    the OHLCV column names to the price table, clean the technical table,
    parse both ``datetime`` columns, inner-join the tables on ``datetime``,
    label each row from the ``close`` column, drop rows with missing values,
    and replace ``datetime`` with its sine/cosine encodings.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` holds every column except ``"Target"`` and
        ``y`` is a one-column frame containing ``"Target"``.
    """
    indicators = rename_technical(df_technical)
    prices = rename_columns(df_data)
    indicators = clean_data(indicators)
    prices = convert_datetime(prices)
    indicators = convert_datetime(indicators)

    # Join, label, and discard incomplete rows (including the unlabeled last one).
    labelled = target_drop(
        create_target(merge_columns(prices, indicators), "close")
    )
    features = convert_time_sin_cos(labelled)

    return features.drop(columns = "Target"), features[["Target"]]

In [16]:
# Build the feature frame X and the one-column label frame y from the two raw tables.
X, y = create_x_y(df_data, df_technical)

In [19]:
# Fit the min-max scaler on X and scale it in one step.
# NOTE(review): the scaler is fitted on every row of X. If a train/test split
# is applied later, fit on the training rows only so test-set ranges do not
# leak into the scaling — confirm against the downstream cells.
X_scaled = mm_scaler.fit_transform(X)

In [20]:
# Show the scaled feature frame (pandas abbreviates long/wide frames when rendering).
X_scaled

Unnamed: 0,open,high,low,close,volume,EMA,SlowK,SlowD,Real Upper Band,Real Middle Band,...,MACD,MACD_Signal,MACD_Hist,RSI,OBV,SAR,cos_time_of_day,sin_time_of_day,cos_day_of_year,sin_day_of_year
0,0.000051,0.000009,0.000087,0.000056,0.000273,0.000000,0.148657,0.195115,0.000019,1.545235e-06,...,0.491652,0.503975,0.444396,0.416247,0.647608,0.000001,0.377014,0.017037,0.998149,0.542983
1,0.000056,0.000014,0.000092,0.000067,0.000378,0.000001,0.227568,0.164396,0.000016,5.150784e-07,...,0.491635,0.503955,0.444396,0.421265,0.647643,0.000003,0.442272,0.004278,0.998149,0.542983
2,0.000067,0.000024,0.000108,0.000072,0.000038,0.000002,0.417105,0.253745,0.000015,0.000000e+00,...,0.491635,0.503955,0.444396,0.431302,0.647646,0.000005,0.508666,0.000000,0.998149,0.542983
3,0.000097,0.000047,0.000113,0.000087,0.001292,0.000005,0.590537,0.403232,0.000015,5.150784e-07,...,0.491723,0.503955,0.444615,0.460587,0.647767,0.000006,0.254333,0.933013,0.997335,0.551551
4,0.000082,0.000033,0.000092,0.000072,0.001745,0.000006,0.596173,0.527931,0.000018,2.060314e-06,...,0.491740,0.503975,0.444615,0.446362,0.647604,0.000008,0.199010,0.896677,0.997335,0.551551
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139304,0.956441,0.892865,0.957644,0.956451,0.000102,0.954641,0.258300,0.281209,0.963106,9.553942e-01,...,0.533962,0.568004,0.410340,0.650161,0.561698,0.871010,0.377014,0.017037,0.247506,0.068425
139305,0.956236,0.884583,0.957849,0.956533,0.000101,0.954867,0.251657,0.230747,0.963143,9.557007e-01,...,0.529476,0.563837,0.408233,0.647430,0.561707,0.872978,0.442272,0.004278,0.247506,0.068425
139306,0.956441,0.884726,0.958260,0.956379,0.000025,0.955092,0.208875,0.228543,0.963163,9.560087e-01,...,0.526098,0.559731,0.408760,0.649204,0.561705,0.874769,0.508666,0.000000,0.247506,0.068425
139307,0.956544,0.884963,0.958774,0.956123,0.000019,0.955313,0.173380,0.199814,0.963028,9.563626e-01,...,0.523319,0.555806,0.410384,0.650104,0.561703,0.876398,0.575060,0.004278,0.247506,0.068425
