# Indicadores


Obtención de indicadores a partir de las series temporales para la construcción del espacio de observaciones.

# Indicadores técnicos
* Indicadores de momento: intentan capturar la tasa de cambio -> Relative Strength Index (RSI), Stochastic Oscillator (SO)
* Indicadores de tendencia: capturan la dirección -> Moving Average (MA), Exponential Moving Average (EMA), Moving Average Convergence/Divergence (MACD)
* Indicadores de volumen: capturan información acerca del volumen de transacción -> Positive indicator volume (IPVI), Indicator Negative Volume (INVI), Williams Overbought/Oversold Index, Volatility Volume Ratio (VVR) y Volume Ratio (VR)
* Indicadores de volatilidad: capturan la variación del precio -> Average True Range (ATR), Bollinger Bands


In [1]:
import sys
import os
import pandas as pd

# Add the project root (the parent of the current working directory) to the
# module search path; presumably this is what makes `helpers` importable.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Only append when missing: re-running this cell would otherwise keep adding
# the same entry to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

from helpers import *

In [2]:
# Load the input data.
# NOTE(review): these paths are absolute and machine-specific; the notebook
# only runs where they exist.

# SPY: the "all" series plus its dollar- and volume-imbalance bar files.
spy_data, spy_dollar_imb, spy_volume_imb = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        r'C:\Users\adelapuente\Desktop\math_tfm\00_api_data\SPY_all.parquet',
        r'C:\Users\adelapuente\Desktop\math_tfm\01_imbalance_bars\SPY_dollar_imbalance.parquet',
        r'C:\Users\adelapuente\Desktop\math_tfm\01_imbalance_bars\SPY_volume_imbalance.parquet',
    )
)

# BTC: same three files.
btc_data, btc_dollar_imb, btc_volume_imb = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        r'C:\Users\adelapuente\Desktop\math_tfm\00_api_data\BTC_all.parquet',
        r'C:\Users\adelapuente\Desktop\math_tfm\01_imbalance_bars\BTC_dollar_imbalance.parquet',
        r'C:\Users\adelapuente\Desktop\math_tfm\01_imbalance_bars\BTC_volume_imbalance.parquet',
    )
)

# Parse the 'date' column of the two "all" series as datetimes (BTC, then SPY).
for _raw_frame in (btc_data, spy_data):
    _raw_frame['date'] = pd.to_datetime(_raw_frame['date'])

In [3]:
# Drop the listed columns from every loaded frame, in place.
# Note: drop() raises KeyError for a missing label by default, so this cell
# fails if it is run a second time on the same frames.
_series_extras = ['log_returns', 'volatility', 'volume']
_bar_extras = ['log_returns']
for _frame, _extras in (
    (spy_data, _series_extras),
    (btc_data, _series_extras),
    (spy_dollar_imb, _bar_extras),
    (btc_dollar_imb, _bar_extras),
    (spy_volume_imb, _bar_extras),
    (btc_volume_imb, _bar_extras),
):
    _frame.drop(_extras, axis=1, inplace=True)

### Datos originales

In [4]:
# Run calculate_indicators (presumably provided by `from helpers import *`)
# on the original BTC and SPY series, in that order.
df_btc_with_indicators, df_spy_with_indicators = [
    calculate_indicators(raw_series) for raw_series in (btc_data, spy_data)
]

In [5]:
# Keep a handle on the 'close' column as it is before normalize_data runs;
# a later cell joins it back next to the normalized features.
# NOTE(review): if normalize_data mutates its argument in place, these series
# may not hold the pre-normalization values - confirm in helpers.
btc_close, spy_close = df_btc_with_indicators['close'], df_spy_with_indicators['close']
df_btc_with_indicators, df_spy_with_indicators = (
    normalize_data(df_btc_with_indicators),
    normalize_data(df_spy_with_indicators),
)

In [6]:
# Prefix every column except 'date' with 'feature_'. This is needed so that
# the environment, when it is created, treats them as observed variables
# (the prices are kept as features too).
# The 'close' column saved earlier is then joined back, so each frame ends up
# with both 'feature_close' and 'close'.

# BTC
columns_to_rename = [name for name in df_btc_with_indicators.columns if name != 'date']
btc_feature_names = {name: 'feature_' + name for name in columns_to_rename}
df_btc_with_indicators.rename(columns=btc_feature_names, inplace=True)
df_btc_with_indicators = df_btc_with_indicators.join(btc_close)

# SPY
columns_to_rename = [name for name in df_spy_with_indicators.columns if name != 'date']
spy_feature_names = {name: 'feature_' + name for name in columns_to_rename}
df_spy_with_indicators.rename(columns=spy_feature_names, inplace=True)
df_spy_with_indicators = df_spy_with_indicators.join(spy_close)

In [8]:
# Split each original-series frame with filter_by_date at '2024-01-01'
# (presumably rows before vs. from that date - confirm in helpers).
(
    df_btc_with_indicators_in_sample,
    df_btc_with_indicators_out_of_sample,
) = filter_by_date(df_btc_with_indicators, '2024-01-01')
(
    df_spy_with_indicators_in_sample,
    df_spy_with_indicators_out_of_sample,
) = filter_by_date(df_spy_with_indicators, '2024-01-01')

In [9]:
# Write the original-series splits to parquet files; the names are relative,
# so they land in the current working directory.
for _frame, _target in (
    (df_btc_with_indicators_in_sample, 'BTC_original_processed_in_sample.parquet'),
    (df_spy_with_indicators_in_sample, 'SPY_original_processed_in_sample.parquet'),
    (df_btc_with_indicators_out_of_sample, 'BTC_original_processed_out_of_sample.parquet'),
    (df_spy_with_indicators_out_of_sample, 'SPY_original_processed_out_of_sample.parquet'),
):
    _frame.to_parquet(_target)

In [7]:
# DATA WITHOUT OHLC
# Variant of the original-series frames without the open/low/high features.
# The label list previously named 'feature_open' twice; the repeated label was
# redundant and has been removed (same columns are dropped).
# NOTE(review): 'feature_close' is not dropped - confirm that is intended and
# that the repeated 'feature_open' was not a typo for it.
_ohlc_features = ['feature_open', 'feature_low', 'feature_high']
df_btc_with_indicators_sin_ohlc = df_btc_with_indicators.drop(_ohlc_features, axis=1)
df_spy_with_indicators_sin_ohlc = df_spy_with_indicators.drop(_ohlc_features, axis=1)

In [8]:
# Split the original-series frames without OHLC with filter_by_date at
# '2024-01-01' (presumably rows before vs. from that date - confirm in helpers).
(
    df_btc_with_indicators_in_sample_sin_ohlc,
    df_btc_with_indicators_out_of_sample_sin_ohlc,
) = filter_by_date(df_btc_with_indicators_sin_ohlc, '2024-01-01')
(
    df_spy_with_indicators_in_sample_sin_ohlc,
    df_spy_with_indicators_out_of_sample_sin_ohlc,
) = filter_by_date(df_spy_with_indicators_sin_ohlc, '2024-01-01')

In [9]:
# Write the original-series splits without OHLC to parquet files; the names
# are relative, so they land in the current working directory.
for _frame, _target in (
    (df_btc_with_indicators_in_sample_sin_ohlc, 'BTC_original_processed_in_sample_sin_ohlc.parquet'),
    (df_spy_with_indicators_in_sample_sin_ohlc, 'SPY_original_processed_in_sample_sin_ohlc.parquet'),
    (df_btc_with_indicators_out_of_sample_sin_ohlc, 'BTC_original_processed_out_of_sample_sin_ohlc.parquet'),
    (df_spy_with_indicators_out_of_sample_sin_ohlc, 'SPY_original_processed_out_of_sample_sin_ohlc.parquet'),
):
    _frame.to_parquet(_target)

### Datos de Volumen

In [18]:
# Run calculate_indicators (presumably provided by `from helpers import *`)
# on the BTC and SPY volume-imbalance bars, in that order.
df_btc_volume_with_indicators, df_spy_volume_with_indicators = [
    calculate_indicators(volume_bars) for volume_bars in (btc_volume_imb, spy_volume_imb)
]

In [19]:
# Keep a handle on the 'close' column of the volume-bar frames as it is before
# normalize_data runs; a later cell joins it back next to the features.
# NOTE(review): if normalize_data mutates its argument in place, these series
# may not hold the pre-normalization values - confirm in helpers.
btc_close_volume, spy_close_volume = (
    df_btc_volume_with_indicators['close'],
    df_spy_volume_with_indicators['close'],
)
df_btc_volume_with_indicators, df_spy_volume_with_indicators = (
    normalize_data(df_btc_volume_with_indicators),
    normalize_data(df_spy_volume_with_indicators),
)

In [20]:
# Prefix every column except 'date' with 'feature_' in the volume-bar frames,
# then join the saved 'close' column back so both 'feature_close' and 'close'
# are present.

# BTC
columns_to_rename = [name for name in df_btc_volume_with_indicators.columns if name != 'date']
btc_volume_feature_names = {name: 'feature_' + name for name in columns_to_rename}
df_btc_volume_with_indicators.rename(columns=btc_volume_feature_names, inplace=True)
df_btc_volume_with_indicators = df_btc_volume_with_indicators.join(btc_close_volume)

# SPY
columns_to_rename = [name for name in df_spy_volume_with_indicators.columns if name != 'date']
spy_volume_feature_names = {name: 'feature_' + name for name in columns_to_rename}
df_spy_volume_with_indicators.rename(columns=spy_volume_feature_names, inplace=True)
df_spy_volume_with_indicators = df_spy_volume_with_indicators.join(spy_close_volume)

In [21]:
# Split each volume-bar frame with filter_by_date at '2024-01-01'
# (presumably rows before vs. from that date - confirm in helpers).
(
    df_btc_volume_with_indicators_in_sample,
    df_btc_volume_with_indicators_out_of_sample,
) = filter_by_date(df_btc_volume_with_indicators, '2024-01-01')
(
    df_spy_volume_with_indicators_in_sample,
    df_spy_volume_with_indicators_out_of_sample,
) = filter_by_date(df_spy_volume_with_indicators, '2024-01-01')

In [22]:
# Write the volume-bar splits to parquet files; the names are relative, so
# they land in the current working directory.
for _frame, _target in (
    (df_btc_volume_with_indicators_in_sample, 'BTC_volume_processed_in_sample.parquet'),
    (df_spy_volume_with_indicators_in_sample, 'SPY_volume_processed_in_sample.parquet'),
    (df_btc_volume_with_indicators_out_of_sample, 'BTC_volume_processed_out_of_sample.parquet'),
    (df_spy_volume_with_indicators_out_of_sample, 'SPY_volume_processed_out_of_sample.parquet'),
):
    _frame.to_parquet(_target)

In [23]:
# Variant of the volume-bar frames without the open/low/high features.
# The label list previously named 'feature_open' twice; the repeated label was
# redundant and has been removed (same columns are dropped).
# NOTE(review): 'feature_close' is not dropped - confirm that is intended and
# that the repeated 'feature_open' was not a typo for it.
_ohlc_features = ['feature_open', 'feature_low', 'feature_high']
df_btc_volume_with_indicators_sin_ohlc = df_btc_volume_with_indicators.drop(_ohlc_features, axis=1)
df_spy_volume_with_indicators_sin_ohlc = df_spy_volume_with_indicators.drop(_ohlc_features, axis=1)
# The SPY frame used to be bound only to a misspelled name ("indicatorss").
# The alias is kept because the following cell reads that name.
df_spy_volume_with_indicatorss_sin_ohlc = df_spy_volume_with_indicators_sin_ohlc

In [24]:
# Split the volume-bar frames without OHLC with filter_by_date at '2024-01-01'
# (presumably rows before vs. from that date - confirm in helpers).
(
    df_btc_volume_with_indicators_in_sample_sin_ohlc,
    df_btc_volume_with_indicators_out_of_sample_sin_ohlc,
) = filter_by_date(df_btc_volume_with_indicators_sin_ohlc, '2024-01-01')
# The SPY input name is spelled "indicatorss" where it is defined; it is used
# verbatim here.
(
    df_spy_volume_with_indicators_in_sample_sin_ohlc,
    df_spy_volume_with_indicators_out_of_sample_sin_ohlc,
) = filter_by_date(df_spy_volume_with_indicatorss_sin_ohlc, '2024-01-01')

In [25]:
# Write the volume-bar splits without OHLC to parquet files; the names are
# relative, so they land in the current working directory.
for _frame, _target in (
    (df_btc_volume_with_indicators_in_sample_sin_ohlc, 'BTC_volume_processed_in_sample_sin_ohlc.parquet'),
    (df_spy_volume_with_indicators_in_sample_sin_ohlc, 'SPY_volume_processed_in_sample_sin_ohlc.parquet'),
    (df_btc_volume_with_indicators_out_of_sample_sin_ohlc, 'BTC_volume_processed_out_of_sample_sin_ohlc.parquet'),
    (df_spy_volume_with_indicators_out_of_sample_sin_ohlc, 'SPY_volume_processed_out_of_sample_sin_ohlc.parquet'),
):
    _frame.to_parquet(_target)

### Datos de Dollar

In [26]:
# Run calculate_indicators (presumably provided by `from helpers import *`)
# on the BTC and SPY dollar-imbalance bars, in that order.
df_btc_dollar_with_indicators, df_spy_dollar_with_indicators = [
    calculate_indicators(dollar_bars) for dollar_bars in (btc_dollar_imb, spy_dollar_imb)
]

In [27]:
# Keep a handle on the 'close' column of the dollar-bar frames as it is before
# normalize_data runs; a later cell joins it back next to the features.
# NOTE(review): if normalize_data mutates its argument in place, these series
# may not hold the pre-normalization values - confirm in helpers.
btc_close_dollar, spy_close_dollar = (
    df_btc_dollar_with_indicators['close'],
    df_spy_dollar_with_indicators['close'],
)
df_btc_dollar_with_indicators, df_spy_dollar_with_indicators = (
    normalize_data(df_btc_dollar_with_indicators),
    normalize_data(df_spy_dollar_with_indicators),
)

In [28]:
# Prefix every column except 'date' with 'feature_' in the dollar-bar frames,
# then join the saved 'close' column back so both 'feature_close' and 'close'
# are present.

# BTC
columns_to_rename = [name for name in df_btc_dollar_with_indicators.columns if name != 'date']
btc_dollar_feature_names = {name: 'feature_' + name for name in columns_to_rename}
df_btc_dollar_with_indicators.rename(columns=btc_dollar_feature_names, inplace=True)
df_btc_dollar_with_indicators = df_btc_dollar_with_indicators.join(btc_close_dollar)

# SPY
columns_to_rename = [name for name in df_spy_dollar_with_indicators.columns if name != 'date']
spy_dollar_feature_names = {name: 'feature_' + name for name in columns_to_rename}
df_spy_dollar_with_indicators.rename(columns=spy_dollar_feature_names, inplace=True)
df_spy_dollar_with_indicators = df_spy_dollar_with_indicators.join(spy_close_dollar)

In [29]:
# Split each dollar-bar frame with filter_by_date at '2024-01-01'
# (presumably rows before vs. from that date - confirm in helpers).
(
    df_btc_dollar_with_indicators_in_sample,
    df_btc_dollar_with_indicators_out_of_sample,
) = filter_by_date(df_btc_dollar_with_indicators, '2024-01-01')
(
    df_spy_dollar_with_indicators_in_sample,
    df_spy_dollar_with_indicators_out_of_sample,
) = filter_by_date(df_spy_dollar_with_indicators, '2024-01-01')

In [30]:
# Write the dollar-bar splits to parquet files; the names are relative, so
# they land in the current working directory.
for _frame, _target in (
    (df_btc_dollar_with_indicators_in_sample, 'BTC_dollar_processed_in_sample.parquet'),
    (df_spy_dollar_with_indicators_in_sample, 'SPY_dollar_processed_in_sample.parquet'),
    (df_btc_dollar_with_indicators_out_of_sample, 'BTC_dollar_processed_out_of_sample.parquet'),
    (df_spy_dollar_with_indicators_out_of_sample, 'SPY_dollar_processed_out_of_sample.parquet'),
):
    _frame.to_parquet(_target)

In [31]:
# Variant of the dollar-bar frames without the open/low/high features.
# FIX: these frames are now built from the dollar-bar frames. They used to be
# built from df_btc_with_indicators / df_spy_with_indicators (the
# original-series frames), so the "*_dollar_*_sin_ohlc" outputs contained the
# original-series data instead of the dollar-bar data.
# The label list also named 'feature_open' twice; the repeated label was
# redundant and has been removed (same columns are dropped).
# NOTE(review): 'feature_close' is not dropped - confirm that is intended and
# that the repeated 'feature_open' was not a typo for it.
_ohlc_features = ['feature_open', 'feature_low', 'feature_high']
df_btc_dollar_with_indicators_sin_ohlc = df_btc_dollar_with_indicators.drop(_ohlc_features, axis=1)
df_spy_dollar_with_indicators_sin_ohlc = df_spy_dollar_with_indicators.drop(_ohlc_features, axis=1)

In [32]:
# Split the dollar-bar frames without OHLC with filter_by_date at '2024-01-01'
# (presumably rows before vs. from that date - confirm in helpers).
(
    df_btc_dollar_with_indicators_in_sample_sin_ohlc,
    df_btc_dollar_with_indicators_out_of_sample_sin_ohlc,
) = filter_by_date(df_btc_dollar_with_indicators_sin_ohlc, '2024-01-01')
(
    df_spy_dollar_with_indicators_in_sample_sin_ohlc,
    df_spy_dollar_with_indicators_out_of_sample_sin_ohlc,
) = filter_by_date(df_spy_dollar_with_indicators_sin_ohlc, '2024-01-01')

In [33]:
# Write the dollar-bar splits without OHLC to parquet files; the names are
# relative, so they land in the current working directory.
for _frame, _target in (
    (df_btc_dollar_with_indicators_in_sample_sin_ohlc, 'BTC_dollar_processed_in_sample_sin_ohlc.parquet'),
    (df_spy_dollar_with_indicators_in_sample_sin_ohlc, 'SPY_dollar_processed_in_sample_sin_ohlc.parquet'),
    (df_btc_dollar_with_indicators_out_of_sample_sin_ohlc, 'BTC_dollar_processed_out_of_sample_sin_ohlc.parquet'),
    (df_spy_dollar_with_indicators_out_of_sample_sin_ohlc, 'SPY_dollar_processed_out_of_sample_sin_ohlc.parquet'),
):
    _frame.to_parquet(_target)