## Instalación librerías

In [None]:
!pip install yfinance

Collecting yfinance
  Downloading https://files.pythonhosted.org/packages/a7/ee/315752b9ef281ba83c62aa7ec2e2074f85223da6e7e74efb4d3e11c0f510/yfinance-0.1.59.tar.gz
Collecting lxml>=4.5.1
[?25l  Downloading https://files.pythonhosted.org/packages/30/c0/d0526314971fc661b083ab135747dc68446a3022686da8c16d25fcf6ef07/lxml-4.6.3-cp37-cp37m-manylinux2014_x86_64.whl (6.3MB)
[K     |████████████████████████████████| 6.3MB 26.2MB/s 
Building wheels for collected packages: yfinance
  Building wheel for yfinance (setup.py) ... [?25l[?25hdone
  Created wheel for yfinance: filename=yfinance-0.1.59-py2.py3-none-any.whl size=23442 sha256=b6e8fab6ec061b8c75b54c71d5ae871aa2a8d0d4f14a9022705446d6d083cd45
  Stored in directory: /root/.cache/pip/wheels/f8/2a/0f/4b5a86e1d52e451757eb6bc17fd899629f0925c777741b6d04
Successfully built yfinance
Installing collected packages: lxml, yfinance
  Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
Successf

## Detalle de funciones

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
pd.options.mode.chained_assignment = None
from datetime import date
from datetime import timedelta 
import matplotlib.pyplot as plt
import scipy.stats as st
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier 
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.metrics import fbeta_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
pd.set_option('display.max_columns', 400)
pd.set_option('display.max_rows', 5000)
pd.set_option('display.width', 1000)

In [None]:
def descarga(ticker, fc_empieza, fc_termina):
  """Download the price history of one ticker as a flat, renamed frame.

  ``fc_empieza`` and ``fc_termina`` are forwarded to ``yf.download`` as
  ``start`` and ``end``.

  Returns a DataFrame with the columns ``fc`` (the former index), ``ticker``,
  ``y`` (Close), ``vl`` (Volume), ``high`` and ``low``.
  """
  columnas_origen = ['Close', 'Volume', 'High', 'Low']
  precios = yf.download(ticker, start=fc_empieza, end=fc_termina)[columnas_origen]
  precios.insert(loc=0, column='Ticker', value=ticker)
  # Move the index into a regular first column before renaming everything.
  precios = precios.reset_index(level=0)
  precios.columns = ['fc', 'ticker', 'y', 'vl', 'high', 'low']
  return precios

In [None]:
def calcula_pc_merval(dataset, indice=None):
  """Add ``pc_merval``: the price ``y`` expressed as a fraction of the index level.

  Parameters:
    dataset: frame with at least the columns ``fc`` and ``y``.
    indice: frame with the columns ``fc`` and ``mvl`` (index level per date).
      When omitted, the notebook-level ``mvl`` frame is used, which must have
      been defined beforehand (a NameError is raised otherwise).

  Returns a new frame (the input is not modified) with the extra column
  ``pc_merval``; dates missing from ``indice`` yield NaN (left join).
  """
  if indice is None:
    indice = mvl  # notebook-level global, kept for backward compatibility
  combinado = pd.merge(dataset, indice, on='fc', how='left')
  combinado['pc_merval'] = combinado['y'] / combinado['mvl']
  return combinado.drop(['mvl'], axis=1)

In [None]:
def calcula_amplitud(dataset):
  """Add ``amplitud``: the high-low range of each row relative to its price ``y``.

  The column is written into ``dataset`` itself, which is also returned.
  """
  rango = dataset['high'] - dataset['low']
  dataset['amplitud'] = rango / dataset['y']
  return dataset

In [None]:
def estandariza_volumen(dataset):
  """Replace ``vl`` with its z-score (mean and sample std taken over the whole frame).

  The column is overwritten in ``dataset`` itself, which is also returned.
  """
  volumen = dataset['vl']
  centro, dispersion = volumen.mean(), volumen.std()
  dataset['vl'] = (volumen - centro) / dispersion
  return dataset

In [None]:
def _ema_con_semilla_sma(serie, period):
  """EMA of ``serie`` whose first value is the simple mean of the first full window.

  Rows before the first full window stay NaN; from there on the recursive
  (``adjust=False``) exponential mean with ``span=period`` is applied.
  """
  sma = serie.rolling(period, min_periods=period).mean()
  # Position of the first row whose window of `period` values is complete.
  idx_start = sma.isna().sum() + 1 - period
  idx_end = idx_start + period
  semilla = sma.iloc[idx_start:idx_end]  # NaNs followed by the seeding SMA value
  resto = serie.iloc[idx_end:]           # raw prices after the seed
  return pd.concat([semilla, resto]).ewm(span=period, adjust=False).mean()


def calcula_medias(dataset):
  """Add the MACD family of columns computed from the price column ``y``.

  Columns written into ``dataset`` (which is also returned):
    exp1   -- 12-period EMA seeded with the 12-period SMA
    exp2   -- 26-period EMA seeded with the 26-period SMA
    macd   -- exp1 - exp2
    exp3   -- 9-period EMA of macd (signal line)
    histog -- macd - exp3
  """
  dataset['exp1'] = _ema_con_semilla_sma(dataset['y'], 12)
  dataset['exp2'] = _ema_con_semilla_sma(dataset['y'], 26)
  macd = dataset['exp1'] - dataset['exp2']
  dataset['macd'] = macd
  dataset['exp3'] = macd.ewm(span=9, adjust=False).mean()
  dataset['histog'] = dataset['macd'] - dataset['exp3']
  return dataset

In [None]:
def calcula_historia(dataset, lags):
  """Add relative-change features for every column from the 7th onwards.

  For each such column ``c`` and each ``i`` in ``1 .. lags-1`` the column
  ``var_<c>_<i>`` holds ``c / c.shift(i) - 1``. Note that ``lags`` is an
  exclusive upper bound: ``lags=5`` produces the shifts 1 to 4.

  The columns are written into ``dataset`` itself, which is also returned.
  """
  # `iloc` yields a snapshot, so the columns added below are not iterated over.
  # `.items()` replaces `.iteritems()`, which no longer exists in pandas 2.x.
  for columnName, columnData in dataset.iloc[:, 6:].items():
    for i in range(1, lags):
      colname = "var_%s_%s" % (columnName, i)
      dataset[colname] = columnData / columnData.shift(i) - 1
  return dataset

In [None]:
def calcula_canalidad_y(dataset, ventanas=(30, 90, 180)):
  """Count how many recent closes fall inside today's range and inside a +/-5% band.

  For every window ``W`` in ``ventanas`` two integer columns are added, built
  from the previous ``W-1`` closes (``y.shift(1) .. y.shift(W-1)``):
    nu_dias_y_entre_max_min_<W> -- closes strictly between today's ``low`` and ``high``
    nu_dias_y_entre_5pc_<W>     -- closes strictly between ``0.95*y`` and ``1.05*y``
  Shifts that fall before the start of the frame never count.

  Parameters:
    dataset: frame with the columns ``y``, ``high`` and ``low``.
    ventanas: window sizes; the default reproduces the original 30/90/180 columns.

  Returns a new frame; the input frame is left untouched.
  """
  dataset = dataset.copy()
  y = dataset['y']
  high = dataset['high'].to_numpy()
  low = dataset['low'].to_numpy()
  techo_5pc = (y * 1.05).to_numpy()
  piso_5pc = (y * 0.95).to_numpy()

  en_rango = np.zeros(len(dataset), dtype=int)
  en_banda = np.zeros(len(dataset), dtype=int)
  acumulados = {}

  # The windows are nested, so one pass over the shifts is enough: the running
  # counters are snapshotted each time a window boundary is reached.
  lag = 1
  for ventana in sorted(set(ventanas)):
    while lag < ventana:
      previo = y.shift(lag).to_numpy()
      en_rango = en_rango + np.where((previo < high) & (previo > low), 1, 0)
      en_banda = en_banda + np.where((previo < techo_5pc) & (previo > piso_5pc), 1, 0)
      lag += 1
    acumulados[ventana] = (en_rango, en_banda)

  for ventana in ventanas:
    dataset['nu_dias_y_entre_max_min_%s' % (ventana)] = acumulados[ventana][0]
    dataset['nu_dias_y_entre_5pc_%s' % (ventana)] = acumulados[ventana][1]
  return dataset

In [None]:
def calcula_canalidad_histog_macd(dataset, ventanas=(5, 30, 90, 180)):
  """Count, over several look-back windows, how the MACD histogram has behaved.

  For every window ``W`` in ``ventanas`` four integer columns are added, built
  from the previous ``W`` histogram values (``histog.shift(1) .. histog.shift(W)``):
    nu_dias_histog_entre_5pc_<W>     -- values within +/-5% of today's ``histog``
    nu_dias_histog_positivo_<W>      -- values > 0
    nu_dias_histog_negativo_<W>      -- values < 0
    nu_dias_histog_mismo_signo_<W>   -- values with the same (non-zero) sign as today

  Two fixes with respect to the previous implementation:
    * the +/-5% band is applied to every shift (it used to widen to +/-50%
      from the second shift on, contradicting the column name);
    * the band bounds are ordered, so the band is no longer empty when
      today's ``histog`` is negative.

  Returns a new frame; the input frame is left untouched.
  """
  dataset = dataset.copy()
  histog = dataset['histog']
  actual = histog.to_numpy()
  borde_a = actual * 1.05
  borde_b = actual * 0.95
  # For negative values 1.05*h < 0.95*h, hence the explicit min/max.
  limite_inf = np.minimum(borde_a, borde_b)
  limite_sup = np.maximum(borde_a, borde_b)

  filas = len(dataset)
  en_banda = np.zeros(filas, dtype=int)
  positivos = np.zeros(filas, dtype=int)
  negativos = np.zeros(filas, dtype=int)
  mismo_signo = np.zeros(filas, dtype=int)
  acumulados = {}

  # The windows are nested: accumulate once and snapshot at each window size.
  lag = 1
  for ventana in sorted(set(ventanas)):
    while lag <= ventana:
      previo = histog.shift(lag).to_numpy()
      en_banda = en_banda + np.where((previo < limite_sup) & (previo > limite_inf), 1, 0)
      positivos = positivos + np.where(previo > 0, 1, 0)
      negativos = negativos + np.where(previo < 0, 1, 0)
      mismo_signo = mismo_signo + np.where(((previo > 0) & (actual > 0)) | ((previo < 0) & (actual < 0)), 1, 0)
      lag += 1
    acumulados[ventana] = (en_banda, positivos, negativos, mismo_signo)

  for ventana in ventanas:
    banda, pos, neg, signo = acumulados[ventana]
    dataset["nu_dias_histog_entre_5pc_%s" % (ventana)] = banda
    dataset["nu_dias_histog_positivo_%s" % (ventana)] = pos
    dataset["nu_dias_histog_negativo_%s" % (ventana)] = neg
    dataset["nu_dias_histog_mismo_signo_%s" % (ventana)] = signo
  return dataset

In [None]:
def _resume_tendencias(row, nombres):
  """Aggregate, for one dataset row, the trend lines stored under ``nombres``.

  Every ``row[nombre]`` is a 4-item list
  ``[projected price, times broken, times tested, slope]``.
  A line that has been broken more than 5 times is considered dead.

  Returns the tuple
  ``(tests of the most-tested live line, projected price of that line,
  projected price of the live line closest to row['y'],
  projected price of the dead line closest to row['y'],
  slope of the most-tested live line)``; items stay NaN when no line qualifies.
  """
  y = row['y']
  nu_pruebas_mas_probado = np.nan
  precio_mas_probado = np.nan
  precio_vivo_mas_cercano = np.nan
  precio_muerto_mas_cercano = np.nan
  tendencia_mas_probado = np.nan

  for nombre in nombres:
    proyectado, pasadas, pruebas, pendiente = row[nombre]
    if pasadas > 5:  # dead line: keep the one projecting closest to the current price
      if abs(y - proyectado) < abs(y - precio_muerto_mas_cercano) or np.isnan(precio_muerto_mas_cercano):
        precio_muerto_mas_cercano = proyectado
    else:  # live line
      # Most tested so far (a line never tested does not qualify).
      if pruebas > nu_pruebas_mas_probado or (np.isnan(nu_pruebas_mas_probado) and pruebas > 0):
        nu_pruebas_mas_probado = pruebas
        precio_mas_probado = proyectado
        tendencia_mas_probado = pendiente
      # Closest projected price among live lines.
      if np.isnan(precio_vivo_mas_cercano) or abs(y - proyectado) < abs(y - precio_vivo_mas_cercano):
        precio_vivo_mas_cercano = proyectado

  return (nu_pruebas_mas_probado, precio_mas_probado, precio_vivo_mas_cercano,
          precio_muerto_mas_cercano, tendencia_mas_probado)


def calcula_AT_tendencias(dataset, lags):
  """Add trend-line (technical analysis) features for one peak-detection window.

  Steps:
    1. A row is a ceiling (``T``) when ``y`` is above every rounded close in
       the ``lags`` rows before and after it, and a floor (``P``) when it is
       below all of them.
    2. Every peak after the first of its kind defines a straight line whose
       slope ``m`` joins it with the previous peak of the same kind (price
       difference divided by the day difference of ``fc``). The line is
       projected forward starting ``lags`` rows after the peak.
    3. For each line the cumulative number of times the close broke it (by
       more than 0.5%) and tested it (within +/-0.5%) is tracked.
    4. Per row, the lines are summarised into ten columns suffixed with
       ``_<lags>`` (five for ceilings, five for floors); projected prices are
       expressed relative to ``y`` and slopes are divided by ``y``.

  Parameters:
    dataset: frame with a datetime column ``fc`` and a price column ``y``.
    lags: number of rows on each side used to confirm a peak.

  Returns a new frame with the ten feature columns; all helper columns are
  removed and the input frame is left untouched.

  NOTE: peak detection looks ``lags`` rows into the future (``shift(-i)``);
  the projection only starts ``lags`` rows after the peak.
  """
  # Work on a copy so the caller's frame is not left with helper columns.
  dataset = dataset.copy()

  # Neighbouring closes: p<i>b = i rows back, p<i>f = i rows forward.
  columnas_vecinos = []
  for i in range(1, lags + 1):
    atras = 'p%sb' % (i)
    adelante = 'p%sf' % (i)
    dataset[atras] = round(dataset.y.shift(i), 2)
    dataset[adelante] = round(dataset.y.shift(-i), 2)
    columnas_vecinos += [atras, adelante]

  # Anchored patterns: only the helper columns created above may take part.
  # (An unanchored ".*b" / ".*f" also matches `fc` and features of earlier calls.)
  vecinos_atras = dataset.filter(regex=r"^p\d+b$")
  vecinos_adelante = dataset.filter(regex=r"^p\d+f$")
  dataset['maxb'] = round(vecinos_atras.max(axis=1), 2)
  dataset['maxf'] = round(vecinos_adelante.max(axis=1), 2)
  dataset['minb'] = round(vecinos_atras.min(axis=1), 2)
  dataset['minf'] = round(vecinos_adelante.min(axis=1), 2)
  dataset['T'] = np.where((dataset['y'] > dataset['maxb']) & (dataset['y'] > dataset['maxf']), 1, 0)
  dataset['P'] = np.where((dataset['y'] < dataset['minb']) & (dataset['y'] < dataset['minf']), 1, 0)

  # Peaks of each kind with the slope that joins each one to the previous one.
  picos_por_tipo = []
  for name, marca in (('techos', 'T'), ('pisos', 'P')):
    picos = dataset[dataset[marca] == 1].copy()
    picos['m'] = (picos.y.shift(1) - picos.y)/(picos.fc.shift(1) - picos.fc).dt.days
    picos_por_tipo.append((name, picos))

  dias = len(dataset)
  for name, dataset_picos in picos_por_tipo:
    # The first peak has no predecessor, hence no trend line.
    for index, row in dataset_picos.iloc[1:].iterrows():
      y_start = row['y']
      pendiente = row['m']
      # Row position at which the peak is confirmed and the line is born.
      inicio = np.where(dataset.fc == row['fc'])[0][0] + lags
      if dias < inicio:
        continue

      # Projected price: NaN until the line is born, then a straight line.
      serie = np.full(dias, np.nan)
      serie[inicio:] = (y_start + pendiente*lags) + pendiente*np.arange(dias - inicio)
      colname = '%s_%s_proy' % (name, index)
      dataset[colname] = serie

      # Cumulative number of times the line was broken.
      colname_pass = '%s_%s_pass' % (name, index)
      if name == 'techos':
        dataset[colname_pass] = np.where(dataset['y'] > (dataset[colname])*1.005, 1, 0)
      elif name == 'pisos':
        dataset[colname_pass] = np.where(dataset['y'] < (dataset[colname])*0.995, 1, 0)
      dataset[colname_pass] = dataset[colname_pass].cumsum()

      # Cumulative number of times the line was tested.
      colname_prueba = '%s_%s_prueba' % (name, index)
      dataset[colname_prueba] = np.where((dataset['y'] > dataset[colname]*0.995) & (dataset['y'] < dataset[colname]*1.005), 1, 0)
      dataset[colname_prueba] = dataset[colname_prueba].cumsum()

      # Slope of the line (constant column).
      colname_pendiente = '%s_%s_pendiente' % (name, index)
      dataset[colname_pendiente] = row['m']

      # Pack the four values into one list-valued column and drop the parts.
      colname_comb = '%s_%s' % (name, index)
      dataset[colname_comb] = dataset[[colname, colname_pass, colname_prueba, colname_pendiente]].values.tolist()
      del dataset[colname]
      del dataset[colname_pass]
      del dataset[colname_prueba]
      del dataset[colname_pendiente]

  # One packed column per individual ceiling / floor line.
  names_techos = dataset.filter(regex=("(techos)(.*)")).columns
  names_pisos = dataset.filter(regex=("(pisos)(.*)")).columns

  # Summarise the lines row by row; only the columns actually read are iterated.
  agregados_techos = []
  agregados_pisos = []
  columnas_resumen = ['y'] + list(names_techos) + list(names_pisos)
  for _, row in dataset[columnas_resumen].iterrows():
    agregados_techos.append(_resume_tendencias(row, names_techos))
    agregados_pisos.append(_resume_tendencias(row, names_pisos))

  y = dataset['y'].to_numpy()
  for tipo, agregados in (('techo', agregados_techos), ('piso', agregados_pisos)):
    valores = np.array(agregados, dtype=float).reshape(-1, 5)
    dataset[f"nu_pruebas_{tipo}_vivo_mas_probado_{lags}"] = valores[:, 0]
    dataset[f"precio_proyectado_{tipo}_vivo_mas_probado_{lags}"] = (valores[:, 1] - y)/y
    dataset[f"precio_proyectado_{tipo}_vivo_mas_cercano_{lags}"] = (valores[:, 2] - y)/y
    dataset[f"precio_proyectado_{tipo}_muerto_mas_cercano_{lags}"] = (valores[:, 3] - y)/y
    dataset[f"tendencia_{tipo}_vivo_mas_probado_{lags}"] = valores[:, 4]/y

  # Remove every helper column created along the way.
  ultimas_drop = ['maxb', 'maxf', 'minb', 'minf', 'T', 'P']
  dataset = dataset.drop(columnas_vecinos + ultimas_drop, axis=1)
  dataset = dataset.drop(names_techos, axis=1)
  dataset = dataset.drop(names_pisos, axis=1)
  return dataset

In [None]:
def calcula_target_class(dataset, SL, TG, dias_indeterminacion):
  """Label each row by whether take-gain or stop-loss is reached first.

  For each of the next ``dias_indeterminacion`` rows, the future ``low`` and
  ``high`` are compared against the current price ``y``:
    target = 1  -- the high rose more than ``TG`` above ``y``
    target = 0  -- the low fell more than ``SL`` below ``y``
    target = 99 -- neither happened within the horizon (undetermined)
  The first day that triggers either condition decides the label; when both
  trigger on the same day, the take-gain (1) wins.

  Parameters:
    dataset: frame with the columns ``y``, ``high`` and ``low``.
    SL: stop loss as a positive fraction (0.06 means -6%).
    TG: take gain as a positive fraction.
    dias_indeterminacion: number of future rows inspected.

  The ``target`` column is written into ``dataset`` itself, which is also
  returned. The last rows, which lack enough future data, keep 99.
  """
  dataset['target'] = 99
  for i in range(1, dias_indeterminacion + 1):
    # Change of the day-i low / high against the purchase price of THIS frame
    # (previously read from the unrelated notebook-level `df`).
    var_y_low = dataset.low.shift(-i)/dataset.y - 1
    var_y_high = dataset.high.shift(-i)/dataset.y - 1
    target = np.where(var_y_low < -SL, 0, 99)
    target = np.where(var_y_high > TG, 1, target)
    # Only rows that are still undetermined take the outcome of day i.
    dataset['target'] = np.where(dataset['target'] == 99, target, dataset['target'])
  return dataset

In [None]:
def divide_dev_test(dataset, start_train, start_test, end_test):
  """Split the determined rows into the notebook-level ``df_dev`` and ``df_test``.

  A numeric ``month`` column (YYYYMM taken from ``fc``) is inserted into
  ``dataset`` as its second column unless it already exists. Rows whose
  ``target`` is 90 or more are ignored. The results are stored in globals:
    df_dev  -- start_train <= month < start_test
    df_test -- start_test <= month <= end_test
  Nothing is returned.
  """
  global df, x_dev, y_dev, x_test, y_test, x_val, y_val, df_test, df_dev
  periodo = pd.to_numeric(dataset['fc'].dt.strftime('%Y%m'))
  if 'month' not in dataset:
    dataset.insert(1, "month", periodo)

  determinados = dataset[dataset.target < 90]  # drop undetermined rows
  en_dev = (determinados.month >= start_train) & (determinados.month < start_test)
  en_test = (determinados.month >= start_test) & (determinados.month <= end_test)
  df_dev = determinados[en_dev]
  df_test = determinados[en_test]

## Consolidado

In [None]:
# Download window: from `dias_empieza` days ago up to `dias_termina` days ago.
dias_empieza = 2500
dias_termina = 200
today = date.today()
fc_empieza = today - timedelta(days=dias_empieza)
fc_termina = today - timedelta(days=dias_termina)

# Merval benchmark download (disabled; calcula_pc_merval needs `mvl` if re-enabled).
#mvl = yf.download('^MERV', start=fc_empieza, end=fc_termina)
#print("Descargado Merval")
#mvl = mvl[['Close']]
#mvl.reset_index(level=0, inplace=True)
#mvl.columns=['fc','mvl']

tickers = (
  'BTC-USD',
  'ETH-USD',
  'USDT-USD',
  'XRP-USD',
  'ADA-USD',
  'LTC-USD',
  'BCH-USD',
  'LINK-USD',
  'BNB-USD',
  'XLM-USD',
  'USDC-USD',
  'BSV-USD',
  'XMR-USD',
  'EOS-USD',
  'VET-USD',
  'XTZ-USD',
  'TRX-USD',
  'XEM-USD',
  'THETA-USD',
  'NEO-USD',
  'ATOM1-USD',
  'CCXX-USD',
  'MIOTA-USD',
  'DASH-USD',
  'CEL-USD',
  'DFI-USD',
  'ZEC-USD',
  'AVAX-USD',
  'SOL1-USD',
  'ETC-USD',
  'KSM-USD',
  'ZIL-USD',
  'WAVES-USD',
  'DCR-USD',
  'HBAR-USD',
  'ALGO-USD',
  'LRC-USD',
  'OMG-USD',
  'ONT-USD',
  'CTC1-USD'
)

# Per-ticker feature frames are collected and concatenated once at the end:
# DataFrame.append no longer exists in pandas 2.x and re-copied `base` on every ticker.
frames = []
for ticker in tickers:
  df = descarga(ticker, fc_empieza, fc_termina)
  print("Descargado ", ticker)
  # df = calcula_pc_merval(df)
  df = calcula_amplitud(df)
  df = estandariza_volumen(df)

  df = calcula_medias(df)
  df = df.dropna()  # drops the warm-up rows of the moving averages
  df = calcula_historia(df, 5)  # shifts 1..4
  df = calcula_canalidad_y(df)
  df = calcula_canalidad_histog_macd(df)

  # Trend-line features for several peak-confirmation windows.
  for per in (360, 120, 90, 60, 30, 15, 8, 4):
    df = calcula_AT_tendencias(df, per)
    print("Calculé AT para", ticker, "en lags de", per)

  df = calcula_target_class(df, 0.06, 0.14, 90)  # (stop loss, take gain, horizon in rows)
  frames.append(df)

base = pd.concat(frames)

[*********************100%***********************]  1 of 1 completed
Descargado  BTC-USD
Calculé AT para BTC-USD en lags de 360
Calculé AT para BTC-USD en lags de 120
Calculé AT para BTC-USD en lags de 90
Calculé AT para BTC-USD en lags de 60
Calculé AT para BTC-USD en lags de 30
Calculé AT para BTC-USD en lags de 15
Calculé AT para BTC-USD en lags de 8
Calculé AT para BTC-USD en lags de 4
[*********************100%***********************]  1 of 1 completed
Descargado  ETH-USD
Calculé AT para ETH-USD en lags de 360
Calculé AT para ETH-USD en lags de 120
Calculé AT para ETH-USD en lags de 90
Calculé AT para ETH-USD en lags de 60
Calculé AT para ETH-USD en lags de 30
Calculé AT para ETH-USD en lags de 15
Calculé AT para ETH-USD en lags de 8
Calculé AT para ETH-USD en lags de 4
[*********************100%***********************]  1 of 1 completed
Descargado  USDT-USD
Calculé AT para USDT-USD en lags de 360
Calculé AT para USDT-USD en lags de 120
Calculé AT para USDT-USD en lags de 90
Calc

In [None]:
# S&P 500 dataset: MACD features plus the 10-row-ahead return as regression target.
dias_empieza = 6500
dias_termina = 200
today = date.today()
fc_empieza = today - timedelta(days=dias_empieza)
fc_termina = today - timedelta(days=dias_termina)

df = descarga('^GSPC', fc_empieza, fc_termina)
df = estandariza_volumen(calcula_amplitud(df))

# Keep only macd and histog from the MACD family.
df = calcula_medias(df)
df = df.drop(columns=['exp1', 'exp2', 'exp3'])

df = df.dropna()
df = calcula_historia(df, 3)  # shifts 1..2

# Relative change of the close 10 rows ahead; the last 10 rows have no target and are dropped.
df['target'] = df.y.shift(-10) / df.y - 1
df = df.dropna()
df

[*********************100%***********************]  1 of 1 completed


In [None]:
# Colab-only: write the current `df` to a CSV file and trigger a browser download of it.
from google.colab import files
df.to_csv('df.csv')  # includes the index as the first CSV column (to_csv default)
files.download('df.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Working

1.   **OBJETIVOS**:

*   Imposible: Tener un modelo entrenado una sola vez, capaz de predecir correctamente alzas y bajas en todos los períodos.
*   Posible: Tener un script que sepa cada cuánto reentrenarse y durante cuánto tiempo sigue siendo válido, prediciendo correctamente alzas y bajas en todos los períodos.
*   Realista: Si no va a predecir alzas y bajas correctamente en todos los períodos, que sea capaz de predecir alzas O bajas correctamente.

2.   **PREGUNTAS**:

*   ¿Cuántos meses para entrenar? ¿Cuántos meses para testear? ¿Es igualmente efectivo en todos los meses?
*   ¿Cómo garantizar en el testeo que no se trate de un éxito por aciertos muy juntos, sea en mismo activo o en diferentes activos pero los mismos días?
*   ¿Cómo medir, al final de todo, la comparación entre usar el modelo y comprar el activo en el plazo?
*   ¿Cómo optimizar los parámetros? ¿Incluir en los parámetros el número de rondas, el SL y el TG?

3.   **TO DO**:

*   Modificar la lógica de los deciles por una que vaya generando la posición del activo, sumando por cada 1 que haya, y compararla con solo tener el activo.





### XGBoost

In [None]:
SEMILLA = 5  # shared seed so splits and the parameter search are reproducible

# Monthly period key (YYYYMM), inserted once as the second column of `base`.
month = pd.to_numeric(base['fc'].dt.strftime('%Y%m'))
if 'month' not in base:
  base.insert(1, "month", month)

base = base[(base.target) < 90]  # drop undetermined rows
base = base.sort_values(by=['fc'])

# Chronological partition: 2nd fifth -> parameter search, 3rd-4th fifths -> training,
# last fifth (minus its final 180 rows) -> test. The first fifth is not used.
un_quintos = int(len(base)/5)
dos_quintos = int(len(base)/5)*2
cuatro_quintos = int(len(base)/5)*4
base_params = base.iloc[un_quintos:dos_quintos]
base_train = base.iloc[dos_quintos:cuatro_quintos]
base_test = base.iloc[cuatro_quintos:(len(base)-180)]

# --- Parameter search ---

# Features start at the 8th column; `columns=` replaces the positional axis
# argument of drop(), which pandas 2.x no longer accepts.
X = base_params.iloc[:, 7:].drop(columns="target")
targets = base_params.target

pred_train, pred_test, tar_train, tar_test = train_test_split(X, targets, test_size=.4, random_state=SEMILLA)
eval_set = [(pred_train, tar_train), (pred_test, tar_test)]

bst = XGBClassifier(objective='binary:logistic', seed=SEMILLA)
params = {
    "n_estimators": st.randint(15, 100),
    # NOTE(review): scipy's randint upper bound is exclusive, so this always yields 3.
    "max_depth": st.randint(3, 4),
    "learning_rate": st.uniform(0.05, 0.3),
    "colsample_bytree": st.beta(10, 1),
    "subsample": st.beta(10, 1),
    "gamma": st.uniform(0, 10),
    'reg_alpha': st.expon(0, 50),
    "min_child_weight": st.expon(0, 50)
}
gs = RandomizedSearchCV(bst, params, n_iter=30, n_jobs=1, cv=4, random_state=SEMILLA)
bst = gs.fit(pred_train, tar_train, eval_set=eval_set, eval_metric=["auc"], early_stopping_rounds=10)

# --- Training with the best parameters found ---

X = base_train.iloc[:, 7:].drop(columns="target")
targets = base_train.target

pred_train, pred_test, tar_train, tar_test = train_test_split(X, targets, test_size=.4, random_state=SEMILLA)
eval_set = [(pred_train, tar_train), (pred_test, tar_test)]

classif = XGBClassifier(**bst.best_params_)
model = classif.fit(pred_train, tar_train, eval_set=eval_set, eval_metric=["auc"], early_stopping_rounds=10)
results = model.evals_result()

# --- Analysis ---

epochs = len(results['validation_0']['auc'])
x_axis = range(0, epochs)
# AUC per boosting round on both evaluation sets
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(x_axis, results['validation_0']['auc'], label='Train')
ax.plot(x_axis, results['validation_1']['auc'], label='Test')
ax.legend()
plt.ylabel('auc')
plt.title('XGBoost auc')
plt.show()

# Features with non-zero importance, most important first.
importancia = model.feature_importances_
importancia = pd.DataFrame(importancia)
importancia = importancia.transpose()
importancia.columns = X.columns
b = importancia
c = b.transpose()
r = pd.DataFrame(c).loc[(c[0] > 0),].sort_values([0], ascending=False)
r

In [None]:
# Test: score the hold-out partition and summarise it by predicted-probability bucket.

# `columns=` replaces the positional axis argument of drop(), which pandas 2.x no longer accepts.
X = base_test.iloc[:, 7:].drop(columns="target")
targets = base_test.target

preds = model.predict_proba(X)
A = preds[:, 1]  # probability of class 1
data_ks = pd.DataFrame(targets)
data_ks['prob_suba'] = A
# Up to 50 equal-frequency buckets; duplicated edges are merged.
data_ks['bucket'] = pd.qcut(data_ks['prob_suba'].values, 50, duplicates='drop')
data_ks['no_target'] = 1 - data_ks['target']
grouped = data_ks.groupby('bucket', as_index = False)

# Each aggregate is computed once and reused.
minimos = grouped.min()
maximos = grouped.max()
sumas = grouped.sum()

agg1 = pd.DataFrame({'min_prob_suba': minimos.prob_suba})
agg1['max_prob_suba'] = maximos.prob_suba
agg1['targets'] = sumas.target
agg1['no_targets'] = sumas.no_target
agg1['total'] = agg1.targets + agg1.no_targets
# Expected result per trade: hits earn 0.14 and misses lose 0.06, both net of 0.015.
agg1['ganancia_esperada'] = (agg1.targets/agg1.total)*(0.14-0.015) - (agg1.no_targets/agg1.total)*(0.06-0.015)

agg1


In [None]:
# Per-row outcome of following the model on the test partition.
# `.copy()` makes id_test an independent frame before columns are added to it.
id_test = base_test.iloc[:, 0:4].copy()
id_test['pred'] = preds[:,1]
id_test['efec'] = base_test.target
# A signal is taken when pred > 0.5: hits earn 0.14 - 0.015, misses lose 0.06 + 0.015.
id_test['result'] = np.where(((id_test['pred'] > 0.5) & (id_test['efec'] == 1)), 0.14 - 0.015,
                    np.where(((id_test['pred'] > 0.5) & (id_test['efec'] == 0)), -0.06 - 0.015,
                   0))

# Monthly comparison per asset: summed model result vs. holding the asset.
resumen = []

for activo in id_test.ticker.unique():
  un_activo = id_test[(id_test.ticker == activo)]
  for mes in un_activo.month.unique():
    un_mes = un_activo[(un_activo.month == mes)]
    # First and last rows are taken by position, independent of the index labels.
    ganancia_mensual = (un_mes.y.iloc[-1]/un_mes.y.iloc[0])-1
    ganancia_modelo = un_mes.result.sum()
    resumen.append([activo, mes, ganancia_modelo, ganancia_mensual])

ganancias = pd.DataFrame(resumen, columns=['activo', 'mes', 'ganancia_modelo', 'ganancia_mensual'])
groups = ganancias.groupby("activo")

for name, group in groups:
    plt.plot(group["ganancia_mensual"], group["ganancia_modelo"], marker="o", linestyle="", label=name)

orig = np.linspace(-0.1,0.1,3)
plt.plot(orig,orig,'k-') # identity line
plt.xlabel('ganancia_mensual')
plt.ylabel('ganancia_modelo')
plt.legend(fontsize='small', ncol=2)
plt.show()

In [None]:
# One figure per ticker: predicted probability per row (top, coloured by the
# actual outcome) and the price series (bottom).
for ticker in id_test.ticker.unique():
  toplot = id_test[id_test.ticker == ticker]

  ds = toplot[['fc']].to_numpy()
  y = toplot[['y']].to_numpy()
  ds_list = tuple(ds.reshape(1, -1)[0])

  efec_lis = toplot['efec'].tolist()
  pred_list = toplot['pred'].tolist()
  # red when the outcome is below 0.5, green when it is 0.5 or more
  # (values satisfying neither comparison are skipped)
  col = ['red' if vela < 0.50 else 'green'
         for vela in efec_lis
         if vela < 0.50 or vela >= 0.50]
  y_pos = np.arange(len(ds_list))

  plt.figure(figsize=(12,8))
  plt.subplot(2, 1, 1)
  plt.title(ticker)
  plt.bar(y_pos, pred_list, color=col)
  plt.subplot(2, 1, 2)
  plt.plot(ds, y)
  plt.show()

### Stops

In [None]:
# From what has been seen so far, 0.06 and 0.14 are good candidates.
# Grid search of stop-loss / take-gain levels on one asset.

# descarga() takes dates and returns the frame: compute the window and keep the result.
today = date.today()
df = descarga("YPFD.BA", today - timedelta(days=4000), today - timedelta(days=2))
dias_cierre = 30

# Loop-invariant series, computed once:
# return if the position is simply closed after `dias_cierre` rows ...
var_y_cierre = df.y.shift(-dias_cierre)/df.y - 1
# ... and, per future day, the change of its low / high against the purchase price.
var_diarias = [(df.low.shift(-i)/df.y - 1, df.high.shift(-i)/df.y - 1)
               for i in range(1, dias_cierre + 1)]

resumen_stop = []
x = range(4, 20, 1)
y = range(4, 20, 1)
for m in y:
  TG = (m*0.01)
  for n in x:
    SL = (n*-0.01)
    # 99 marks "no stop hit yet"; the first day that hits SL or TG decides the
    # outcome (previously every day overwrote the result of the previous ones).
    resultado = np.full(len(df), 99.0)
    for var_y_low, var_y_high in var_diarias:
      target = np.where(var_y_low < SL, SL-0.015, 99)
      target = np.where(var_y_high > TG, TG-0.015, target)
      resultado = np.where(resultado == 99, target, resultado)
    # Rows where no stop was hit are closed at the end of the horizon.
    df['target'] = np.where(resultado == 99, var_y_cierre, resultado)

    positivos = len(df.loc[df.target > 0])
    negativos = len(df.loc[df.target < 0])
    avg = df.target.mean()
    resumen_stop.append([SL, TG, avg, positivos, negativos])

resumen_stop = pd.DataFrame(resumen_stop, columns=['SL', 'TG', 'avg', 'pos', 'neg'])
resumen_stop.sort_values(by=['avg'], ascending=False)

In [None]:
# 5th percentile of the per-row result for a single stop-loss / take-gain pair.

# descarga() takes dates and returns the frame: compute the window and keep the result.
today = date.today()
df = descarga("YPFD.BA", today - timedelta(days=300), today - timedelta(days=2))
SL = -0.1
TG = 0.1
dias_cierre = 5

# Return if the position is simply closed after `dias_cierre` rows.
var_y_cierre = df.y.shift(-dias_cierre)/df.y - 1

# 99 marks "no stop hit yet"; the first day that hits SL or TG decides the
# outcome (previously every day overwrote the result of the previous ones).
resultado = np.full(len(df), 99.0)
i = 1
while i <= dias_cierre:
  var_y_low = df.low.shift(-i)/df.y - 1    # day-i low against the purchase price
  var_y_high = df.high.shift(-i)/df.y - 1  # day-i high against the purchase price
  target = np.where(var_y_low < SL, SL-0.015, 99)
  target = np.where(var_y_high > TG, TG-0.015, target)
  resultado = np.where(resultado == 99, target, resultado)
  i = i + 1

# Rows where no stop was hit are closed at the end of the horizon.
df['target'] = np.where(resultado == 99, var_y_cierre, resultado)
# Keeps strictly positive or negative results (drops NaN and exact zeros).
df = df[(df.target>0)|(df.target<0)]
p5 = np.percentile(df.target, 5)
p5

In [None]:
def multi_analisis_rf(dataset, semillas):
  """Train a random forest on ``semillas`` different splits and box-plot its metrics.

  For each seed the development set is split 80/20 (stratified), features are
  standardised with statistics of the training part, a RandomForestClassifier
  is fitted, and accuracy, precision, recall, F1 and F2 are measured on the
  validation part.

  Parameters:
    dataset: currently unused.
    semillas: number of seeds (splits) to evaluate.

  Returns the seaborn boxplot axes with one box per metric.

  Side effects: overwrites the notebook-level ``x_dev``, ``y_dev``, ``x_test``,
  ``y_test``, ``x_val``, ``y_val``, ``y_pred`` and ``y_pred_prob``. Because
  ``y_dev`` is re-split on every call, the function is not idempotent.
  """
  global df, x_dev, y_dev, x_test, y_test, x_val, y_val, y_pred_prob, y_pred
  # NOTE(review): `X_dev` is not defined in the visible part of the notebook and
  # `dataset` is never used -- presumably the features should come from `dataset`; confirm.
  x_dev, x_test, y_dev, y_test = train_test_split(X_dev, y_dev, test_size=0.25, random_state=0)

  n_seeds = semillas
  accs = []
  precisions = []
  recalls = []
  f1s = []
  f1s_two = []

  for seed in range(n_seeds):
    x_train, x_val, y_train, y_val = train_test_split(x_dev, y_dev, test_size=0.2, random_state=seed, stratify=y_dev)

    # Scale with training statistics only, to avoid leaking validation data.
    sc = StandardScaler()
    x_train = sc.fit_transform(x_train)
    x_val = sc.transform(x_val)

    classifier = RandomForestClassifier(random_state=seed)
    classifier = classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_val)
    y_pred_prob = classifier.predict_proba(x_val)

    # Metrics of THIS model's predictions (they used to read the unrelated
    # notebook-level `y_pred_val`).
    accs.append(accuracy_score(y_val, y_pred))
    precisions.append(precision_score(y_val, y_pred))
    recalls.append(recall_score(y_val, y_pred))
    f1s.append(f1_score(y_val, y_pred))
    f1s_two.append(fbeta_score(y_val, y_pred, beta=2))

  metric_labels = ['Accuracy']*len(accs) + ['Precision']*len(precisions) + ['Recall']*len(recalls) + ['F1 Score']*len(f1s) + ['F2 Score']*len(f1s_two)
  all_metrics = accs + precisions + recalls + f1s + f1s_two

  sns.set_context('talk')
  plt.figure(figsize=(15,8))
  # Keyword arguments: recent seaborn versions reject positional x / y.
  return sns.boxplot(x=metric_labels, y=all_metrics)

### Tendencias

In [None]:
# GRAFICAR (relación: dias_empieza=25*lags)
dias_empieza = 1200
dias_termina = 2
today = date.today()
fc_empieza = today + timedelta(days=(dias_empieza*-1))
fc_termina = today + timedelta(days=(dias_termina*-1))

descarga("YPFD.BA", fc_empieza, fc_termina) # (Días empieza, días termina)

lags = 48

i = 1
while i < (lags+1):
    colname = 'p%sb' % (i)                                                  
    df[colname] = round(df.y.shift(i),2)
    j = i * -1
    colname = 'p%sf' % (-j)                                                  
    df[colname] = round(df.y.shift(j),2)
    i = i + 1

df['maxb'] = round(df.filter(regex=(".*b")).max(axis=1),2)
df['maxf']= round(df.filter(regex=(".*f")).max(axis=1),2)
df['minb'] = round(df.filter(regex=(".*b")).min(axis=1),2)
df['minf'] = round(df.filter(regex=(".*f")).min(axis=1),2)
df['techo'] = np.where((df['y']>df['maxb']) & (df['y']>df['maxf']), 1, 0)
df['piso'] = np.where((df['y']<df['minb']) & (df['y']<df['minf']), 1, 0)
techos = df[(df['techo']==1)]
techos['m'] = (techos.y.shift(1) - techos.y)/(techos.fc.shift(1) - techos.fc).dt.days

pisos = df[(df['piso']==1)]
pisos['m'] = (pisos.y.shift(1) - pisos.y)/(pisos.fc.shift(1) - pisos.fc).dt.days

df_list = [techos, pisos]

for df_picos in df_list:  # En cada df (techos y pisos)
  name = [x for x in globals() if globals()[x] is df_picos][0]
  dias = len(df)
  for index, row in df_picos.iloc[1:].iterrows(): # Para cada pico detectado
    y_start = row['y']
    pendiente = row['m']
    if (dias < np.where(df.fc==row['fc'])[0] + lags):
      continue    
    serie = []
    serie = np.append(serie, np.repeat(np.nan, (np.where(df.fc==row['fc'])[0] + lags)))
    i = np.where(df.fc==row['fc'])[0] + lags
    while (i < dias):
      dia = i - (np.where(df.fc==row['fc'])[0] + lags)
      serie = np.append(serie, (y_start + pendiente*lags) + pendiente*dia)
      i = i + 1

    colname = '%s_%s_proy' % (name, index)  # Precio proyectado
    df[colname] = serie # Construye la columna de toda la serie

    colname_pass = '%s_%s_pass' % (name, index) # Pico pasado
    if name == 'techos':
      df[colname_pass] = np.where(df['y']>(df[colname])*1.005, 1, 0)
    else:
      df[colname_pass] = np.where(df['y']<(df[colname])*0.995, 1, 0)
    df[colname_pass] = df[colname_pass].cumsum()

    colname_prueba = '%s_%s_prueba' % (name, index)  
    df[colname_prueba] = np.where((df['y']>df[colname]*0.995)&(df['y']<df[colname]*1.005), 1, 0)
    df[colname_prueba] = df[colname_prueba].cumsum()
'''
    if ((df[colname_pass].sum() > 1)):
      del df[colname]
      del df[colname_pass]
      del df[colname_prueba]
'''
toplot = df.filter(regex=("fc|y|techos(.*)proy|pisos(.*)proy"))
toplot.plot(x='fc', figsize=(10, 5))

## Otros

In [None]:
from sklearn.model_selection import StratifiedKFold, KFold, GridSearchCV, RandomizedSearchCV

# Best fold model seen so far and its F2 score.
best_tree, best_score = None, None

kfoldcv = StratifiedKFold(n_splits=50)

for train_index, test_index in kfoldcv.split(x_dev, y_dev):
  # Split the development set into this fold's training and validation parts.
  x_train, y_train = x_dev[train_index], y_dev[train_index]
  x_val, y_val = x_dev[test_index], y_dev[test_index]

  # Fit a decision tree on the training part of the fold.
  kfold_tree = DecisionTreeClassifier(random_state=42, max_depth=len(df.columns)-5)
  kfold_tree.fit(x_train, y_train)

  # Score the validation predictions with F-beta (beta=2).
  y_pred_val = kfold_tree.predict(x_val)
  f2score = fbeta_score(y_val, y_pred_val, beta=2)

  # Keep the tree only if it is the first one or strictly improves on the best score.
  if best_score is not None and not (best_score < f2score):
    continue
  best_score, best_tree = f2score, kfold_tree

feat_imps = best_tree.feature_importances_
features = df.iloc[:, 5:-2].columns

# Print the features with positive importance, in ascending order of importance.
for feat_imp, feat in sorted(zip(feat_imps, features)):
  if not (feat_imp > 0):
    continue
  print(f'{feat}: {feat_imp}')

In [None]:
# Distribution of the target column: histogram, then the count of each distinct value.
df.hist(column="target")
df["target"].value_counts()

In [None]:
import pickle

# Serialize the fitted model to disk. The context manager closes (and therefore flushes)
# the file handle deterministically instead of leaving it to the garbage collector.
with open("v1/model.dat", "wb") as model_file:
  pickle.dump(model, model_file)


In [None]:
# Reload the pickled model; the context manager closes the file handle once it is read.
# WARNING: pickle.load can execute arbitrary code -- only load files you produced yourself.
with open("v1/model.dat", "rb") as model_file:
  prueba = pickle.load(model_file)

In [None]:
# Redefine the targets: did y move by more than `pc` between a row and the row `dias`
# positions later within the same ticker?
dias = 30   # look-ahead, in rows of the (ticker, fc)-sorted frame
pc = 0.25   # relative move threshold (25%)
SEED = 5    # shared seed so the splits and the search are reproducible

base = base.sort_values(["ticker", "fc"], ascending = (False, True))
# Rows whose look-ahead row belongs to another ticker (or does not exist) have no defined
# target and are marked NaN.
sin_futuro = base.ticker != base.ticker.shift(-dias)
base['target_alza'] = np.where(sin_futuro, np.nan, np.where(base.y.shift(-dias) > base.y * (1+pc), 1, 0))
base['target_baja'] = np.where(sin_futuro, np.nan, np.where(base.y.shift(-dias) < base.y * (1-pc), 1, 0))
base = base.sort_values(["fc"], ascending = (True)) # KS 11
base = base[(base.target) < 90] # Drop undetermined rows
base = base[(base.target_alza) >= 0] # Drop undetermined rows (NaN target)
base = base[(base.target_baja) >= 0] # Drop undetermined rows (NaN target)

# Chronological split by row position: the 2nd fifth tunes hyper-parameters, the 3rd and
# 4th fifths train, and the last fifth minus its final 180 rows is the test set.
base = base.sort_values(by=['fc'])
un_quintos = int(len(base)/5)
dos_quintos = int(len(base)/5)*2
cuatro_quintos = int(len(base)/5)*4
base_params = base.iloc[un_quintos:dos_quintos]
base_train = base.iloc[dos_quintos:cuatro_quintos]
base_test = base.iloc[cuatro_quintos:(len(base)-180)]

# Columns that must never be used as predictors.
cols_target = ["target_alza", "target_baja", "target"]

# Search space for the randomized hyper-parameter search (shared by both targets).
params = {
    "n_estimators": st.randint(15, 100),
    # NOTE(review): scipy's randint excludes the upper bound, so this always samples 3.
    "max_depth": st.randint(3, 4),
    "learning_rate": st.uniform(0.05, 0.3),
    "colsample_bytree": st.beta(10, 1),
    "subsample": st.beta(10, 1),
    "gamma": st.uniform(0, 10),
    'reg_alpha': st.expon(0, 50),
    "min_child_weight": st.expon(0, 50)
}

# Same two-step pipeline for each direction:
#   target_alza -> y rose by more than `pc` after `dias` rows
#   target_baja -> y fell by more than `pc` after `dias` rows
modelos = {}
for col_target in ("target_alza", "target_baja"):
  # Step 1 - tune hyper-parameters on base_params.
  # `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
  X = base_params.iloc[:, 7:].drop(columns=cols_target)
  targets = base_params[col_target]
  pred_train, pred_test, tar_train, tar_test = train_test_split(X, targets, test_size=.4, random_state=SEED)
  eval_set = [(pred_train, tar_train),(pred_test, tar_test)]
  bst = XGBClassifier(objective='binary:logistic', seed=SEED)
  gs = RandomizedSearchCV(bst, params, n_iter=20, n_jobs=1, cv=4, random_state=SEED)
  bst = gs.fit(pred_train, tar_train, eval_set=eval_set, eval_metric=["auc"], early_stopping_rounds=10)

  # Step 2 - train on base_train with the best parameters found.
  X = base_train.iloc[:, 7:].drop(columns=cols_target)
  targets = base_train[col_target]
  pred_train, pred_test, tar_train, tar_test = train_test_split(X, targets, test_size=.4, random_state=SEED)
  eval_set = [(pred_train, tar_train),(pred_test, tar_test)]
  classif = XGBClassifier(**bst.best_params_)
  modelos[col_target] = classif.fit(pred_train, tar_train, eval_set=eval_set, eval_metric=["auc"], early_stopping_rounds=10)

model_alza = modelos["target_alza"]
model_baja = modelos["target_baja"]

[0]	validation_0-auc:0.700539	validation_1-auc:0.685149
Multiple eval metrics have been passed: 'validation_1-auc' will be used for early stopping.

Will train until validation_1-auc hasn't improved in 10 rounds.
[1]	validation_0-auc:0.710164	validation_1-auc:0.687943
[2]	validation_0-auc:0.745365	validation_1-auc:0.725016
[3]	validation_0-auc:0.76796	validation_1-auc:0.746783
[4]	validation_0-auc:0.766441	validation_1-auc:0.743944
[5]	validation_0-auc:0.764863	validation_1-auc:0.739811
[6]	validation_0-auc:0.767619	validation_1-auc:0.740672
[7]	validation_0-auc:0.767949	validation_1-auc:0.740482
[8]	validation_0-auc:0.767949	validation_1-auc:0.740482
[9]	validation_0-auc:0.776306	validation_1-auc:0.742071
[10]	validation_0-auc:0.788797	validation_1-auc:0.752366
[11]	validation_0-auc:0.793592	validation_1-auc:0.759607
[12]	validation_0-auc:0.793592	validation_1-auc:0.759607
[13]	validation_0-auc:0.806697	validation_1-auc:0.770089
[14]	validation_0-auc:0.806697	validation_1-auc:0.770089

In [None]:
# Test: score the held-out rows with both models.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
X = base_test.iloc[:, 7:].drop(columns=["target_alza", "target_baja", "target"])
preds_alza = model_alza.predict_proba(X)
preds_baja = model_baja.predict_proba(X)

# Out-of-sample table: first four columns of base_test plus the positive-class
# probabilities. .copy() makes it an independent frame before columns are added.
oos = base_test.iloc[:, 0:4].copy()
oos['pred_alza'] = preds_alza[:,1]
oos['pred_baja'] = preds_baja[:,1]

# Realized relative change of y after `dias` rows within the same ticker; rows whose
# look-ahead row belongs to another ticker (or does not exist) are set to NaN.
oos = oos.sort_values(["ticker", "fc"], ascending = (False, True))
sin_futuro = oos.ticker != oos.ticker.shift(-dias)
oos['result'] = (oos.y.shift(-dias)/oos.y) - 1
oos['result'] = np.where(sin_futuro, np.nan, oos.result)
oos['result_pn'] = np.where(oos.result > 0, "Alza", "Baja")
oos['result_pn'] = np.where(sin_futuro, np.nan, oos.result_pn)
oos = oos.sort_values(["fc"], ascending = (True))

In [None]:
# Bin each predicted probability into (up to) 10 quantile buckets; duplicate bin edges
# are dropped, so fewer buckets are possible.
oos['bucket_alza'] = pd.qcut(oos['pred_alza'].values, 10, duplicates='drop')
oos['bucket_baja'] = pd.qcut(oos['pred_baja'].values, 10, duplicates='drop')

# Median realized result for every (bucket_alza, bucket_baja) pair, with bucket_baja
# pivoted into columns. The label says "Medianas" because the aggregation is the median.
# Other views of the same grouping (swap the aggregation): 'count', max, min.
print("Medianas:")
oos.groupby(['bucket_alza', 'bucket_baja']).agg({'result': ['median']}).unstack()

Promedios:


Unnamed: 0_level_0,result,result,result,result,result,result,result,result,result,result
Unnamed: 0_level_1,median,median,median,median,median,median,median,median,median,median
bucket_baja,"(0.0030299999999999997, 0.0151]","(0.0151, 0.021]","(0.021, 0.0283]","(0.0283, 0.0362]","(0.0362, 0.0532]","(0.0532, 0.0785]","(0.0785, 0.105]","(0.105, 0.138]","(0.138, 0.202]","(0.202, 0.71]"
bucket_alza,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
"(0.0154, 0.0781]",-0.002106,-0.027406,-0.018938,-0.15442,-0.17725,-0.146592,-0.175,-0.104145,0.080817,-0.195892
"(0.0781, 0.12]",-0.016088,0.002853,-0.031574,0.018257,-0.007788,0.183447,-0.031104,-0.027381,0.066852,-0.149711
"(0.12, 0.148]",0.006337,-0.00263,-0.007178,-0.001817,0.059093,0.138627,0.051949,0.082096,-0.040082,-0.031418
"(0.148, 0.179]",0.02645,0.057533,0.135254,0.204751,0.159389,0.015388,0.078327,0.084318,-0.043411,-0.150497
"(0.179, 0.216]",0.22566,0.059163,0.225894,0.310949,0.05712,0.016515,0.05867,-0.006889,0.015373,-0.088639
"(0.216, 0.256]",0.285029,-0.073037,0.257622,0.104363,0.056224,0.039088,-0.126528,-0.099356,0.209333,-0.106327
"(0.256, 0.326]",0.279587,0.123875,0.092372,-0.178011,-0.106924,-0.012963,0.086008,-0.149106,-0.061456,-0.106596
"(0.326, 0.425]",0.331968,0.342992,-0.053239,0.060294,0.017042,0.014622,-0.151387,-0.319634,0.070003,-0.017299
"(0.425, 0.577]",0.342325,0.289071,0.250798,0.095568,0.295402,-0.239239,-0.165391,-0.222001,-0.134677,-0.291532
"(0.577, 0.906]",-0.089963,0.127851,0.073093,-0.254244,0.086645,-0.264306,-0.369829,0.026383,-0.47672,-0.303346
