In [13]:
# Random forest
import pandas as pd

import talib as ta
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report

In [14]:
# Descargar los datos de precios del futuro del Nasdaq
df = yf.Ticker('NQ=F')
df = df.history(start='2020-01-01', end='2023-01-01')

In [15]:
# Calcular indicadores técnicos adicionales
df['SMA_10'] = ta.SMA(df['Close'], timeperiod=10) # Media Móvil Simple de 10 dias

# Crear la columna objetivo 'Sube', que indica si el precio sube al dia siguiente
df['Sube'] = (df['Close'].shift(-1) > df['Close']).astype(int)

In [16]:
# Limpiar datos eliminando NaNs
df.dropna(inplace=True)

In [17]:
# Definir las variables independientes (indicadores técnicos y precio de cierre)
X = df[['Close', 'SMA_10']]

y = df['Sube']

In [18]:
# Dividir los datos en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [19]:
# Definir los hiperparámetros que queremos probar con Grid

param_grid = {'n_estimators' : [100, 200, 300], # Número de árboles
              'max_depth' : [None, 10, 20, 30], # Profundidad máxima de los árboles
              'min_samples_split' : [2, 5, 10], # Mínimo de muestra para divir un nodo
              'min_samples_leaf' : [1, 2, 4],    # Mínimo de muestras en una hoja
              'bootstrap': [True, False]}       # Si utilizar o no el bootstrap

In [20]:
# Crear el modelo Random Forest
rf_model = RandomForestClassifier(random_state=42)

In [21]:
# Crear el objeto GridSearchCV
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid,
                           cv=5, n_jobs=-1, verbose=2)

In [22]:
# Ajustar el modelo con los datos de entrenamiento
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 216 candidates, totalling 1080 fits
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.4s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.4s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.4s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.6s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.6s
[CV] END bootstrap=True, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; tot

In [23]:
# Imprimir los mejores hiperparametros encontrados
print(f'Mejores Hiperparámetros: {grid_search.best_estimator_}')

Mejores Hiperparámetros: RandomForestClassifier(max_depth=20, min_samples_leaf=4, random_state=42)


In [24]:
# Evaluar el modelo con los mejores hiperparámetros
best_rf_model = grid_search.best_estimator_
y_pred = best_rf_model.predict(X_test)

In [25]:
# Evaluar el modelo
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del Random Forest despues de ajuste: {accuracy}')
print(classification_report(y_test, y_pred))

Precisión del Random Forest despues de ajuste: 0.4866666666666667
              precision    recall  f1-score   support

           0       0.40      0.35      0.37        66
           1       0.54      0.60      0.56        84

    accuracy                           0.49       150
   macro avg       0.47      0.47      0.47       150
weighted avg       0.48      0.49      0.48       150

