In [1]:
import os
import glob
import pandas as pd

In [5]:
# Folder holding the exported CSV datasets (the MetaQuotes terminal
# "Common/Files" directory, as mounted on the author's machine).
file_folder = r"/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/Common/Files/"

# Locate the main dataset export. glob returns matches in arbitrary order, so
# sort them to make the choice of file deterministic, and fail with a clear
# message instead of a bare IndexError when nothing matches.
file_pattern = os.path.join(file_folder, 'training_dataset_*.csv')
df_file_path = sorted(glob.glob(file_pattern))
if not df_file_path:
    raise FileNotFoundError(f"No dataset file matches pattern: {file_pattern}")

# Main dataset: its 'target' column is renamed to 'profit' so it does not
# share a name with the 'target' column of the buy/sell files.
df = pd.read_csv(df_file_path[0])
df = df.rename(columns={'target': 'profit'})

# Per-direction datasets.
df_buy = pd.read_csv(os.path.join(file_folder, 'buy_training_dataset.csv'))
df_sell = pd.read_csv(os.path.join(file_folder, 'sell_training_dataset.csv'))

# Join keys: every column of the main dataset except its last two
# (assumed to be 'type' and 'profit' -- TODO confirm against the export format).
df_key_columns = df.columns[:-2]

# Outer-join the three datasets on the shared key columns; the 'target'
# columns of the buy and sell files come out as 'target_x' and 'target_y'.
merged_df = df.merge(df_buy, on=df_key_columns.tolist(), how='outer')
merged_df = merged_df.merge(df_sell, on=df_key_columns.tolist(), how='outer')

# Preview ten randomly chosen rows of the merged frame.
merged_df.sample(frac=1).reset_index(drop=True).head(10)

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f206,f207,f208,f209,f210,f211,type,profit,target_x,target_y
0,-0.229222,-0.716816,2.639495,0.389555,1.0228,0.012361,0.011462,-0.505163,1.394765,-0.098319,...,20.752655,0.000845,-0.160874,15.357991,7.0,1.0,-1.0,-2.5,,0.0
1,0.71517,0.253498,6.843569,-0.24393,0.383555,0.069937,0.507837,0.204452,3.126978,0.142434,...,25.273376,0.430911,0.075332,20.660525,21.0,1.0,-1.0,23.6,,1.0
2,1.340641,1.794771,3.101519,0.777882,0.592874,0.033398,1.183137,1.614689,0.678872,0.442769,...,11.931607,-0.038083,-0.622013,9.385552,14.0,4.0,-1.0,4.9,,
3,0.067444,0.018684,3.259784,0.124494,-0.109022,0.035305,0.234956,-0.806622,1.533229,1.251674,...,18.221842,0.48572,-0.392502,17.125959,19.0,1.0,1.0,5.1,1.0,
4,0.596353,0.28317,11.288288,0.53932,-0.455662,0.162908,1.42816,2.287298,3.485994,0.735704,...,16.018207,-0.117832,-0.677927,13.666846,17.0,2.0,-1.0,17.5,,1.0
5,-0.076826,-1.590011,11.918304,-0.850932,1.087858,0.036705,0.071253,-0.476449,7.261816,-1.652164,...,25.487235,-0.423433,-0.097447,24.172909,6.0,3.0,-1.0,26.5,,1.0
6,-0.396569,-0.662891,7.386234,0.576533,-0.384117,0.044185,-0.453918,-0.483395,4.126388,0.173018,...,14.678997,-0.097006,0.087517,10.797127,20.0,1.0,1.0,1.3,,
7,0.611469,0.18381,4.483177,0.810253,0.27842,0.033004,1.095388,1.10893,1.977439,-1.06714,...,19.062175,-0.336267,-0.31326,14.701112,6.0,3.0,-1.0,11.2,,1.0
8,0.041794,-0.513998,7.746495,-0.186135,-0.650183,0.060794,0.669992,0.317564,3.041877,-0.46383,...,18.704942,0.963843,0.492635,11.699487,22.0,1.0,-1.0,14.1,,1.0
9,-0.148189,-0.332966,6.134623,0.041089,-0.903942,0.054768,-1.156916,1.047938,2.548965,0.293508,...,22.916282,-0.268328,-0.174053,19.692494,8.0,4.0,1.0,-15.3,0.0,


In [7]:
def create_training_dataset(df, trade_type, random_state=None):
    """Build a class-balanced, shuffled training set for one trade direction.

    Rows are de-duplicated, filtered to ``trade_type`` with a non-zero
    ``profit``, and split into winners (``profit > 0``) and losers
    (``profit < 0``). The larger class is cut down to the size of the smaller
    one by keeping its most extreme rows (largest wins or largest losses), so
    the retained sample of that class is deliberately not random.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'type'`` and ``'profit'``; every other
        column (except an existing ``'target'``, which is replaced) is
        treated as a feature.
    trade_type : int
        Value of ``'type'`` to keep. ``1`` is reported as "Buy"; any other
        value is reported as "Sell" in the printed summary.
    random_state : int, numpy Generator/RandomState or None, optional
        Seed forwarded to ``DataFrame.sample`` for the final shuffle. The
        default ``None`` keeps the original non-reproducible shuffle.

    Returns
    -------
    pandas.DataFrame or bool
        The feature columns followed by a binary ``'target'`` column
        (1 = winning trade, 0 = losing trade), or ``False`` when either class
        has no rows.
    """
    df = df.drop_duplicates()

    # Keep only trades of the requested type that ended with a non-zero profit.
    df_trade = df[(df['type'] == trade_type) & (df['profit'] != 0)]

    # Split into winning and losing trades.
    df_winning = df_trade[df_trade['profit'] > 0]
    df_losing = df_trade[df_trade['profit'] < 0]
    n_winning = len(df_winning)
    n_losing = len(df_losing)
    print(f"Tipo de operación: {'Buy' if trade_type == 1 else 'Sell'}")
    print(f"Total Ganadoras: {n_winning}")
    print(f"Total Perdedoras: {n_losing}")

    # Both classes are required to build a balanced set.
    if n_winning == 0 or n_losing == 0:
        print(f"No hay suficientes datos para {'compras' if trade_type == 1 else 'ventas'} para entrenar el modelo.")
        return False

    # Balance the classes: keep the whole minority class and only the most
    # extreme rows of the majority class.
    n_samples_per_class = min(n_winning, n_losing)
    if n_winning <= n_losing:
        selected_winning = df_winning
        # Largest losses first (most negative profit).
        selected_losing = df_losing.sort_values(by='profit', ascending=True).head(n_samples_per_class)
    else:
        selected_losing = df_losing
        # Largest profits first.
        selected_winning = df_winning.sort_values(by='profit', ascending=False).head(n_samples_per_class)
    print(f"Se seleccionarán {n_samples_per_class} muestras por clase.")

    df_training = pd.concat([selected_winning, selected_losing], ignore_index=True)

    # Binary label derived from the sign of the profit (vectorized).
    df_training['target'] = (df_training['profit'] > 0).astype('int64')

    # Select the features by name rather than by position, so that 'type' and
    # 'profit' can never leak into the features when the column order differs.
    non_feature_columns = {'type', 'profit', 'target'}
    feature_columns = [col for col in df.columns if col not in non_feature_columns]
    df_training = df_training[feature_columns + ['target']]

    # Shuffle the rows; pass random_state for a reproducible order.
    df_training = df_training.sample(frac=1, random_state=random_state).reset_index(drop=True)
    return df_training
# Build the training sets in order: Buy (trade_type = 1) first, then
# Sell (trade_type = -1).
df_buy_, df_sell_ = (
    create_training_dataset(df, trade_type=side) for side in (1, -1)
)

Tipo de operación: Buy
Total Ganadoras: 3037
Total Perdedoras: 2505
Se seleccionarán 2505 muestras por clase.
Tipo de operación: Sell
Total Ganadoras: 2789
Total Perdedoras: 2180
Se seleccionarán 2180 muestras por clase.
