In [51]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go

In [52]:
def read_prediction_file(file_path):
    """Load a forecast CSV and validate that it is usable downstream.

    The file must contain the columns "True Values" and "Predictions", hold
    at least one row, and have the same number of non-missing entries in both
    columns.

    Args:
        file_path: Path (or any other source accepted by ``pandas.read_csv``)
            of the forecast file.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when reading or
        validation fails. Failures are reported on stdout rather than raised,
        so callers must check the result for ``None``.
    """
    required_columns = ("True Values", "Predictions")
    try:
        df = pd.read_csv(file_path)
        # Verify that the file has both the "True Values" and "Predictions" columns.
        if any(column not in df.columns for column in required_columns):
            # Show what was actually found to make the problem easy to diagnose.
            print(df.columns)
            raise ValueError("Archivo no contiene columnas de valores reales y predicciones.")

        # Verify that the file has at least one row.
        if len(df) < 1:
            raise ValueError("Archivo de pronostico no contiene datos.")

        # Verify that both columns hold the same number of values. Two columns
        # of one DataFrame always have the same length, so comparing shapes can
        # never fail; a shorter column shows up as missing values instead, and
        # ``count()`` (non-missing entries) is what detects that.
        if df["True Values"].count() != df["Predictions"].count():
            raise ValueError("Número de filas de valores reales y predicciones no coinciden.")
    except Exception as e:
        # Deliberately broad: any read/parse/validation problem is reported and
        # turned into a ``None`` result instead of propagating.
        print("ERROR: no se completó la lectura de archivo de pronósticos. {}".format(e))
        return None
    return df

In [53]:
# Load the forecast file. read_prediction_file reports problems on stdout and
# returns None instead of raising.
df = read_prediction_file("./input/strat.csv")
# Stop here with a clear error rather than letting later cells fail on None.
if df is None:
    raise RuntimeError("No se pudo cargar el archivo de pronósticos './input/strat.csv'.")
df

Unnamed: 0,Date,LMCADS03,DXY,COMXCOPR,SPX,BCOM,MXWD,CL1,XAU,XAG,LMCADY RETURN,True Values,LMCADY,Predictions,PREDICTED PRICE,ACTUAL FUTURE PRICE,ERROR,ERROR%,xpc_chg,act_chg
0,2/3/2021,7842.5,91.17,71605,3830.17,81.8088,662.97,55.69,1834.04,26.8915,0.008249,0.055912,7851.50,0.036211,8141.023004,8157.75,16.7270,0.002050442,289.52,306.25
1,2/4/2021,7823.0,91.53,70709,3871.74,81.9778,666.76,56.23,1794.03,26.3516,-0.003317,0.057572,7825.50,0.046348,8196.731196,8303.00,106.2688,0.012798844,371.23,477.50
2,2/5/2021,7912.5,91.04,70313,3886.83,82.4711,670.56,56.85,1814.11,26.9205,0.012887,0.052256,7927.00,0.036340,8220.367260,8289.25,68.8827,0.008309888,293.37,362.25
3,2/8/2021,8037.0,90.93,69788,3915.59,83.5421,675.53,57.97,1830.81,27.2725,0.015491,0.045141,8050.75,0.034328,8331.916042,8352.25,20.3340,0.002434549,281.17,301.50
4,2/9/2021,8147.5,90.44,69106,3911.23,83.7736,676.70,58.36,1838.32,27.2674,0.013203,0.032116,8157.75,0.029217,8399.608307,8422.50,22.8917,0.002717921,241.86,264.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
823,4/1/2024,8867.0,105.02,29905,5243.77,99.9046,780.83,83.71,2251.44,25.0842,0.000000,0.057085,8766.51,0.041895,9141.588452,9208.50,66.9115,0.007266281,375.08,441.99
824,4/2/2024,8991.0,104.82,29891,5205.81,100.6986,777.04,85.15,2280.67,26.1411,0.013770,0.043476,8888.06,0.035317,9207.571392,0.00,9207.5714,#DIV/0!,319.51,-8888.06
825,4/3/2024,9262.5,104.25,29618,5211.49,101.9094,777.81,85.43,2300.00,27.1822,0.029106,0.009776,9150.56,0.017592,9312.957030,0.00,9312.9570,#DIV/0!,162.40,-9150.56
826,4/4/2024,9359.0,104.12,29684,5147.21,102.2595,773.44,86.59,2290.94,26.9141,0.009950,-0.003486,9242.06,0.002809,9268.056916,0.00,9268.0569,#DIV/0!,26.00,-9242.06


In [54]:
# Given: a log return R and a starting price P0.
R = 0.015400
P0 = 1594.75
# Compute the final index value: P0 compounded at the log return R.
Pf = P0 * np.exp(R)
# NOTE(review): bare expression in the middle of the cell; it is not the cell's last expression, so nothing is displayed.
Pf

# RMSE passed to adjust_prediction below when building the 'pred-rmse' column.
# NOTE(review): presumably the model's RMSE in log-return units — confirm where this value comes from.
rmse = 0.015782

def adjust_prediction(pred, rmse, th = 2):
    """Shrink a prediction toward zero by ``rmse``, zeroing out weak signals.

    Args:
        pred: Predicted value to adjust.
        rmse: Error magnitude subtracted from the size of the prediction.
        th: Multiplier of ``rmse`` that defines the dead band; a prediction
            whose magnitude is below ``abs(rmse * th)`` is discarded.

    Returns:
        ``0`` when the prediction falls inside the dead band; otherwise the
        prediction moved toward zero by ``rmse`` (``pred + rmse`` for negative
        predictions, ``pred - rmse`` for the rest).
    """
    dead_band = abs(rmse * th)
    if abs(pred) < dead_band:
        return 0
    return pred + rmse if pred < 0 else pred - rmse

# Adjust every prediction with adjust_prediction, passing the notebook-level rmse
# as its second argument (predictions inside the dead band become 0).
df['pred-rmse'] = df['Predictions'].apply(adjust_prediction, args=(rmse,))

# Adjusted price: the LMCADY price scaled by exp() of the adjusted prediction.
df['pr'] = df['LMCADY'].mul(np.exp(df['pred-rmse']))



In [55]:
# Seed NumPy's global RNG so the random draw below is reproducible.
np.random.seed(0)
# Minimum number of rows between two consecutive eligible predictions; also
# printed below as the forecast horizon in days.
max_wait = 5

# 50 random integers in [0, 829) — the upper bound of np.random.randint is exclusive.
# NOTE(review): random_indices is not used anywhere else in the visible notebook.
random_indices = np.random.randint(0, 829, 50)

def prepare_predictions(df, max_wait):
    """Flag the rows on which a prediction is issued.

    A row is eligible when its 'pred-rmse' value is non-zero and at least
    ``max_wait`` rows have passed since the previous eligible row. The first
    non-zero row is always eligible.

    Rows are addressed by position, so the result does not depend on the
    DataFrame's index labels.

    Args:
        df: DataFrame with a 'pred-rmse' column. It is modified in place: a
            boolean 'eligible' column is added (or overwritten).
        max_wait: Minimum distance, in rows, between two eligible rows.

    Returns:
        The same ``df`` object, with the 'eligible' column set.
    """
    adjusted = df['pred-rmse'].to_numpy()
    eligible = np.zeros(len(df), dtype=bool)
    # Start far enough back that the very first row can be eligible.
    last_true_index = -max_wait

    for position, value in enumerate(adjusted):
        if value != 0 and position >= last_true_index + max_wait:
            eligible[position] = True
            last_true_index = position

    # Single positional assignment instead of one label-based write per row.
    df['eligible'] = eligible
    return df

# Flag the rows on which a prediction is issued.
df = prepare_predictions(df, max_wait)

total_predictions = 0

# Visit only the rows flagged as eligible and report each prediction.
for index, row in df.loc[df['eligible']].iterrows():
    fecha = datetime.strptime(row['Date'], "%m/%d/%Y")
    print(
        "Periodo {}, se pronostica un retorno logarítmico acumulado de {:.6f} para los siguientes {} días.".format(
            fecha.strftime("%d/%m/%Y"), row["pred-rmse"], max_wait
        )
    )
    print("\tEl precio actual es                              {:.2f}".format(row["LMCADY"]))
    print("\tEl precio esperado al fin del periodo es         {:.2f}".format(row["pr"]))
    print("\tEl precio REAL alcanzado al fin del periodo es   {:.2f}".format(row["ACTUAL FUTURE PRICE"]))
    total_predictions += 1

print("Total de predicciones: {}".format(total_predictions))


Periodo 03/02/2021, se pronostica un retorno logarítmico acumulado de 0.020429 para los siguientes 5 días.
	El precio actual es                              7851.50
	El precio esperado al fin del periodo es         8013.55
	El precio REAL alcanzado al fin del periodo es   8157.75
Periodo 15/02/2021, se pronostica un retorno logarítmico acumulado de 0.043830 para los siguientes 5 días.
	El precio actual es                              8422.50
	El precio esperado al fin del periodo es         8799.87
	El precio REAL alcanzado al fin del periodo es   8946.75
Periodo 23/02/2021, se pronostica un retorno logarítmico acumulado de -0.022395 para los siguientes 5 días.
	El precio actual es                              9231.50
	El precio esperado al fin del periodo es         9027.06
	El precio REAL alcanzado al fin del periodo es   9089.00
Periodo 09/03/2021, se pronostica un retorno logarítmico acumulado de 0.023865 para los siguientes 5 días.
	El precio actual es                             

In [60]:
import plotly.express as px
import plotly.graph_objects as go

fig = px.line(df, x=df.index, y='LMCADY', title='Precio de cierre del índice LMCADY')

# Rows with a non-zero adjusted prediction, and the adjusted price on those rows
# (multiplying by the boolean mask zeroes the others).
df['bool_pred'] = df['pred-rmse'].ne(0)
df['coso'] = df['pr'].mul(df['bool_pred'])

# Start with an all-NaN series; only the segments filled in below get a value.
df['coso_line'] = np.nan

# Length, in rows, of each horizontal segment.
line_length = 5
last_true_index = -line_length  # lets the very first row start a segment

for idx, has_prediction in enumerate(df['bool_pred'].tolist()):
    # Skip rows without a prediction and rows closer than line_length to the last segment start.
    if not has_prediction or idx < last_true_index + line_length:
        continue
    # Label-based slice (end inclusive): fills line_length rows starting at idx.
    df.loc[idx:idx + line_length - 1, 'coso_line'] = df['coso'].iloc[idx]
    last_true_index = idx

# Overlay the horizontal expected-price segments on the price chart.
fig.add_scatter(
    x=df.index,
    y=df['coso_line'],
    mode='lines',
    name='Precio esperado',
    line=dict(color='orange'),
)

fig.show()


In [61]:
import plotly.express as px
import plotly.graph_objects as go

fig = px.line(df, x=df.index, y='LMCADY', title='Precio de cierre del índice LMCADY')

# Rows with a non-zero adjusted prediction.
df['bool_pred'] = df['pred-rmse'].ne(0)

# Adjusted price on rows with a prediction, zero elsewhere.
coso = df['pr'].mul(df['bool_pred'])
last_true_index = -5  # lets the very first row be selected

# Marker positions, colours and prices, collected in parallel.
indices = []
colors = []
coso_values = []

for idx, has_prediction in enumerate(df['bool_pred'].tolist()):
    if not has_prediction:
        continue
    if idx < last_true_index + 5:
        # Prediction too close to the previously selected one: zero it out.
        coso.iloc[idx] = 0
        continue
    last_true_index = idx
    expected_price = coso.iloc[idx]
    # Green when the adjusted price is above the current price, red otherwise.
    colors.append('green' if expected_price > df['LMCADY'].iloc[idx] else 'red')
    indices.append(idx)
    coso_values.append(expected_price)

# Add the selected points to the chart.
expected_markers = go.Scatter(
    x=indices,
    y=coso_values,
    mode='markers',
    marker=dict(color=colors),
    name='Precio esperado',
)
fig.add_trace(expected_markers)

fig.show()


In [None]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

y_true = np.array([3, -0.5, 2, 7, 4.5, 3.0, 5.2, 1.1, 6.0, 2.8, 3.5, 2.1, 0.3, 4.2, 3.8, 7.4, 5.1, 6.8, 4.0, 2.7])

# Build y_pred by adding Gaussian noise (mean 0, standard deviation 0.5) to y_true.
np.random.seed(0)  # reproducibility
errors = np.random.normal(loc=0, scale=0.5, size=len(y_true))
y_pred = y_true + errors

# Error metrics of the predictions.
mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
# Note: this rebinds the notebook-level name rmse.
rmse = np.sqrt(mse)

# Dispersion of y_true itself, as a yardstick for each error metric.
mad = np.abs(y_true - y_true.mean()).mean()
variance = y_true.var()
stdev = y_true.std()

print(f'MAE: {mae}')
print(f'MAD: {mad}')
print()
print(f'MSE: {mse}')
print(f'Variance: {variance}')
print()
print(f'RMSE: {rmse}')
print(f'Standard Deviation: {stdev}')




MAE: 0.39922269362457696
MAD: 1.7

MSE: 0.26173816820491236
Variance: 4.446

RMSE: 0.5116035263804506
Standard Deviation: 2.1085540068966693
