In [2]:
import pandas as pd
import numpy as np

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
def read_prediction_file(file_path):
    """Load a forecast CSV and run basic sanity checks on it.

    The file must be readable by ``pd.read_csv``, contain the columns
    "True Values" and "Predictions", have at least one row, and have the
    same number of non-missing entries in both of those columns.

    Args:
        file_path: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or None when reading or validation fails. On
        failure the problem is printed instead of being raised.
    """
    try:
        df = pd.read_csv(file_path)
        # Both value columns are required by the checks that follow.
        if "True Values" not in df.columns or "Predictions" not in df.columns:
            print(df.columns)  # show which columns were actually found
            raise Exception("Archivo no contiene columnas de valores reales y predicciones.")

        # The file must contain at least one data row.
        if df.shape[0] < 1:
            raise Exception("Archivo de pronostico no contiene datos.")

        # Two columns of one DataFrame always have the same length, so comparing
        # their shapes can never fail. Compare the non-null counts instead: that
        # is the only way the two columns can hold a different number of values.
        if df["True Values"].count() != df["Predictions"].count():
            raise Exception("Número de filas de valores reales y predicciones no coinciden.")
    except Exception as e:
        # Deliberate best-effort: any read/parse/validation problem is reported
        # and signalled to the caller through a None return value.
        print("ERROR: no se completó la lectura de archivo de pronósticos. {}".format(e))
        return None
    return df

In [4]:
# Load the strategy forecast file; the result is None if reading or validation failed.
df = read_prediction_file(file_path="./input/strat.csv")
df

Unnamed: 0,Date,LMCADS03,DXY,COMXCOPR,SPX,BCOM,MXWD,CL1,XAU,XAG,LMCADY RETURN,True Values,LMCADY,Predictions,PREDICTED PRICE,ACTUAL FUTURE PRICE,ERROR,ERROR%,xpc_chg,act_chg
0,2/3/2021,7842.5,91.17,71605,3830.17,81.8088,662.97,55.69,1834.04,26.8915,0.008249,0.055912,7851.50,0.036211,8141.023004,8157.75,16.7270,0.002050442,289.52,306.25
1,2/4/2021,7823.0,91.53,70709,3871.74,81.9778,666.76,56.23,1794.03,26.3516,-0.003317,0.057572,7825.50,0.046348,8196.731196,8303.00,106.2688,0.012798844,371.23,477.50
2,2/5/2021,7912.5,91.04,70313,3886.83,82.4711,670.56,56.85,1814.11,26.9205,0.012887,0.052256,7927.00,0.036340,8220.367260,8289.25,68.8827,0.008309888,293.37,362.25
3,2/8/2021,8037.0,90.93,69788,3915.59,83.5421,675.53,57.97,1830.81,27.2725,0.015491,0.045141,8050.75,0.034328,8331.916042,8352.25,20.3340,0.002434549,281.17,301.50
4,2/9/2021,8147.5,90.44,69106,3911.23,83.7736,676.70,58.36,1838.32,27.2674,0.013203,0.032116,8157.75,0.029217,8399.608307,8422.50,22.8917,0.002717921,241.86,264.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
823,4/1/2024,8867.0,105.02,29905,5243.77,99.9046,780.83,83.71,2251.44,25.0842,0.000000,0.057085,8766.51,0.041895,9141.588452,9208.50,66.9115,0.007266281,375.08,441.99
824,4/2/2024,8991.0,104.82,29891,5205.81,100.6986,777.04,85.15,2280.67,26.1411,0.013770,0.043476,8888.06,0.035317,9207.571392,0.00,9207.5714,#DIV/0!,319.51,-8888.06
825,4/3/2024,9262.5,104.25,29618,5211.49,101.9094,777.81,85.43,2300.00,27.1822,0.029106,0.009776,9150.56,0.017592,9312.957030,0.00,9312.9570,#DIV/0!,162.40,-9150.56
826,4/4/2024,9359.0,104.12,29684,5147.21,102.2595,773.44,86.59,2290.94,26.9141,0.009950,-0.003486,9242.06,0.002809,9268.056916,0.00,9268.0569,#DIV/0!,26.00,-9242.06


In [5]:
# Given: a log return R and a starting index level P0.
R, P0 = 0.015400, 1594.75
# Final index level obtained by compounding P0 at the log return R.
Pf = np.exp(R) * P0
Pf

# Error margin applied to the predictions further down in this cell
# (hard-coded here; where the value comes from is not shown — TODO confirm).
rmse = 0.015782

def adjust_prediction(pred, rmse, th = 1.75):
    """Shrink a prediction toward zero by the error margin ``rmse``.

    Args:
        pred: Predicted value (positive or negative).
        rmse: Error margin subtracted from the magnitude of ``pred``.
        th: Multiplier on ``rmse``; predictions whose magnitude is below
            ``abs(rmse * th)`` are discarded.

    Returns:
        0 when the prediction is smaller in magnitude than ``abs(rmse * th)``;
        otherwise ``pred`` moved toward zero by ``rmse``, never crossing zero.
    """
    # Predictions that do not clear the threshold are treated as "no signal".
    if abs(rmse * th) > abs(pred):
        return 0
    # Move the prediction toward zero by the error margin. When th < 1 a
    # prediction can pass the threshold yet be smaller than rmse; clamp at zero
    # so the adjustment can never flip the sign of the forecast.
    if pred < 0:
        return min(pred + rmse, 0)
    return max(pred - rmse, 0)

# Apply the adjustment to every prediction, passing rmse as the extra argument.
df['pred-rmse'] = df['Predictions'].apply(adjust_prediction, args=(rmse,))
df[['pred-rmse', 'Predictions']]

# df[df['pred-rmse']==0.0].count()

# If the error outweighs the prediction it is set to 0, i.e. it will not be taken into account.
df

# Adjusted price: current LMCADY scaled by exp of the adjusted prediction.
df['pr'] = np.exp(df['pred-rmse']) * df['LMCADY']



In [15]:
max_wait = 5

np.random.seed(0)

# 50 random integers drawn from [0, 829); duplicates are possible.
random_indices = np.random.randint(0, 829, 50)

# Report only the rows whose index was sampled above.
for index, row in df.iterrows():
    if index in random_indices:
        if row["pred-rmse"] != 0:
            print("Periodo {}, se pronostica un retorno logarítmico acumulado de {:.6f} para los siguientes {} dias.".format(index, row["pred-rmse"], max_wait))
            print("El precio actual es                              {:.2f}".format(row["LMCADY"]))
            print("El precio esperado al fin del periodo es         {:.2f}".format(row["pr"]))
            print("El precio real alcanzado al fin del periodo es   {:.2f}".format(row["ACTUAL FUTURE PRICE"]))
        else:
            # An adjusted prediction of 0 means no forecast is reported for the period.
            print("Periodo {}, no se puede pronosticar con certeza.".format(index))


Periodo 9, se pronostica un retorno logarítmico acumulado de 0.067341 para los siguientes 5 dias.
El precio actual es                              8424.00
El precio esperado al fin del periodo es         9010.82
El precio real alcanzado al fin del periodo es   9132.50
Periodo 70, no se puede pronosticar con certeza.
Periodo 72, no se puede pronosticar con certeza.
Periodo 87, no se puede pronosticar con certeza.
Periodo 99, no se puede pronosticar con certeza.
Periodo 115, no se puede pronosticar con certeza.
Periodo 147, no se puede pronosticar con certeza.
Periodo 151, no se puede pronosticar con certeza.
Periodo 174, se pronostica un retorno logarítmico acumulado de 0.027637 para los siguientes 5 dias.
El precio actual es                              9177.00
El precio esperado al fin del periodo es         9434.16
El precio real alcanzado al fin del periodo es   9604.75
Periodo 177, se pronostica un retorno logarítmico acumulado de 0.052360 para los siguientes 5 dias.
El precio actu

In [12]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

y_true = np.array([3, -0.5, 2, 7, 4.5, 3.0, 5.2, 1.1, 6.0, 2.8, 3.5, 2.1, 0.3, 4.2, 3.8, 7.4, 5.1, 6.8, 4.0, 2.7])

# Build y_pred by adding a small random error to y_true.
np.random.seed(0)  # for reproducibility
errors = np.random.normal(loc=0, scale=0.5, size=len(y_true))  # mean 0, standard deviation 0.5
y_pred = y_true + errors

# Error metrics of the predictions.
mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
# NOTE(review): this rebinds the global `rmse`, which another cell sets to
# 0.015782 and passes to adjust_prediction; re-running cells out of order
# will pick up whichever value was assigned last.
rmse = np.sqrt(mse)

# Dispersion of y_true itself, to compare against each error metric.
mad = np.abs(y_true - y_true.mean()).mean()
variance = y_true.var()
stdev = y_true.std()

print(f'MAE: {mae}')
print(f'MAD: {mad}')
print()
print(f'MSE: {mse}')
print(f'Variance: {variance}')
print()
print(f'RMSE: {rmse}')
print(f'Standard Deviation: {stdev}')




MAE: 0.39922269362457696
MAD: 1.7

MSE: 0.26173816820491236
Variance: 4.446

RMSE: 0.5116035263804506
Standard Deviation: 2.1085540068966693
