In [None]:
import numpy
import pandas
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [None]:
# Degree of the polynomial used for the final fit on the full data set.
opt_degree = 5
# pandas offset alias used to bin the records by date ('3W' = 3-week bins).
freq = '3W'
# Upper limit of the y-axis in the final plot.
range_lim = 400

In [None]:
# Load the filtered records, parsing the 'fecha' column as dates.
data = pandas.read_csv(
    '../assets/data_filtered.csv',
    parse_dates=['fecha'],
    date_format='%Y-%m-%d',
)

# Aggregate into `freq`-sized date bins, summing the columns within each bin.
date_bins = pandas.Grouper(key='fecha', freq=freq, sort=True)
data = data.groupby(date_bins).sum().reset_index()

# Day offset of each bin relative to the earliest bin, starting at 1.
first_date = data['fecha'].min()
data['dia'] = (data['fecha'] - first_date).dt.days + 1

fecha = data[['dia']]
total = data[['TOTAL']]

# Convert the single-column frames to NumPy arrays:
# X is the day-offset feature column, y the 'TOTAL' target column.
X = numpy.array(fecha).reshape(-1, 1)
y = numpy.array(total)

In [None]:
# Hold out 20% of the samples for validation (fixed seed for a reproducible split).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Polynomial degrees to evaluate, and the validation MSE obtained for each one.
candidate_degrees = range(1, 20)
val_errors = []

for degree in candidate_degrees:
    # Expand the feature into polynomial terms; fit on train, reuse on validation.
    poly = PolynomialFeatures(degree=degree)
    X_train_poly = poly.fit_transform(X_train)
    X_val_poly = poly.transform(X_val)

    # Ordinary least squares on the expanded training features.
    lin_reg = LinearRegression().fit(X_train_poly, y_train)

    # Score this degree on the held-out samples.
    y_val_pred = lin_reg.predict(X_val_poly)
    val_error = mean_squared_error(y_val, y_val_pred)
    val_errors.append(val_error)

# Pick the degree with the lowest validation error (first one wins on ties).
best_position = val_errors.index(min(val_errors))
optimal_degree = candidate_degrees[best_position]
print(f"\nEl grado óptimo del polinomio es: {optimal_degree}")

In [None]:
# Build polynomial features of degree `opt_degree` (the configured constant) for the full data set.
# NOTE(review): the validation search computes `optimal_degree`, but it is not used here — confirm this is intended.
poly = PolynomialFeatures(degree=opt_degree)
X_poly = poly.fit_transform(X)

In [None]:
# Fit a linear regression on the polynomial features of the full data set.
lin_reg = LinearRegression().fit(X_poly, y)
# Predictions for the same samples the model was fitted on.
y_pred = lin_reg.predict(X_poly)

In [None]:
# Goodness of fit of the final model. Both metrics are in-sample: `y_pred`
# was predicted from the same `X_poly` the model was fitted on.
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)
print(f"MSE: {mse:.2f}, R^2: {r2:.2f}")

In [None]:
# Plot the aggregated observations (red points) against the fitted polynomial (blue line).
# A single figure is created with the intended 15x4 size. Calling plt.figure()
# twice would open two figures: an empty one carrying the height setting and a
# second one, with default height, that the plot is actually drawn on.
fig, ax = plt.subplots(figsize=(15, 4))
ax.scatter(data['fecha'], y, color='red')
ax.plot(data['fecha'], y_pred, color='blue')
ax.set_title('Regresión Polinomial')
ax.set_xlabel('X')
ax.set_ylabel('y')
# Clip the y-axis to the configured range so the plot stays readable.
ax.set_ylim(-10, range_lim)
plt.show()