In [1]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn import svm
import pandas as pd
import os  # La biblioteca os permite interactuar con el sistema operativo.
# Ventana de tiempo prepandemia 
import pandas as pd  # Pandas proporciona estructuras de datos y herramientas para el análisis de datos.
import seaborn as sns  # Seaborn es una biblioteca de visualización de datos basada en matplotlib.
import matplotlib.pyplot as plt  # Matplotlib es una biblioteca de gráficos 2D que produce figuras de calidad de publicación.
from pathlib import Path  # Pathlib ofrece clases para manejar rutas de archivos de manera orientada a objetos.

In [2]:
s_path = Path.cwd()  # Obtiene el directorio de trabajo actual como un objeto Path.
s_prt_path = s_path.parent/'..'/'Datasets' # Accede al directorio padre del directorio actual. Utilizamos '..' para subir un nivel en la jerarquía de directorios.
ruta_carpeta_atras = s_prt_path.resolve() # Normalizamos la ruta para resolver cualquier ruta relativa
ruta_csv = os.path.join(s_prt_path,'Data_XM.csv')  # Crea una ruta al archivo CSV en el directorio padre.
df_DataModel= pd.read_csv(ruta_csv)  # Lee el archivo CSV en un DataFrame de pandas.

In [3]:
df_DataModel['Date'] = pd.to_datetime(df_DataModel['Date'])  # Convierte la columna 'Date' a tipo datetime.
df_DataModel.drop('Unnamed: 0', axis=1, inplace=True)  # Elimina la columna 'Unnamed: 0'.

In [4]:
df_DataModel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8886 entries, 0 to 8885
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       8886 non-null   datetime64[ns]
 1   Demanda    8886 non-null   float64       
 2   VolUtil    8886 non-null   float64       
 3   Aportes    8886 non-null   float64       
 4   PrecioB    8886 non-null   float64       
 5   PrecioO    8886 non-null   float64       
 6   DispTer    8886 non-null   float64       
 7   DispNoTer  8886 non-null   float64       
dtypes: datetime64[ns](1), float64(7)
memory usage: 555.5 KB


In [6]:
# Establece la fecha límite hasta la cual deseas mantener los datos para la prueba
start_time = pd.to_datetime('2000-01-01')
end_time = pd.to_datetime('2019-03-01')

In [7]:
# Filtra el DataFrame
filtered_df = df_DataModel[df_DataModel['Date'].between(start_time, end_time)]

In [8]:
# Convertir la columna de fecha a múltiples columnas numéricas
filtered_df['year'] = filtered_df['Date'].dt.year
filtered_df['month'] = filtered_df['Date'].dt.month
filtered_df['day'] = filtered_df['Date'].dt.day
filtered_df = filtered_df.drop('Date', axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['year'] = filtered_df['Date'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['month'] = filtered_df['Date'].dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['day'] = filtered_df['Date'].dt.day


In [9]:
filtered_df.head(-1)

Unnamed: 0,Demanda,VolUtil,Aportes,PrecioB,PrecioO,DispTer,DispNoTer,year,month,day
0,3.619477e+06,1.080803e+10,56677300.0,32.868123,98.359312,2.846091e+06,7.774834e+06,2000,1,1
1,3.660170e+06,1.078818e+10,54645200.0,33.034790,96.103387,2.754967e+06,7.766716e+06,2000,1,2
2,4.366035e+06,1.075306e+10,57488300.0,37.272290,96.537377,2.751592e+06,7.761926e+06,2000,1,3
3,4.491048e+06,1.071967e+10,58418200.0,41.688957,96.636300,2.751300e+06,7.744445e+06,2000,1,4
4,4.459620e+06,1.068971e+10,63039500.0,40.893123,94.805851,2.751300e+06,7.759892e+06,2000,1,5
...,...,...,...,...,...,...,...,...,...,...
6993,7.122057e+06,8.644510e+09,87179100.0,322.590895,370.304514,4.224538e+06,9.659271e+06,2019,2,24
6994,8.137477e+06,8.613842e+09,88485100.0,361.728695,371.224236,4.263101e+06,9.263221e+06,2019,2,25
6995,8.239534e+06,8.559566e+09,84367800.0,292.077995,358.720625,4.178875e+06,9.031502e+06,2019,2,26
6996,8.274899e+06,8.510944e+09,93936600.0,338.385517,362.446310,4.047174e+06,9.017892e+06,2019,2,27


In [10]:
# Preparar los datos
X = filtered_df.drop('PrecioB', axis=1)  # todas las columnas excepto 'PrecioB'
y = filtered_df['PrecioB']  # la columna 'PrecioB'

In [11]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
# Normalizar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
# Definir los parámetros que quieres probar
param_grid = {
    'C': [0.1, 1, 10, 100], 
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear', 'poly'] 
}

In [14]:
# Crear el GridSearchCV con validación cruzada de 10 pliegues
grid = GridSearchCV(svm.SVR(), param_grid, cv=10, refit=True, verbose=2)

In [15]:
# Entrenar el GridSearchCV
grid.fit(X_train, y_train)

Fitting 10 folds for each of 48 candidates, totalling 480 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   1.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.5s
[CV] END ......................C=0.1, gamma=1,

: 