In [14]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.datasets import load_breast_cancer

<h1>Carga de datos</h1>

In [15]:
data = load_breast_cancer()

In [16]:
df = pd.DataFrame(data.data, columns=data.feature_names)
df["Diagnosis"] = data.target

In [17]:
x = df.drop(columns="Diagnosis")
y = df["Diagnosis"]

In [18]:
scaler = StandardScaler()
x = scaler.fit_transform(x)

<h1>Ajuste de parámetros para problema de clasificación</h1>

In [19]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)


In [20]:
param_grid = {
    'C': [0.1, 1, 10],  
    'solver': ['lbfgs', 'liblinear', 'sag', 'saga'] 
}

In [21]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()

In [22]:
from sklearn.model_selection import GridSearchCV

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)



In [23]:
best_params = grid_search.best_params_

In [24]:
best_model = LogisticRegression(**best_params)
best_model.fit(X_train, y_train)

In [25]:
y_pred = best_model.predict(X_test)

In [26]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión en el conjunto de prueba: {accuracy:.2f}')

Precisión en el conjunto de prueba: 0.99


<h1>Ajuste de parámetros para problema de regresión</h1>

In [27]:
dataframe = pd.read_csv("./dataset/housing.csv",header=0,delimiter=",")

In [28]:
x=dataframe.iloc[:,0:3]
y=dataframe.iloc[:,3]
print(y)

0      504000.0
1      453600.0
2      728700.0
3      701400.0
4      760200.0
         ...   
484    470400.0
485    432600.0
486    501900.0
487    462000.0
488    249900.0
Name: MEDV, Length: 489, dtype: float64


In [29]:
scaler = StandardScaler()
x = scaler.fit_transform(x)

In [30]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [31]:
param_grid = {
    'alpha': [0.0001, 0.001, 0.01],  
    'learning_rate': ['constant', 'optimal', 'invscaling'],  
}

In [40]:
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [33]:
model = SGDRegressor(max_iter=1000, random_state=42)

In [34]:
from sklearn.model_selection import GridSearchCV

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

In [35]:
best_params = grid_search.best_params_

In [36]:
best_model = SGDRegressor(**best_params, max_iter=1000, random_state=42)
best_model.fit(X_train, y_train)

In [37]:
y_pred = best_model.predict(X_test)

In [41]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Error cuadrático medio en el conjunto de prueba: {mse:.2f}')
print(f'R2 en el conjunto de prueba: {r2:.2f}')

Error cuadrático medio en el conjunto de prueba: 6796811431.13
R2 en el conjunto de prueba: 0.69
