# Práctica 1 Pregunta 4: Regresión usando 2 características de los datos

Selecciona 2 características de los datos. Usando estas características, implementa los mismos métodos de regresión del punto 2). Describe en el informe los parámetros usados y los resultados obtenidos con los distintos métodos y deposita el código Python en el Aula Virtual en el fichero 'answer4.ipynb'.

## Importación de bibliotecas para análisis de datos y escalado

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

## Carga del dataset desde un archivo CSV

In [18]:
# Names of the columns to read: Col1..Col11 followed by the regression target.
columns = [f'Col{idx}' for idx in range(1, 12)]
columns.append('Target')

# Load only those columns from the comma-separated practice dataset.
df = pd.read_csv('dataset_practica_2.csv', sep=',', usecols=columns)

## Entrenamiento y evaluación del modelo por regresión lineal (Linear Regression)

In [19]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Target vector and the two selected feature columns, as NumPy arrays.
y = df['Target'].values
X = df[['Col8', 'Col9']].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=123,
    test_size=0.3,
)

# Fit a plain linear regression on the training portion.
slr = LinearRegression()
slr.fit(X_train, y_train)

In [20]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predictions of the linear model on the held-out rows.
y_pred = slr.predict(X_test)

# Score the predictions with the three regression metrics, in a fixed order.
mae, mse, R2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 1173.3870385433568
Mean Squared Error (MSE): 2379735.851714338
R-squared: 0.3862173223547336


In [21]:
# Fitted parameters of the linear model: the per-feature weights and the bias term.
coefficients, intercept = slr.coef_, slr.intercept_

print("Coefficients:", coefficients)
print("Intercept:", intercept)

Coefficients: [-11157.27889147  19571.50517178]
Intercept: 704.9361033089199


## Entrenamiento y evaluación del modelo por regresión polinómica cuadrática (Quadratic Polynomial Regression)

In [22]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Same two features, target and 70/30 split as the linear model.
X = df[['Col8', 'Col9']].values
y = df['Target'].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=123, test_size=0.3)

# Expand the two features into all polynomial terms up to degree 2.
quadratic = PolynomialFeatures(degree=2)
X_train_quadratic = quadratic.fit_transform(X_train)

# Linear regression on the expanded features.
regr = LinearRegression()
regr.fit(X_train_quadratic, y_train)
regr_quadratic = regr  # fit() returns the estimator itself, so both names share one model

print("Quadratic Model Coefficients:", regr_quadratic.coef_)
print("Quadratic Model Intercept:", regr_quadratic.intercept_)

# Single hand-made sample, used to show the expansion and the resulting prediction.
new_data_quadratic = np.array([[2, 4]])
transformed_new_data_quadratic = quadratic.transform(new_data_quadratic)
print("Quadratic Transformed Data:", transformed_new_data_quadratic[0])

predicted_target_quadratic = regr_quadratic.predict(transformed_new_data_quadratic)
print("Predicted Target:", predicted_target_quadratic)

# MANUAL CALCULATION (disabled; reproduces the prediction from the raw parameters)
# coefficients = regr_quadratic.coef_
# intercept = regr_quadratic.intercept_
# manual_prediction = np.dot(coefficients, transformed_new_data_quadratic[0]) + intercept
# print("Manually Calculated Target:", manual_prediction)

Quadratic Model Coefficients: [      0.          -16579.25843208   40626.46056906 -382889.86124731
  832584.55496059 -469095.68011986]
Quadratic Model Intercept: -2193.6913572500544
Quadratic Transformed Data: [ 1.  2.  4.  4.  8. 16.]
Predicted Target: [-2249260.25316745]


In [23]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Apply the expansion that was fitted on the training data. The test split
# must only be transformed, never used to (re-)fit a preprocessing step.
X_test_quadratic = quadratic.transform(X_test)

# Predict through `regr_quadratic` rather than the generic name `regr`:
# `regr` is rebound by the cubic-regression cell, so using it here would
# break (or score the wrong model) if cells are executed out of order.
y_pred_quadratic = regr_quadratic.predict(X_test_quadratic)

# Held-out error metrics for the quadratic model.
mae = mean_absolute_error(y_test, y_pred_quadratic)
mse = mean_squared_error(y_test, y_pred_quadratic)
R2 = r2_score(y_test, y_pred_quadratic)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 1528.521456763323
Mean Squared Error (MSE): 41485252.48999322
R-squared: -9.699897359513558


## Entrenamiento y evaluación del modelo por regresión polinómica cúbica (Cubic Polynomial Regression)

In [24]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Same two features, target and 70/30 split as the previous models.
y = df['Target'].values
X = df[['Col8', 'Col9']].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=123, test_size=0.3)

# Expand the two features into all polynomial terms up to degree 3.
cubic = PolynomialFeatures(degree=3)
X_train_cubic = cubic.fit_transform(X_train)

# Linear regression on the expanded features.
regr = LinearRegression()
regr.fit(X_train_cubic, y_train)
regr_cubic = regr  # fit() returns the estimator itself, so both names share one model

print("Cubic Model Coefficients:", regr_cubic.coef_)
print("Cubic Model Intercept:", regr_cubic.intercept_)

# Single hand-made sample, used to show the expansion and the resulting prediction.
new_data_cubic = np.array([[2, 4]])
transformed_new_data_cubic = cubic.transform(new_data_cubic)
print("Cubic Transformed Data:", transformed_new_data_cubic[0])

predicted_target_cubic = regr_cubic.predict(transformed_new_data_cubic)
print("Predicted Target:", predicted_target_cubic)

# MANUAL CALCULATION (disabled; reproduces the prediction from the raw parameters)
# coefficients = regr_cubic.coef_
# intercept = regr_cubic.intercept_
# manual_prediction = np.dot(coefficients, transformed_new_data_cubic[0]) + intercept
# print("Manually Calculated Target:", manual_prediction)

Cubic Model Coefficients: [ 0.00000000e+00  3.21776828e+04 -4.80395692e+04 -1.17047969e+06
  2.21418726e+06 -9.64964546e+05 -8.84552338e+06  2.98015857e+07
 -3.30299714e+07  1.20036566e+07]
Cubic Model Intercept: 2545.394800053482
Cubic Transformed Data: [ 1.  2.  4.  4.  8. 16.  8. 16. 32. 64.]
Predicted Target: [1.1480301e+08]


In [25]:
# Apply the expansion that was fitted on the training data. The test split
# must only be transformed, never used to (re-)fit a preprocessing step.
X_test_cubic = cubic.transform(X_test)

# Predictions of the cubic model on the held-out rows.
y_pred_cubic = regr_cubic.predict(X_test_cubic)

# Uncomment to inspect the raw predictions:
# print(y_pred_cubic)

In [26]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the cubic model's held-out predictions with the three regression
# metrics, evaluated in a fixed order.
mae, mse, R2 = (
    metric(y_test, y_pred_cubic)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 5973.18610843088
Mean Squared Error (MSE): 5251833514.825112
R-squared: -1353.5555633640356


## Entrenamiento y evaluación del modelo por árboles de decisión (Decision Tree Regression)

In [27]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Same two features, target and 70/30 split as the previous models.
X = df[['Col8', 'Col9']].values
y = df['Target'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123)

# Shallow regression tree (at most 3 levels). `random_state` is fixed because
# scikit-learn permutes the features at every split, so ties between equally
# good splits could otherwise be resolved differently from run to run.
tree = DecisionTreeRegressor(max_depth=3, random_state=1)
tree.fit(X_train, y_train)

In [28]:
# Predictions of the fitted decision tree (`tree`) for the held-out rows.
y_pred_random_tree = tree.predict(X_test)

# Uncomment to inspect the raw predictions:
# print(y_pred_random_tree)

In [29]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the decision tree's held-out predictions with the three regression
# metrics, evaluated in a fixed order.
mae, mse, R2 = (
    metric(y_test, y_pred_random_tree)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 1170.4950340205899
Mean Squared Error (MSE): 2110840.49523358
R-squared: 0.455570949055883


## Entrenamiento y evaluación del modelo por bosques aleatorios (Random Forest Regression)

In [30]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Same two features, target and 70/30 split as the previous models.
X = df[['Col8', 'Col9']].values
y = df['Target'].values

# The training features must be bound to `X_train` (capital X). The previous
# spelling `x_train` left `X_train` untouched, so the fit below silently
# depended on whatever an earlier cell had stored under that name.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123)

# Ensemble of 1000 trees using the squared-error split criterion; the seed is
# fixed for repeatability and n_jobs=-1 uses all available CPU cores.
forest = RandomForestRegressor(n_estimators=1000,
                               criterion='squared_error',
                               random_state=1,
                               n_jobs=-1)
forest.fit(X_train, y_train)

# Predictions of the forest on the held-out rows.
y_pred_random_forest = forest.predict(X_test)

In [31]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the random forest's held-out predictions with the three regression
# metrics, evaluated in a fixed order.
mae, mse, R2 = (
    metric(y_test, y_pred_random_forest)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 1180.3149609307359
Mean Squared Error (MSE): 2240500.6677562585
R-squared: 0.422128931607776


## Conversión de Jupyter Notebook en un archivo Python

In [32]:
# Shell escape: run the local helper script `.convert_notebook_to_script.py`,
# passing this notebook (answer4.ipynb) as input and answer4.py as the output path.
! python .convert_notebook_to_script.py --input answer4.ipynb --output answer4.py

[NbConvertApp] Converting notebook answer4.ipynb to script
[NbConvertApp] Writing 7431 bytes to answer4.py
