# Práctica 1 Pregunta 2: Regresión usando todas las características de los datos

Usando todas las características, implementa los métodos Linear Regression, Polynomial Regression, Decision Tree Regression y Random Forest Regression. Describe en el informe los parámetros usados y los resultados obtenidos con los distintos métodos y deposita el código Python en Aula Virtual en el fichero 'answer2.ipynb'.

## Importación de bibliotecas para análisis de datos y escalado

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

## Carga del dataset desde un archivo CSV

In [2]:
# Column names to load: Col1..Col11 plus the 'Target' column.
columns = [f'Col{i}' for i in range(1, 12)] + ['Target']

# Read only the columns listed above from the comma-separated dataset file.
df = pd.read_csv('dataset_practica_2.csv', sep=',', usecols=columns)

## Entrenamiento y evaluación del modelo por regresión lineal (Linear Regression)

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Feature matrix (Col1..Col11, in that order) and target vector as NumPy arrays.
X = df[[f'Col{i}' for i in range(1, 12)]].values
y = df['Target'].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=123, test_size=0.3)

# Fit a linear regression on the training split; the fit call is the cell's
# last expression, so its return value is what the notebook displays.
slr = LinearRegression()
slr.fit(X_train, y_train)

In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the linear model on the test split.
y_pred = slr.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
R2 = r2_score(y_test, y_pred)

# Print one "label value" line per metric.
for label, value in (("Mean Absolute Error (MAE):", mae),
                     ("Mean Squared Error (MSE):", mse),
                     ("R-squared:", R2)):
    print(label, value)

Mean Absolute Error (MAE): 633.5742018072091
Mean Squared Error (MSE): 730543.31878569
R-squared: 0.8115778967581124


In [5]:
# Pull the fitted parameters out of the linear model and print them.
coefficients, intercept = slr.coef_, slr.intercept_

print("Coefficients:", coefficients)
print("Intercept:", intercept)

Coefficients: [ 2063.04727581  -623.66179176 -2632.38937092    79.17521876
   -42.26469126    59.14374487 -1008.08546094 -2493.17651117
  8449.80161201   530.46670183  -627.00964606]
Intercept: 1441.2578727959967


## Entrenamiento y evaluación del modelo por regresión polinómica cuadrática (Quadratic Polynomial Regression)

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Rebuild the feature matrix / target vector and the same 70/30 split
# (same test_size and seed as the linear-regression section).
X = df[[f'Col{i}' for i in range(1, 12)]].values
y = df['Target'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=123, test_size=0.3)

# Expand the training features with a degree-2 polynomial transformer.
quadratic = PolynomialFeatures(degree=2)
X_train_quadratic = quadratic.fit_transform(X_train)

# Fit a linear model on the expanded features. `regr_quadratic` is bound to
# whatever `regr.fit` returns (per scikit-learn, the fitted estimator itself).
regr = LinearRegression()
regr_quadratic = regr.fit(X_train_quadratic, y_train)

print("Quadratic Model Coefficients:", regr_quadratic.coef_)
print("Quadratic Model Intercept:", regr_quadratic.intercept_)

# Single hand-made sample (powers of two, 2**1 .. 2**11) used as a prediction demo.
new_data_quadratic = np.array([[2 ** k for k in range(1, 12)]])

transformed_new_data_quadratic = quadratic.transform(new_data_quadratic)
print("Quadratic Transformed Data:", transformed_new_data_quadratic[0])

predicted_target_quadratic = regr_quadratic.predict(transformed_new_data_quadratic)
print("Predicted Target:", predicted_target_quadratic)

# MANUAL CALCULATION (disabled): dot product of the coefficients with the
# transformed sample, plus the intercept.
# coefficients = regr_quadratic.coef_
# intercept = regr_quadratic.intercept_
# manual_prediction = np.dot(coefficients, transformed_new_data_quadratic[0]) + intercept
# print("Manually Calculated Target:", manual_prediction)

Quadratic Model Coefficients: [ 2.80139375e+15  1.89247985e+03  2.48711157e+03 -3.95831062e+03
 -3.03100112e+02 -1.36816262e+03  8.12480109e+02  9.18361050e+03
  1.16868066e+05 -1.11994544e+05  4.01155710e+03 -6.51695983e+03
  1.89247985e+03  5.58754593e+02 -2.84949374e+03  7.34029805e+02
  3.25876895e+02 -1.86462217e+01 -7.44195616e+03 -9.00038249e+03
  1.33008098e+04 -7.57199048e+02  6.30087969e+03  3.01997462e+02
 -7.28582860e+03  1.23556742e+03 -3.18698775e+02 -2.57264080e+02
 -4.18308312e+03  7.84639312e+03 -7.05974910e+03  5.85904887e+02
 -2.06418752e+03  2.26864195e+04 -9.05223682e+03  3.56088587e+02
 -2.06264317e+02 -1.47006474e+04 -2.38479774e+04  4.59051208e+04
  3.25571314e+03 -1.56671262e+03 -3.03100112e+02  4.24356656e+02
  2.84003702e+01 -3.95206380e+03  1.15800792e+04 -5.13311821e+03
 -1.71810141e+03  1.39698386e-09  2.09755761e+01  8.04362579e+01
  1.57097711e+03 -1.07775193e+04  8.07505202e+03  5.07760119e+02
  8.36060465e+01 -1.10206232e+02  1.20623396e+03 -2.28019190

In [7]:
# Imported here so this cell does not depend on an earlier cell having run.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Expand the test split with the transformer already fitted on the training
# split. transform() reuses that fit; fit_transform() would re-fit the
# transformer on test data, which is never the intended workflow.
X_test_quadratic = quadratic.transform(X_test)

# Predict through `regr_quadratic` rather than `regr`: the cubic section
# rebinds `regr` to a different model, so using `regr` here would silently
# score the wrong model if cells are executed out of order.
y_pred_quadratic = regr_quadratic.predict(X_test_quadratic)

# Error metrics of the quadratic model on the test split.
mae = mean_absolute_error(y_test, y_pred_quadratic)
mse = mean_squared_error(y_test, y_pred_quadratic)
R2 = r2_score(y_test, y_pred_quadratic)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 1594.9704545454545
Mean Squared Error (MSE): 12181805.05340909
R-squared: -2.141937336804724


## Entrenamiento y evaluación del modelo por regresión polinómica cúbica (Cubic Polynomial Regression)

In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Rebuild the feature matrix / target vector and the same 70/30 split
# (same test_size and seed as the previous sections).
X = df[[f'Col{i}' for i in range(1, 12)]].values
y = df['Target'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=123, test_size=0.3)

# Expand the training features with a degree-3 polynomial transformer.
cubic = PolynomialFeatures(degree=3)
X_train_cubic = cubic.fit_transform(X_train)

# Fit a linear model on the expanded features. Note that this rebinds `regr`
# to a new estimator; `regr_cubic` is bound to whatever `regr.fit` returns.
regr = LinearRegression()
regr_cubic = regr.fit(X_train_cubic, y_train)

print("Cubic Model Coefficients:", regr_cubic.coef_)
print("Cubic Model Intercept:", regr_cubic.intercept_)

# Single hand-made sample (powers of two, 2**1 .. 2**11) used as a prediction demo.
new_data_cubic = np.array([[2 ** k for k in range(1, 12)]])

transformed_new_data_cubic = cubic.transform(new_data_cubic)
print("Cubic Transformed Data:", transformed_new_data_cubic[0])

predicted_target_cubic = regr_cubic.predict(transformed_new_data_cubic)
print("Predicted Target:", predicted_target_cubic)

# MANUAL CALCULATION (disabled): dot product of the coefficients with the
# transformed sample, plus the intercept.
# coefficients = regr_cubic.coef_
# intercept = regr_cubic.intercept_
# manual_prediction = np.dot(coefficients, transformed_new_data_cubic[0]) + intercept
# print("Manually Calculated Target:", manual_prediction)

Cubic Model Coefficients: [ 2.23071261e+13 -8.74980990e+13  2.64861943e+13  1.95309791e+04
  5.75042936e+12  7.53422724e+02  3.14384993e+12 -3.09226662e+04
 -1.66124330e+05  1.98486596e+05 -7.04251105e+03 -2.98143667e+12
  4.35561644e+13 -1.39583971e+12  4.74673357e+11 -4.96022464e+12
  3.74232200e+12 -5.08548794e+11  4.77943292e+11 -4.16379940e+11
 -7.20389006e+11  3.89390570e+11  1.56172236e+12 -1.44470151e+13
 -1.33968053e+04  3.11539469e+12 -1.40546728e+03 -9.11369253e+02
  5.18128207e+04 -3.93006188e+04  4.29087734e+04 -4.22709253e+02
 -5.92968080e+11 -4.98166548e+04 -5.49448717e+10 -2.91583521e+03
 -1.99317737e+03  6.30024269e+04  7.55361588e+05 -9.27741010e+05
  3.17812289e+04  3.61465981e+10 -2.95664417e+12  3.10270223e+10
 -1.65076930e+12  5.43891110e+10  1.66811177e+11  6.13732246e+10
 -5.68947321e+10 -8.40729462e+09  1.30707031e+02  1.54324219e+02
 -2.47009033e+03 -3.42173154e+04  3.85618628e+04 -1.27243164e+02
  1.78110746e+11 -5.23974989e+11  2.84109192e+01  3.27773108e+04

In [9]:
# Expand the test split with the transformer already fitted on the training
# split. transform() reuses that fit; fit_transform() would re-fit the
# transformer on test data, which is never the intended workflow.
X_test_cubic = cubic.transform(X_test)

# Predictions of the cubic model for the test split.
y_pred_cubic = regr_cubic.predict(X_test_cubic)

# Debug aid (disabled): dump the raw predictions.
# print(y_pred_cubic)

In [10]:
# Evaluation metrics of the cubic model on the test split.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Apply the three scorers, in order, to the same (truth, prediction) pair.
mae, mse, R2 = (
    scorer(y_test, y_pred_cubic)
    for scorer in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 49651320517.6826
Mean Squared Error (MSE): 1.3583083718944588e+23
R-squared: -3.5033558408122044e+16


## Entrenamiento y evaluación del modelo por árboles de decisión (Decision Tree Regression)

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Rebuild the feature matrix / target vector and the same 70/30 split
# (same test_size and seed as the previous sections).
X = df[['Col1', 'Col2', 'Col3', 'Col4', 'Col5', 'Col6', 'Col7', 'Col8', 'Col9', 'Col10', 'Col11']].values
y = df['Target'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123)

# Depth-limited regression tree. random_state is fixed so the fit is
# repeatable: scikit-learn permutes the features at every split, so without a
# seed, ties between equally good splits can be resolved differently per run.
# The seed value matches the one used for the random forest in this notebook.
tree = DecisionTreeRegressor(max_depth=3, random_state=1)
tree.fit(X_train, y_train)

In [12]:
# Predict the target for the test split with the fitted decision tree.
# NOTE(review): despite the "random_tree" in the name, `tree` is the single
# DecisionTreeRegressor fitted in the previous cell, not a random forest.
y_pred_random_tree = tree.predict(X_test)

# Debug aid (disabled): dump the raw predictions.
# print(y_pred_random_tree)

In [13]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Error metrics of the decision tree on the test split.
mae = mean_absolute_error(y_test, y_pred_random_tree)
mse = mean_squared_error(y_test, y_pred_random_tree)
R2 = r2_score(y_test, y_pred_random_tree)

# Print one "label value" line per metric.
for label, value in (("Mean Absolute Error (MAE):", mae),
                     ("Mean Squared Error (MSE):", mse),
                     ("R-squared:", R2)):
    print(label, value)

Mean Absolute Error (MAE): 678.516076560234
Mean Squared Error (MSE): 816306.3073106996
R-squared: 0.7894578632670762


## Entrenamiento y evaluación del modelo por bosques aleatorios (Random Forest Regression)

In [14]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Rebuild the feature matrix / target vector and the same 70/30 split
# (same test_size and seed as the previous sections).
X = df[['Col1', 'Col2', 'Col3', 'Col4', 'Col5', 'Col6', 'Col7', 'Col8', 'Col9', 'Col10', 'Col11']].values
y = df['Target'].values

# The training features must be bound to `X_train` (capital X), the name used
# by forest.fit below. The previous lowercase `x_train` left `X_train` coming
# from an earlier cell, so this cell failed with NameError when run on its own.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123)

# 1000-tree forest with a fixed seed; n_jobs=-1 asks scikit-learn to use all
# available cores for fitting and prediction.
forest = RandomForestRegressor(n_estimators=1000,
                               criterion='squared_error',
                               random_state=1,
                               n_jobs=-1)
forest.fit(X_train, y_train)

# Predictions of the forest for the test split.
y_pred_random_forest = forest.predict(X_test)

In [15]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Apply the three scorers, in order, to the forest's test-split predictions.
mae, mse, R2 = (
    scorer(y_test, y_pred_random_forest)
    for scorer in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 414.3948818181817
Mean Squared Error (MSE): 390019.91855095455
R-squared: 0.8994058648270811


## Conversión de Jupyter Notebook en un archivo Python

In [16]:
# Shell escape: run the helper script .convert_notebook_to_script.py to export
# this notebook (answer2.ipynb) as a plain Python script (answer2.py).
! python .convert_notebook_to_script.py --input answer2.ipynb --output answer2.py

[NbConvertApp] Converting notebook answer2.ipynb to script
[NbConvertApp] Writing 7921 bytes to answer2.py
