# Práctica 1 Pregunta 3: Regresión usando 4 características de los datos

Selecciona 4 características de los datos. Usando estas características, implementa los mismos métodos de regresión de la pregunta 2. Describe en el informe los parámetros usados y los resultados obtenidos con los distintos métodos, y deposita el código Python en el Aula Virtual en el fichero 'answer3.ipynb'.

## Importación de bibliotecas para análisis de datos y escalado

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

## Carga del dataset desde un archivo CSV

In [8]:
# Columns to read from the CSV: Col1 .. Col11 plus the regression target.
columns = [f'Col{i}' for i in range(1, 12)] + ['Target']

# Only the listed columns are loaded; the file is comma-separated.
df = pd.read_csv('dataset_practica_2.csv', sep=',', usecols=columns)

## Entrenamiento y evaluación del modelo por regresión lineal (Linear Regression)

In [9]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Feature matrix built from the four selected columns, and the target vector.
X = df[['Col1', 'Col8', 'Col9', 'Col10']].to_numpy()
y = df['Target'].to_numpy()

# 70/30 train/test split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=123, test_size=0.3)

# Ordinary least-squares linear model; the fit call is left as the last
# expression so the notebook displays the fitted estimator.
slr = LinearRegression()
slr.fit(X=X_train, y=y_train)

In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predictions of the linear model on the held-out test split.
y_pred = slr.predict(X_test)

# Compute the three evaluation metrics against the true test targets.
mae, mse, R2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 738.6631169578069
Mean Squared Error (MSE): 1051526.146163516
R-squared: 0.7287898431494804


In [11]:
# Fitted parameters of the linear model: the coefficient array and the intercept.
coefficients, intercept = slr.coef_, slr.intercept_

print("Coefficients:", coefficients)
print("Intercept:", intercept)

Coefficients: [ 2127.59250789 -6596.43054245 13420.17344751   417.15829468]
Intercept: -710.0661733399611


## Entrenamiento y evaluación del modelo por regresión polinómica cuadrática (Quadratic Polynomial Regression)

In [12]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Same four features and target as the linear model.
X = df[['Col1', 'Col8', 'Col9', 'Col10']].to_numpy()
y = df['Target'].to_numpy()

# Same seeded 70/30 partition as the other models in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=123, test_size=0.3)

# Expand the training features into polynomial terms of degree <= 2.
quadratic = PolynomialFeatures(degree=2)
X_train_quadratic = quadratic.fit_transform(X_train)

# fit() returns the estimator, so `regr` and `regr_quadratic` name the same object.
regr = LinearRegression()
regr_quadratic = regr.fit(X_train_quadratic, y_train)

print("Quadratic Model Coefficients:", regr_quadratic.coef_)
print("Quadratic Model Intercept:", regr_quadratic.intercept_)

# A single hand-picked sample used to illustrate the transform + predict path.
new_data_quadratic = np.array([[2, 4, 8, 16]])
transformed_new_data_quadratic = quadratic.transform(new_data_quadratic)
print("Quadratic Transformed Data:", transformed_new_data_quadratic[0])

predicted_target_quadratic = regr_quadratic.predict(transformed_new_data_quadratic)
print("Predicted Target:", predicted_target_quadratic)

# MANUAL CALCULATION (optional cross-check of the prediction above)
# coefficients = regr_quadratic.coef_
# intercept = regr_quadratic.intercept_
# manual_prediction = np.dot(coefficients, transformed_new_data_quadratic[0]) + intercept
# print("Manually Calculated Target:", manual_prediction)

Quadratic Model Coefficients: [ 0.00000000e+00  2.10593014e+02  1.50968292e+04 -6.03478761e+02
  7.48559277e+02  2.10593015e+02 -1.53589743e+04  1.87822962e+04
  1.39545551e+02 -2.44598011e+05  5.34059015e+05 -7.97121153e+03
 -3.03963231e+05  8.96132546e+03 -1.48417687e+02]
Quadratic Model Intercept: -1955.3904618296347
Quadratic Transformed Data: [  1.   2.   4.   8.  16.   4.   8.  16.  32.  16.  32.  64.  64. 128.
 256.]
Predicted Target: [-5429474.31757075]


In [13]:
# Evaluate the quadratic model on the held-out test split.
#
# The test features are passed through the transformer that was fitted on the
# training split (transform, not fit_transform): preprocessing steps must never
# be re-fitted on test data.
X_test_quadratic = quadratic.transform(X_test)

# Predict through `regr_quadratic` rather than the generic `regr` name, which a
# later cell rebinds to a different model; this keeps the cell correct even
# when it is re-run out of order.
y_pred_quadratic = regr_quadratic.predict(X_test_quadratic)

mae = mean_absolute_error(y_test, y_pred_quadratic)
mse = mean_squared_error(y_test, y_pred_quadratic)
R2 = r2_score(y_test, y_pred_quadratic)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 973.5190010722921
Mean Squared Error (MSE): 20690971.75897632
R-squared: -4.336625928528283


## Entrenamiento y evaluación del modelo por regresión polinómica cúbica (Cubic Polynomial Regression)

In [14]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Same four features and target as the previous models.
y = df['Target'].to_numpy()
X = df[['Col1', 'Col8', 'Col9', 'Col10']].to_numpy()

# Same seeded 70/30 partition as the other models in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=123, test_size=0.3)

# Expand the training features into polynomial terms of degree <= 3.
cubic = PolynomialFeatures(degree=3)
X_train_cubic = cubic.fit_transform(X_train)

# A fresh estimator; fit() returns it, so `regr` and `regr_cubic` are the same object.
regr = LinearRegression()
regr_cubic = regr.fit(X_train_cubic, y_train)

print("Cubic Model Coefficients:", regr_cubic.coef_)
print("Cubic Model Intercept:", regr_cubic.intercept_)

# A single hand-picked sample used to illustrate the transform + predict path.
new_data_cubic = np.array([[2, 4, 8, 16]])
transformed_new_data_cubic = cubic.transform(new_data_cubic)
print("Cubic Transformed Data:", transformed_new_data_cubic[0])

predicted_target_cubic = regr_cubic.predict(transformed_new_data_cubic)
print("Predicted Target:", predicted_target_cubic)

# MANUAL CALCULATION (optional cross-check of the prediction above)
# coefficients = regr_cubic.coef_
# intercept = regr_cubic.intercept_
# manual_prediction = np.dot(coefficients, transformed_new_data_cubic[0]) + intercept
# print("Manually Calculated Target:", manual_prediction)

Cubic Model Coefficients: [ 2.50435457e-05 -2.17420394e+02  3.58454241e+04 -6.06448221e+04
 -6.55411211e+02 -2.17420413e+02 -8.59509276e+03  1.36081706e+04
  1.39949848e+02 -5.94342609e+05  1.47070764e+06 -1.00082205e+05
 -7.92400270e+05  9.81149422e+04  9.70736492e+02 -2.17420413e+02
 -8.59509276e+03  1.36081706e+04  1.39949856e+02 -3.96041596e+05
  8.33642822e+05  2.87705745e+03 -4.49300645e+05 -2.14003718e+03
 -8.99392722e+01  1.63253228e+06 -2.76377985e+06 -2.59173122e+05
  2.68891759e+05  5.60703731e+05  1.47022141e+04  7.88294664e+05
 -2.95884637e+05 -1.54088963e+04 -1.19897220e+02]
Cubic Model Intercept: 3628.202668263357
Cubic Transformed Data: [1.000e+00 2.000e+00 4.000e+00 8.000e+00 1.600e+01 4.000e+00 8.000e+00
 1.600e+01 3.200e+01 1.600e+01 3.200e+01 6.400e+01 6.400e+01 1.280e+02
 2.560e+02 8.000e+00 1.600e+01 3.200e+01 6.400e+01 3.200e+01 6.400e+01
 1.280e+02 1.280e+02 2.560e+02 5.120e+02 6.400e+01 1.280e+02 2.560e+02
 2.560e+02 5.120e+02 1.024e+03 5.120e+02 1.024e+03 2.04

In [15]:
# Apply the transformer fitted on the training split to the test features
# (transform, not fit_transform): the test split must not be used to re-fit
# any preprocessing step.
X_test_cubic = cubic.transform(X_test)

# Predictions of the cubic model on the held-out test split.
y_pred_cubic = regr_cubic.predict(X_test_cubic)

In [16]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the cubic model's test predictions with the three evaluation metrics.
mae, mse, R2 = (
    metric(y_test, y_pred_cubic)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 1210.2604059627517
Mean Squared Error (MSE): 84037403.1733884
R-squared: -20.67496964209659


## Entrenamiento y evaluación del modelo por árboles de decisión (Decision Tree Regression)

In [17]:
from sklearn.tree import DecisionTreeRegressor

from sklearn.model_selection import train_test_split

# Same four features and target as the previous models.
X = df[['Col1', 'Col8', 'Col9', 'Col10']].values
y = df['Target'].values

# Same seeded 70/30 partition as the other models in this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123)

# Shallow regression tree (depth 3). random_state is fixed so that fitting is
# reproducible: scikit-learn permutes the features at every split, so ties
# between equally good splits could otherwise be resolved differently per run.
tree = DecisionTreeRegressor(max_depth=3, random_state=1)
tree.fit(X_train, y_train)

In [18]:
# Predictions of the decision tree on the held-out test split.
# (The variable is named "random_tree", but it holds the single decision
# tree's output, not the random forest's.)
y_pred_random_tree = tree.predict(X=X_test)

In [19]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the decision tree's test predictions with the three evaluation metrics.
mae, mse, R2 = (
    metric(y_test, y_pred_random_tree)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 669.6670196338655
Mean Squared Error (MSE): 773954.1576470401
R-squared: 0.800381351185228


## Entrenamiento y evaluación del modelo por bosques aleatorios (Random Forest Regression)

In [20]:
from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import train_test_split

# Same four features and target as the previous models.
X = df[['Col1', 'Col8', 'Col9', 'Col10']].values
y = df['Target'].values

# Same seeded 70/30 partition as the other models. The training features are
# bound to `X_train` (capital X) so that the fit below uses this cell's own
# split instead of a variable left over from an earlier cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123)

# Ensemble of 1000 trees with a fixed seed; n_jobs=-1 uses all available cores.
forest = RandomForestRegressor(n_estimators=1000,
                               criterion='squared_error',
                               random_state=1,
                               n_jobs=-1)
forest.fit(X_train, y_train)

# Predictions of the forest on the held-out test split.
y_pred_random_forest = forest.predict(X_test)

In [21]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the random forest's test predictions with the three evaluation metrics.
mae, mse, R2 = (
    metric(y_test, y_pred_random_forest)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("R-squared:", R2)

Mean Absolute Error (MAE): 631.1058454545454
Mean Squared Error (MSE): 812433.8010116727
R-squared: 0.7904566620554807


## Conversión de Jupyter Notebook en un archivo Python

In [22]:
# Shell escape: run the helper script to export answer3.ipynb as answer3.py.
! python .convert_notebook_to_script.py --input answer3.ipynb --output answer3.py

[NbConvertApp] Converting notebook answer3.ipynb to script
[NbConvertApp] Writing 7520 bytes to answer3.py
