# Árboles de decisión de regresión

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Read the turbines CSV into a DataFrame and preview its first rows.
csv_path = "3.3.2 turbines_df.csv"
data_frame = pd.read_csv(csv_path)
data_frame.head()

<img src="Figura turbina.png">

In [None]:
# Dimensions of the dataset as a (rows, columns) tuple.
data_frame.shape

In [None]:
# Data type pandas assigned to each column.
data_frame.dtypes

In [None]:
# Missing values: report how many NaN entries each column contains.
for column in data_frame:
    missing = data_frame[column].isna().sum()
    print('Total de valores nulos de', column, '=', missing)

In [None]:
# Unique values: list the distinct values found in each column.
for column in data_frame:
    distinct_values = data_frame[column].unique()
    print('Valores únicos de', column, '=', distinct_values)

In [None]:
# Descriptive statistics. Called without arguments, describe() summarises
# only the numeric columns when the frame mixes numeric and other dtypes.
data_frame.describe()

In [None]:
# Summary restricted to the object-dtype columns.
# NOTE(review): numpy is imported here but is not used in this cell.
import numpy as np
data_frame.describe(include=[object])

In [None]:
# For each candidate feature draw, side by side, its histogram and a scatter
# plot of the feature against the target column.
target = 'turbine_capacity'
for feature_name in ('rotor_diameter_m', 'hub_height_m', 'commissioning_date'):
    figure, (hist_ax, scatter_ax) = plt.subplots(1, 2, figsize=(20, 6))
    figure.suptitle(feature_name, fontsize=18)
    sns.histplot(data_frame[feature_name], ax=hist_ax, kde=False)
    pair = data_frame[[feature_name, target]]
    pair.plot.scatter(x=feature_name, y=target, ax=scatter_ax)
    plt.show()

In [None]:
## Correlation between the variables
# The frame appears to hold non-numeric columns as well (it is summarised
# with describe(include=[object]) elsewhere in this notebook). From
# pandas 2.0 on, DataFrame.corr() no longer drops such columns silently and
# raises instead, so restrict the computation to numeric columns explicitly.
plt.figure(figsize=(15, 15))
p = sns.heatmap(
    data_frame.corr(numeric_only=True),
    annot=True,
    cmap='RdYlGn',
    square=True,
)

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

# Predictors and regression target.
feature_columns = ['rotor_diameter_m', 'hub_height_m']
X = data_frame[feature_columns]
y = data_frame['turbine_capacity']

# Hold out 20% of the rows so the model can be scored on data it has not
# seen; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=29
)

# Build a depth-limited regression tree and fit it on the training split.
modelo = DecisionTreeRegressor(max_depth=5, random_state=29).fit(X_train, y_train)

# Predictions for the held-out rows.
Y_pred = modelo.predict(X_test)

# Evaluation: for a regressor, score() returns the R^2 coefficient of
# determination.
train_score = modelo.score(X_train, y_train)
test_score = modelo.score(X_test, y_test)
print('Precisión del modelo (en entrenamiento):', train_score)
print('Precisión del modelo (en validación):', test_score)

## Representación gráfica del árbol

In [None]:
from sklearn.tree import plot_tree
# Draw the fitted regression tree on a wide canvas and report its size.
fig, ax = plt.subplots(figsize=(30, 10))

print(f"Profundidad del árbol: {modelo.get_depth()}")
print(f"Número de nodos terminales: {modelo.get_n_leaves()}")

# Label the splits with exactly the columns the model was trained on (X).
# Deriving the names by dropping columns from the full frame leaves extra
# columns such as commissioning_date, which do not correspond to the model's
# inputs. A plain list is passed because some scikit-learn releases reject a
# pandas Index here. class_names is omitted: it only applies to classifiers.
plot = plot_tree(
    decision_tree=modelo,
    feature_names=list(X.columns),
    filled=True,
    impurity=False,
    fontsize=10,
    precision=2,
    ax=ax,
)

In [None]:
# Write the figure held in `fig` to disk; matplotlib picks the image format
# from the file extension.
fig.savefig("arbol.jpg")

In [None]:
%pylab inline
pylab.rcParams['figure.figsize'] = (15, 6)

plt.plot(Y_pred[:100], label='Predicted')
plt.plot(y_test.values[:100], label='Actual')
plt.ylabel('Turbine Capacity')

plt.legend()
plt.show()