In [76]:
# importando bibliotecas
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

## **Exercício 1 – Classificação (Solar)**

In [77]:
# Load the solar radiation dataset from the local CSV file
df = pd.read_csv(
    'SolarPrediction.csv',
    sep=',',
    low_memory=False,
)

In [78]:
# Preview the first 10 rows to sanity-check the load
df.head(n=10)

Unnamed: 0,UNIXTime,Data,Time,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,TimeSunSet
0,1475229326,9/29/2016 12:00:00 AM,23:55:26,1.21,48,30.46,59,177.39,5.62,06:13:00,18:13:00
1,1475229023,9/29/2016 12:00:00 AM,23:50:23,1.21,48,30.46,58,176.78,3.37,06:13:00,18:13:00
2,1475228726,9/29/2016 12:00:00 AM,23:45:26,1.23,48,30.46,57,158.75,3.37,06:13:00,18:13:00
3,1475228421,9/29/2016 12:00:00 AM,23:40:21,1.21,48,30.46,60,137.71,3.37,06:13:00,18:13:00
4,1475228124,9/29/2016 12:00:00 AM,23:35:24,1.17,48,30.46,62,104.95,5.62,06:13:00,18:13:00
5,1475227824,9/29/2016 12:00:00 AM,23:30:24,1.21,48,30.46,64,120.2,5.62,06:13:00,18:13:00
6,1475227519,9/29/2016 12:00:00 AM,23:25:19,1.2,49,30.46,72,112.45,6.75,06:13:00,18:13:00
7,1475227222,9/29/2016 12:00:00 AM,23:20:22,1.24,49,30.46,71,122.97,5.62,06:13:00,18:13:00
8,1475226922,9/29/2016 12:00:00 AM,23:15:22,1.23,49,30.46,80,101.18,4.5,06:13:00,18:13:00
9,1475226622,9/29/2016 12:00:00 AM,23:10:22,1.21,49,30.46,85,141.87,4.5,06:13:00,18:13:00


In [79]:
# 2. Build the binary target from the median radiation:
#    readings at or above the median are labelled 'Alta', all others 'Baixa'
mediana = df['Radiation'].median()
acima_da_mediana = df['Radiation'].ge(mediana)
df['Rad_Class'] = np.where(acima_da_mediana, 'Alta', 'Baixa')

In [80]:
# 3. Select the features: every numeric column except the raw radiation
#    reading and the class label derived from it
colunas_alvo = ['Radiation', 'Rad_Class']
x = (
    df
    .drop(columns=colunas_alvo)
    .select_dtypes(include=[np.number])
)
y = df['Rad_Class']

In [81]:
# Check the target series (head() shows the first 5 rows by default)
y.head()

0    Baixa
1    Baixa
2    Baixa
3    Baixa
4    Baixa
Name: Rad_Class, dtype: object

In [82]:
# Check the feature matrix (head() shows the first 5 rows by default)
x.head()

Unnamed: 0,UNIXTime,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed
0,1475229326,48,30.46,59,177.39,5.62
1,1475229023,48,30.46,58,176.78,3.37
2,1475228726,48,30.46,57,158.75,3.37
3,1475228421,48,30.46,60,137.71,3.37
4,1475228124,48,30.46,62,104.95,5.62


In [83]:
# 4. Hold out 30% of the rows for testing; stratify on y so both splits
#    keep the same Alta/Baixa proportion, and fix the seed for reproducibility
x_treino, x_teste, y_treino, y_teste = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [84]:
# 5. Standardize the features: the scaler is fitted on the training split
#    only and then applied unchanged to both splits
scaler = StandardScaler().fit(x_treino)
x_treino_scaled = scaler.transform(x_treino)
x_teste_scaled = scaler.transform(x_teste)

In [85]:
# 6. Train and evaluate the classifiers
# Each model is fitted on the standardized training split and scored on the
# standardized test split; accuracy and the confusion matrix are printed per model.
modelos = {
    'Árvore de Decisão': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC(random_state=42)
}

# Pin the class order so the confusion-matrix layout is explicit and always 2x2:
# rows are the true class and columns the predicted class, both ordered Alta, Baixa.
# This matches the alphabetical order scikit-learn infers by default, so the
# printed values are unchanged.
classes = ['Alta', 'Baixa']

for nome, modelo in modelos.items():
    modelo.fit(x_treino_scaled, y_treino)
    y_previ = modelo.predict(x_teste_scaled)
    acc = accuracy_score(y_teste, y_previ)
    cm = confusion_matrix(y_teste, y_previ, labels=classes)
    print(f'{nome} - Acurácia: {acc:.3f}')
    print(f'Matriz de confusão:\n{cm}\n')

Árvore de Decisão - Acurácia: 0.938
Matriz de confusão:
[[4594  309]
 [ 296 4607]]

Random Forest - Acurácia: 0.946
Matriz de confusão:
[[4553  350]
 [ 183 4720]]

SVM - Acurácia: 0.847
Matriz de confusão:
[[3634 1269]
 [ 233 4670]]



## **Exercício 2 – Regressão (Eólica)**

In [86]:
# Load the wind-turbine dataset from the local CSV file
df2 = pd.read_csv(
    'T1.csv',
    sep=',',
    low_memory=False,
)

In [87]:
# Preview the first 10 rows to sanity-check the load
df2.head(n=10)

Unnamed: 0,Date/Time,LV ActivePower (kW),Wind Speed (m/s),Theoretical_Power_Curve (KWh),Wind Direction (°)
0,01 01 2018 00:00,380.047791,5.311336,416.328908,259.994904
1,01 01 2018 00:10,453.769196,5.672167,519.917511,268.641113
2,01 01 2018 00:20,306.376587,5.216037,390.900016,272.564789
3,01 01 2018 00:30,419.645905,5.659674,516.127569,271.258087
4,01 01 2018 00:40,380.650696,5.577941,491.702972,265.674286
5,01 01 2018 00:50,402.391998,5.604052,499.436385,264.578613
6,01 01 2018 01:00,447.605713,5.793008,557.372363,266.163605
7,01 01 2018 01:10,387.242188,5.30605,414.898179,257.949493
8,01 01 2018 01:20,463.651215,5.584629,493.677652,253.480698
9,01 01 2018 01:30,439.725708,5.523228,475.706783,258.723785


In [88]:
# 1. Select the features: all numeric columns except the regression target,
#    'LV ActivePower (kW)'
alvo = 'LV ActivePower (kW)'
x_reg = (
    df2
    .drop(columns=[alvo])
    .select_dtypes(include=[np.number])
)
y_reg = df2[alvo]

In [89]:
# Check the target series (head() shows the first 5 rows by default)
y_reg.head()

0    380.047791
1    453.769196
2    306.376587
3    419.645905
4    380.650696
Name: LV ActivePower (kW), dtype: float64

In [90]:
# Check the feature matrix (head() shows the first 5 rows by default)
x_reg.head()

Unnamed: 0,Wind Speed (m/s),Theoretical_Power_Curve (KWh),Wind Direction (°)
0,5.311336,416.328908,259.994904
1,5.672167,519.917511,268.641113
2,5.216037,390.900016,272.564789
3,5.659674,516.127569,271.258087
4,5.577941,491.702972,265.674286


In [91]:
# 2. Split into training and test sets (80/20) with a fixed seed for reproducibility
x2_train, x2_test, y2_train, y2_test = train_test_split(
    x_reg,
    y_reg,
    test_size=0.2,
    random_state=42,
)

In [65]:
# 3. Standardize the features: the scaler is fitted on the training split
#    only and then applied unchanged to both splits
scaler_reg = StandardScaler().fit(x2_train)
x2_train_scaled = scaler_reg.transform(x2_train)
x2_test_scaled = scaler_reg.transform(x2_test)

In [None]:
# 4. Train and evaluate the regressors
# Each model is fitted on the standardized training split; RMSE and R² are
# computed on the standardized test split and printed per model.
regressores = {
    'Regressão linear': LinearRegression(),
    'Árvore de Regressão': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
}

for nome in regressores:
    reg = regressores[nome]
    reg.fit(x2_train_scaled, y2_train)
    y_pred = reg.predict(x2_test_scaled)
    mse = mean_squared_error(y2_test, y_pred)
    rmse = mse ** 0.5  # RMSE is the square root of the mean squared error
    r2 = r2_score(y2_test, y_pred)
    print(f'{nome} - RMSE: {rmse:.2f} | R²: {r2:.3f}')

Regressão linear - RMSE: 410.91 | R²: 0.901
Árvore de Regressão - RMSE: 550.55 | R²: 0.822
Random Forest - RMSE: 408.83 | R²: 0.902
