In [None]:
import tensorflow as tf
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,mean_squared_error
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
pd.set_option('display.max_columns', 25)

In [None]:
# Define a semente
seed_value = 2023
tf.random.set_seed(seed_value)
np.random.seed(seed_value)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Atividade

A base '05_df_treino_teste.csv' envolve características de imóveis e o preço de venda desses respectivos imóveis. Nessa base contém um conjunto variáveis explicativas. **Algumas dessas variáveis** são geradas após a venda do imóvel. Por esse motivo, na base '05_validacao.csv', constam menos colunas.

Da atividade:
1. Tratamento de dados: tratamento de missing, conversão de variável categórica para numérica e criação de novas variáveis;
2. Teste diferentes arquiteturas de redes neurais, utilizando a base '05_df_treino_teste.csv'. Não esqueça de dividir entre treino e teste.
3. Faça a predição na base '05_validacao.csv'. Cada grupo poderá enviar até 3 colunas de predição. É obrigatório enviar a coluna PRT_ID e as três colunas de predição.

Lembrem-se que a coluna **PRT_ID** não é uma variável explicativa.

In [None]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data Science/Databases/05_df_treino_teste.csv')
df.tail()

Unnamed: 0,PRT_ID,AREA,INT_SQFT,DATE_SALE,DIST_MAINROAD,N_BEDROOM,N_BATHROOM,N_ROOM,SALE_COND,PARK_FACIL,DATE_BUILD,BUILDTYPE,UTILITY_AVAIL,STREET,MZZONE,QS_ROOMS,QS_BATHROOM,QS_BEDROOM,QS_OVERALL,REG_FEE,COMMIS,SALES_PRICE
4995,P07626,KK Nagar,1733,17-09-2008,148,2.0,1.0,4,AbNormal,No,21-09-1993,Commercial,NoSeWa,Gravel,RL,2.9,4.5,4.9,4.18,709109,132958,14773100
4996,P04588,Karapakkam,1459,04-08-2010,35,2.0,2.0,4,Partial,Yes,06-08-2002,House,AllPub,Gravel,RH,2.0,3.0,4.4,3.26,333648,207403,9017500
4997,P00161,Anna Nagar,1854,02-11-2014,120,2.0,1.0,5,Normal Sale,Yes,07-11-1993,Others,ELO,No Access,RL,2.3,3.3,2.8,2.75,404223,236959,13938740
4998,P08529,Anna Nagar,1617,20-11-2007,58,1.0,1.0,4,AdjLand,Yes,21-11-2001,House,ELO,Gravel,RM,3.9,4.4,3.0,3.665,367007,211735,14115670
4999,P05833,Karapakkam,725,13-12-2009,147,1.0,1.0,2,AdjLand,Yes,21-12-1976,House,ELO,Paved,C,2.0,3.7,2.4,2.67,178317,49533,4953250


In [None]:
df['STREET'].value_counts()

Paved        1785
Gravel       1776
No Access    1426
Pavd           10
NoAccess        3
Name: STREET, dtype: int64

### 1. Tratamento de dados: tratamento de missing, conversão de variável categórica para numérica e criação de novas variáveis;

In [None]:
df = df.drop(columns=['PRT_ID', 'DATE_SALE', 'REG_FEE', 'COMMIS', 'SALE_COND', 'INT_SQFT', 'DIST_MAINROAD'])
# remover SALE_COND, 'INT_SQFT', 'DIST_MAINROAD' da validacao

In [None]:
df.dtypes

AREA              object
N_BEDROOM        float64
N_BATHROOM       float64
N_ROOM             int64
PARK_FACIL        object
DATE_BUILD        object
BUILDTYPE         object
UTILITY_AVAIL     object
STREET            object
MZZONE            object
QS_ROOMS         float64
QS_BATHROOM      float64
QS_BEDROOM       float64
QS_OVERALL       float64
SALES_PRICE        int64
dtype: object

In [None]:
df.describe()

Unnamed: 0,N_BEDROOM,N_BATHROOM,N_ROOM,QS_ROOMS,QS_BATHROOM,QS_BEDROOM,QS_OVERALL,SALES_PRICE
count,5000.0,4996.0,5000.0,5000.0,5000.0,5000.0,4971.0,5000.0
mean,1.639,1.215973,3.69,3.5136,3.50376,3.48926,3.502875,10905640.0
std,0.808214,0.411537,1.022404,0.892326,0.902051,0.879391,0.527007,3799634.0
min,1.0,1.0,2.0,2.0,2.0,2.0,2.06,2156875.0
25%,1.0,1.0,3.0,2.7,2.7,2.7,3.12,8271050.0
50%,1.0,1.0,4.0,3.5,3.5,3.5,3.5,10363000.0
75%,2.0,1.0,4.0,4.3,4.3,4.3,3.89,12988520.0
max,4.0,2.0,6.0,5.0,5.0,5.0,4.97,23667340.0


In [None]:
df = df.drop_duplicates(keep=False).copy()

In [None]:
df.tail()

Unnamed: 0,AREA,N_BEDROOM,N_BATHROOM,N_ROOM,PARK_FACIL,DATE_BUILD,BUILDTYPE,UTILITY_AVAIL,STREET,MZZONE,QS_ROOMS,QS_BATHROOM,QS_BEDROOM,QS_OVERALL,SALES_PRICE
4995,KK Nagar,2.0,1.0,4,No,21-09-1993,Commercial,NoSeWa,Gravel,RL,2.9,4.5,4.9,4.18,14773100
4996,Karapakkam,2.0,2.0,4,Yes,06-08-2002,House,AllPub,Gravel,RH,2.0,3.0,4.4,3.26,9017500
4997,Anna Nagar,2.0,1.0,5,Yes,07-11-1993,Others,ELO,No Access,RL,2.3,3.3,2.8,2.75,13938740
4998,Anna Nagar,1.0,1.0,4,Yes,21-11-2001,House,ELO,Gravel,RM,3.9,4.4,3.0,3.665,14115670
4999,Karapakkam,1.0,1.0,2,Yes,21-12-1976,House,ELO,Paved,C,2.0,3.7,2.4,2.67,4953250


In [None]:
# Normalizando dados

df['DATE_BUILD'] = 2023 - pd.DatetimeIndex(df['DATE_BUILD']).year

  df['DATE_BUILD'] = 2023 - pd.DatetimeIndex(df['DATE_BUILD']).year


In [None]:
df.tail()

Unnamed: 0,AREA,N_BEDROOM,N_BATHROOM,N_ROOM,PARK_FACIL,DATE_BUILD,BUILDTYPE,UTILITY_AVAIL,STREET,MZZONE,QS_ROOMS,QS_BATHROOM,QS_BEDROOM,QS_OVERALL,SALES_PRICE
4995,KK Nagar,2.0,1.0,4,No,30,Commercial,NoSeWa,Gravel,RL,2.9,4.5,4.9,4.18,14773100
4996,Karapakkam,2.0,2.0,4,Yes,21,House,AllPub,Gravel,RH,2.0,3.0,4.4,3.26,9017500
4997,Anna Nagar,2.0,1.0,5,Yes,30,Others,ELO,No Access,RL,2.3,3.3,2.8,2.75,13938740
4998,Anna Nagar,1.0,1.0,4,Yes,22,House,ELO,Gravel,RM,3.9,4.4,3.0,3.665,14115670
4999,Karapakkam,1.0,1.0,2,Yes,47,House,ELO,Paved,C,2.0,3.7,2.4,2.67,4953250


In [None]:
colums = ['QS_ROOMS', 'QS_BATHROOM', 'QS_BEDROOM', 'QS_OVERALL']

for column in colums:
    max = df[column].max()
    min = df[column].min()

    df[column] = df[column].apply(lambda item: (item - min) / (max - min))

In [None]:
df.tail()

Unnamed: 0,AREA,N_BEDROOM,N_BATHROOM,N_ROOM,PARK_FACIL,DATE_BUILD,BUILDTYPE,UTILITY_AVAIL,STREET,MZZONE,QS_ROOMS,QS_BATHROOM,QS_BEDROOM,QS_OVERALL,SALES_PRICE
4995,KK Nagar,2.0,1.0,4,No,30,Commercial,NoSeWa,Gravel,RL,0.3,0.833333,0.966667,0.728522,14773100
4996,Karapakkam,2.0,2.0,4,Yes,21,House,AllPub,Gravel,RH,0.0,0.333333,0.8,0.412371,9017500
4997,Anna Nagar,2.0,1.0,5,Yes,30,Others,ELO,No Access,RL,0.1,0.433333,0.266667,0.237113,13938740
4998,Anna Nagar,1.0,1.0,4,Yes,22,House,ELO,Gravel,RM,0.633333,0.8,0.333333,0.551546,14115670
4999,Karapakkam,1.0,1.0,2,Yes,47,House,ELO,Paved,C,0.0,0.566667,0.133333,0.209622,4953250


In [None]:
# removendo algumas linhas para quando gerar os dummies de treino e validacao tenha o mesmo numero de colunas

list_data = ['Gravel', 'Paved', 'No Access']
mascara = df['STREET'].isin(list_data)
df = df[mascara]

list_data = ['Adyar', 'Ann Nagar', 'Anna Nagar', 'Chormpet', 'Chrmpet', 'Chrompet', 'Chrompt', 'KK Nagar', 'Karapakam', 'Karapakkam', 'T Nagar', 'TNagar', 'Velachery']
mascara = df['AREA'].isin(list_data)
df = df[mascara]

list_data = ['Yes', 'No']
mascara = df['PARK_FACIL'].isin(list_data)
df = df[mascara]

list_data = ['House', 'Others', 'Commercial']
mascara = df['BUILDTYPE'].isin(list_data)
df = df[mascara]

list_data = ['NoSeWa', 'ELO', 'NoSewr ', 'AllPub']
mascara = df['UTILITY_AVAIL'].isin(list_data)
df = df[mascara]

In [None]:
# Removendo nulos, tem nulos nas colunas 'QS_OVERALL', 'N_BATHROOM'
# df.isnull().sum()

media_qs_overall = df['QS_OVERALL'].mean()
mediana_n_bathroom = df['N_BATHROOM'].median()

df['QS_OVERALL'].fillna(media_qs_overall, inplace=True)
df['N_BATHROOM'].fillna(mediana_n_bathroom, inplace=True)


In [None]:
df.isnull().sum()

AREA             0
N_BEDROOM        0
N_BATHROOM       0
N_ROOM           0
PARK_FACIL       0
DATE_BUILD       0
BUILDTYPE        0
UTILITY_AVAIL    0
STREET           0
MZZONE           0
QS_ROOMS         0
QS_BATHROOM      0
QS_BEDROOM       0
QS_OVERALL       0
SALES_PRICE      0
dtype: int64

In [None]:
df

Unnamed: 0,AREA,N_BEDROOM,N_BATHROOM,N_ROOM,PARK_FACIL,DATE_BUILD,BUILDTYPE,UTILITY_AVAIL,STREET,MZZONE,QS_ROOMS,QS_BATHROOM,QS_BEDROOM,QS_OVERALL,SALES_PRICE
0,Anna Nagar,1.0,1.0,4,Yes,37,House,NoSeWa,Gravel,RL,0.433333,0.366667,0.366667,0.381443,12952940
1,Velachery,2.0,1.0,4,No,32,Others,ELO,Paved,I,0.033333,0.966667,0.533333,0.601375,8308730
2,Chrompet,1.0,1.0,3,No,29,House,ELO,Paved,RM,0.433333,0.566667,0.566667,0.522337,8316400
3,Karapakkam,1.0,1.0,3,No,26,House,ELO,Gravel,RM,0.566667,0.300000,0.600000,0.494845,7827000
4,Chrompet,2.0,1.0,4,Yes,33,Commercial,NoSewr,Gravel,RH,0.500000,1.000000,0.300000,0.639175,15199400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,KK Nagar,2.0,1.0,4,No,30,Commercial,NoSeWa,Gravel,RL,0.300000,0.833333,0.966667,0.728522,14773100
4996,Karapakkam,2.0,2.0,4,Yes,21,House,AllPub,Gravel,RH,0.000000,0.333333,0.800000,0.412371,9017500
4997,Anna Nagar,2.0,1.0,5,Yes,30,Others,ELO,No Access,RL,0.100000,0.433333,0.266667,0.237113,13938740
4998,Anna Nagar,1.0,1.0,4,Yes,22,House,ELO,Gravel,RM,0.633333,0.800000,0.333333,0.551546,14115670


In [None]:
df = pd.get_dummies(df, columns=['AREA', 'PARK_FACIL', 'BUILDTYPE', 'UTILITY_AVAIL', 'STREET', 'MZZONE'])

In [None]:
df.tail()

Unnamed: 0,N_BEDROOM,N_BATHROOM,N_ROOM,DATE_BUILD,QS_ROOMS,QS_BATHROOM,QS_BEDROOM,QS_OVERALL,SALES_PRICE,AREA_Adyar,AREA_Ann Nagar,AREA_Anna Nagar,...,UTILITY_AVAIL_ELO,UTILITY_AVAIL_NoSeWa,UTILITY_AVAIL_NoSewr,STREET_Gravel,STREET_No Access,STREET_Paved,MZZONE_A,MZZONE_C,MZZONE_I,MZZONE_RH,MZZONE_RL,MZZONE_RM
4995,2.0,1.0,4,30,0.3,0.833333,0.966667,0.728522,14773100,0,0,0,...,0,1,0,1,0,0,0,0,0,0,1,0
4996,2.0,2.0,4,21,0.0,0.333333,0.8,0.412371,9017500,0,0,0,...,0,0,0,1,0,0,0,0,0,1,0,0
4997,2.0,1.0,5,30,0.1,0.433333,0.266667,0.237113,13938740,0,0,1,...,1,0,0,0,1,0,0,0,0,0,1,0
4998,1.0,1.0,4,22,0.633333,0.8,0.333333,0.551546,14115670,0,0,1,...,1,0,0,1,0,0,0,0,0,0,0,1
4999,1.0,1.0,2,47,0.0,0.566667,0.133333,0.209622,4953250,0,0,0,...,1,0,0,0,0,1,0,1,0,0,0,0


In [None]:
X = df.drop(columns = ['SALES_PRICE']) # pegando o db sem a variável resposta
y = df['SALES_PRICE']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=2023)

print(f" Treino {len(X_train)}, Teste {len(X_test)}")

 Treino 3470, Teste 1488


In [None]:
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='loss',  # Métrica a ser monitorada
    patience=50,          # Número de épocas sem melhoria antes de parar o treinamento
    restore_best_weights=True  # Restaura os melhores pesos encontrados durante o treinamento
)

In [None]:
# Definindo a função para agendar o learning rate
def lr_scheduler(epoch):
    if epoch < 50:
        return 0.005
    elif epoch < 100:
        return 0.001
    elif epoch < 150:
        return 0.0005
    else:
        return 0.0001

# Criando o callback para o Learning Rate Scheduler
lr_scheduler_callback = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)

In [None]:

model = tf.keras.models.Sequential()

model.add(tf.keras.layers.Dense(32, activation='linear', input_shape=(X_train.shape[1],)))

model.add(tf.keras.layers.Dense(64, activation='relu'))

model.add(tf.keras.layers.Dense(128, activation='linear'))

model.add(tf.keras.layers.Dense(256, activation='relu'))

model.add(tf.keras.layers.Dense(512, activation='linear'))

model.add(tf.keras.layers.Dense(256, activation='relu'))

model.add(tf.keras.layers.Dense(128, activation='linear'))

model.add(tf.keras.layers.Dense(64, activation='relu'))

model.add(tf.keras.layers.Dense(1, activation='linear'))

model.compile(optimizer='adam', loss=tf.keras.losses.Huber(delta=1.0))

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                1280      
                                                                 
 dense_1 (Dense)             (None, 64)                2112      
                                                                 
 dense_2 (Dense)             (None, 128)               8320      
                                                                 
 dense_3 (Dense)             (None, 256)               33024     
                                                                 
 dense_4 (Dense)             (None, 512)               131584    
                                                                 
 dense_5 (Dense)             (None, 256)               131328    
                                                                 
 dense_6 (Dense)             (None, 128)               3

In [None]:
# print(y_train)
print(np.array(X_train))

[[1. 1. 2. ... 1. 0. 0.]
 [3. 2. 5. ... 0. 0. 1.]
 [3. 2. 5. ... 0. 0. 0.]
 ...
 [1. 1. 3. ... 0. 0. 0.]
 [2. 1. 5. ... 0. 0. 1.]
 [3. 2. 5. ... 1. 0. 0.]]


In [None]:
history = model.fit(X_train.values, np.array(y_train), epochs=1000, batch_size=int(0.50*len(X)), verbose=1,
    callbacks=[lr_scheduler_callback, early_stopping_callback]
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [None]:
validation = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data Science/Databases/05_validacao.csv', sep=';')

In [None]:
list_data = ['Gravel', 'Paved', 'No Access']
mascara = validation['STREET'].isin(list_data)
validation = validation[mascara]

list_data = ['Yes', 'No']
mascara = validation['PARK_FACIL'].isin(list_data)
validation = validation[mascara]

list_data = ['House', 'Others', 'Commercial']
mascara = validation['BUILDTYPE'].isin(list_data)
validation = validation[mascara]

list_data = ['NoSeWa', 'ELO', 'NoSewr ', 'AllPub']
mascara = validation['UTILITY_AVAIL'].isin(list_data)
validation = validation[mascara]

In [None]:
validation['STREET'].value_counts()

Paved        774
Gravel       736
No Access    582
Name: STREET, dtype: int64

In [None]:
prt_id_values = validation['PRT_ID']
validation = validation.drop(columns=['PRT_ID', 'SALE_COND', 'INT_SQFT', 'DIST_MAINROAD'])

validation['DATE_BUILD'] = 2023 - pd.DatetimeIndex(validation['DATE_BUILD']).year

colums = ['QS_ROOMS', 'QS_BATHROOM', 'QS_BEDROOM', 'QS_OVERALL']

for column in colums:
    max = validation[column].max()
    min = validation[column].min()

    validation[column] = validation[column].apply(lambda item: (item - min) / (max - min))

media_qs_overall = validation['QS_OVERALL'].mean()
mediana_n_bathroom = validation['N_BATHROOM'].median()
mediana_n_bedroom = validation['N_BEDROOM'].median()

validation['QS_OVERALL'].fillna(media_qs_overall, inplace=True)
validation['N_BATHROOM'].fillna(mediana_n_bathroom, inplace=True)
validation['N_BEDROOM'].fillna(mediana_n_bedroom, inplace=True)

validation = pd.get_dummies(validation, columns=['AREA', 'PARK_FACIL', 'BUILDTYPE', 'UTILITY_AVAIL', 'STREET', 'MZZONE'])

validation.tail()


  validation['DATE_BUILD'] = 2023 - pd.DatetimeIndex(validation['DATE_BUILD']).year


Unnamed: 0,N_BEDROOM,N_BATHROOM,N_ROOM,DATE_BUILD,QS_ROOMS,QS_BATHROOM,QS_BEDROOM,QS_OVERALL,AREA_Adyar,AREA_Ann Nagar,AREA_Anna Nagar,AREA_Chormpet,...,UTILITY_AVAIL_ELO,UTILITY_AVAIL_NoSeWa,UTILITY_AVAIL_NoSewr,STREET_Gravel,STREET_No Access,STREET_Paved,MZZONE_A,MZZONE_C,MZZONE_I,MZZONE_RH,MZZONE_RL,MZZONE_RM
2104,2.0,1.0,5,52,0.233333,0.566667,0.4,0.391156,0,0,1,0,...,1,0,0,0,1,0,0,0,0,1,0,0
2105,1.0,1.0,3,35,0.7,0.3,0.566667,0.510204,0,0,0,0,...,1,0,0,1,0,0,0,0,0,0,1,0
2106,4.0,2.0,6,40,1.0,0.866667,0.066667,0.598639,0,0,0,0,...,0,0,1,0,0,1,0,0,0,1,0,0
2107,4.0,2.0,6,31,0.6,0.133333,0.866667,0.578231,0,0,0,0,...,0,1,0,1,0,0,0,0,0,0,0,1
2108,1.0,1.0,3,44,0.166667,0.833333,0.466667,0.496599,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0,1,0


In [None]:
validation.isnull().sum()

N_BEDROOM                0
N_BATHROOM               0
N_ROOM                   0
DATE_BUILD               0
QS_ROOMS                 0
QS_BATHROOM              0
QS_BEDROOM               0
QS_OVERALL               0
AREA_Adyar               0
AREA_Ann Nagar           0
AREA_Anna Nagar          0
AREA_Chormpet            0
AREA_Chrmpet             0
AREA_Chrompet            0
AREA_Chrompt             0
AREA_KK Nagar            0
AREA_Karapakam           0
AREA_Karapakkam          0
AREA_T Nagar             0
AREA_TNagar              0
AREA_Velachery           0
PARK_FACIL_No            0
PARK_FACIL_Yes           0
BUILDTYPE_Commercial     0
BUILDTYPE_House          0
BUILDTYPE_Others         0
UTILITY_AVAIL_AllPub     0
UTILITY_AVAIL_ELO        0
UTILITY_AVAIL_NoSeWa     0
UTILITY_AVAIL_NoSewr     0
STREET_Gravel            0
STREET_No Access         0
STREET_Paved             0
MZZONE_A                 0
MZZONE_C                 0
MZZONE_I                 0
MZZONE_RH                0
M

In [None]:
predictions = model.predict(validation.values)



In [None]:
len(predictions)

2092

# Modelo 2

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
model_rf = RandomForestRegressor(min_samples_leaf=int(len(X_train)*0.05),random_state=2023,n_estimators=250)

In [None]:
model_rf

In [None]:
model_rf.fit(X_train, y_train)

In [None]:
predictions_model_rf = model_rf.predict(validation.values)



In [None]:
predictions_model_rf

array([11978737.97214175,  9715609.70780766, 16715390.83468789, ...,
       11978737.97214175, 16588421.24434321, 10709971.36604007])

# Modelo 3

In [None]:
early_stopping_callback_model3 = tf.keras.callbacks.EarlyStopping(
    monitor='loss',  # Métrica a ser monitorada
    patience=50,          # Número de épocas sem melhoria antes de parar o treinamento
    restore_best_weights=True  # Restaura os melhores pesos encontrados durante o treinamento
)

In [None]:
# Definindo a função para agendar o learning rate
def lr_scheduler_model2(epoch):
    if epoch < 50:
        return 0.005
    elif epoch < 100:
        return 0.001
    elif epoch < 150:
        return 0.0005
    else:
        return 0.0001

# Criando o callback para o Learning Rate Scheduler
lr_scheduler_callback_model2 = tf.keras.callbacks.LearningRateScheduler(lr_scheduler_model2)


In [None]:
model_3 = tf.keras.models.Sequential()

model_3.add(tf.keras.layers.Dense(32, activation='linear', input_shape=(X_train.shape[1],)))

model_3.add(tf.keras.layers.Dense(64, activation='relu'))

model_3.add(tf.keras.layers.Dense(128, activation='linear'))

model_3.add(tf.keras.layers.Dense(256, activation='relu'))

model_3.add(tf.keras.layers.Dense(512, activation='linear'))

model_3.add(tf.keras.layers.Dense(256, activation='relu'))

model_3.add(tf.keras.layers.Dense(128, activation='linear'))

model_3.add(tf.keras.layers.Dense(64, activation='relu'))

model_3.add(tf.keras.layers.Dense(1, activation='linear'))

model_3.compile(optimizer='adam', loss=tf.keras.losses.Huber(delta=1.0))


In [None]:
history_model3 = model.fit(X_train.values, np.array(y_train), epochs=1000, batch_size=int(0.70*len(X)), verbose=1,
    callbacks=[lr_scheduler_callback_model2, early_stopping_callback_model3]
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000


In [None]:
predictions_model3 = model_3.predict(validation.values)




In [None]:
csv_values = pd.DataFrame({'PRT_ID': prt_id_values, 'modelo_1': predictions.flatten(), 'modelo_2': predictions_model_rf.flatten(), 'modelo_3': predictions_model3.flatten()})


In [1]:
csv_values.to_csv('DAVID_ALMEIDA_MATHEUS_OLIVEIRA.csv', index=False)

NameError: ignored