In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the three dataset splits; the paths are relative to the working directory.
df_train, df_val, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/train.csv', 'data/val.csv', 'data/test.csv')
)
# Stack train and validation rows into one frame (original row labels are kept).
df = pd.concat([df_train, df_val], axis=0)
df

# Visualize a bar chart for each feature

In [None]:
# Columns to chart: everything except the target and the three columns dropped here.
features = df.drop(['price', 'area', 'street_in_front_of_house', 'width'], axis=1)
total_features = list(features.columns)
total_features

In [None]:
# Sorted distinct values of every charted feature, keyed by feature name.
feature_unique_fields = {
    feature: sorted(features[feature].unique().tolist())
    for feature in total_features
}

# Mean price per distinct value, in the same order as feature_unique_fields[feature].
# One groupby per feature replaces re-filtering the frame and recomputing
# value_counts() for every single value.
# NOTE: groupby().mean() skips missing prices, whereas the previous sum()/count
# propagated NaN for any group containing a missing price.
feature_unique_value = {}
for feature in total_features:
    mean_price_by_value = df.groupby(feature)['price'].mean()
    feature_unique_value[feature] = [
        mean_price_by_value.loc[value] for value in feature_unique_fields[feature]
    ]

feature_unique_fields


In [None]:
visualized_features = total_features
name_of_features = ['floor_number',
 'bedroom_number',
 'is_dinning_room',
 'is_kitchen',
 'is_terrace',
 'is_car_pack',
 'type',
 'direction',
 'city',
 'district']

# Axis label per feature, keyed by column name so a label can never drift out of
# step with the feature it describes. The positional lists used previously had no
# entry for 'direction', which shifted every label after 'type' by one.
feature_labels = {
    'floor_number': 'Số tầng',
    'bedroom_number': 'Số phòng ngủ',
    'is_dinning_room': 'Có phòng ăn hay không',
    'is_kitchen': 'Có phòng bếp hay không',
    'is_terrace': 'Có sân thượng hay không',
    'is_car_pack': 'Có chỗ để xe hay không',
    'type': 'Loại bất động sản',
    'direction': 'Hướng nhà',
    'city': 'Thành phố',
    'district': 'Quận/Huyện',
}

# Chart title per feature, keyed the same way.
feature_titles = {
    'floor_number': 'Biểu đồ thể hiện giá nhà trung bình theo số tầng nhà',
    'bedroom_number': 'Biểu đồ giá nhà trung bình theo số phòng ngủ',
    'is_dinning_room': 'Biểu đồ giá nhà trung bình theo phòng ăn',
    'is_kitchen': 'Biểu đồ giá nhà trung bình có và không có bếp',
    'is_terrace': 'Giá nhà trung bình với sân thượng',
    'is_car_pack': 'Giá nhà trung bình với chỗ để xe',
    'type': 'Biểu đồ thể hiện giá nhà trung bình theo loại bất động sản',
    'direction': 'Biểu đồ thể hiện giá nhà trung bình theo hướng nhà',
    'city': 'Biểu đồ thể hiện giá nhà trung bình theo thành phố',
    'district': 'Biểu đồ thể hiện giá nhà trung bình theo quận/huyện bất động sản',
}

# Positional lists aligned with visualized_features, as the plotting cell expects.
# A feature without an entry falls back to its column name instead of failing.
labels = [feature_labels.get(feature, feature) for feature in visualized_features]
titles = [
    feature_titles.get(feature, f'Biểu đồ giá nhà trung bình theo {feature}')
    for feature in visualized_features
]

In [None]:
import random

# Named colours to sample from; the list is the same for every feature.
all_colors = list(plt.cm.colors.cnames.keys())

# One bar chart per feature: mean price for each distinct value of that feature.
for count, feature in enumerate(visualized_features):
    mean_prices = feature_unique_value[feature]
    categories = list(feature_unique_fields[feature])
    N = len(mean_prices)
    ind = np.arange(N)

    # Re-seeding inside the loop restarts the colour sequence for each chart.
    random.seed(100)
    c = random.choices(all_colors, k=N)

    fig, ax = plt.subplots(figsize=(10, 7))
    ax.bar(ind, mean_prices, color=c)

    # Print the rounded mean price on top of each bar.
    for position, mean_price in zip(ind, mean_prices):
        ax.text(position, mean_price, float(round(mean_price, 2)),
                horizontalalignment='center', verticalalignment='bottom',
                fontdict={'fontweight': 500, 'size': 12})

    # Rotate tick labels when there are many categories or the labels are long.
    max_label_length = max((len(str(category)) for category in categories), default=0)
    ax.set_xticks(ind)
    if N > 5 or (N * max_label_length > 30):
        ax.set_xticklabels(categories, rotation=45, ha='right')
    else:
        ax.set_xticklabels(categories)  # No rotation

    ax.set_xlabel(labels[count])
    ax.set_ylabel("Giá (tỷ đồng)")
    ax.set_title(titles[count], fontsize=22)
    fig.tight_layout()

plt.show()

# Preprocessing the data for prediction

In [None]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [None]:
# Every column that is neither object- nor bool-typed is treated as numerical.
features_numerical = df.select_dtypes(exclude=['object', 'bool']).copy()
numerical_cols = list(features_numerical.columns)
# The target is not an input feature.
numerical_cols.remove('price')

numerical_cols

In [None]:
# Object- and bool-typed columns are treated as categorical.
features_categorical = df.select_dtypes(include=['object', 'bool']).copy()
categorical_cols = list(features_categorical.columns)
categorical_cols

In [None]:
# Dense one-hot encoding for categorical columns; categories unseen during fit are ignored.
categorical_transformer = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
# Standardisation for numerical columns.
numerical_transformer = StandardScaler()

# Columns not listed in either group are passed through unchanged.
column_steps = [
    ('cat', categorical_transformer, categorical_cols),
    ('num', numerical_transformer, numerical_cols),
]
preprocessor = ColumnTransformer(transformers=column_steps, remainder='passthrough')


In [None]:
# Copy the target column out of each split first ...
y_train, y_val = (frame['price'].copy() for frame in (df_train, df_val))

# ... then rebind the frames to versions without it.
df_train, df_val = (frame.drop(columns=['price']) for frame in (df_train, df_val))

In [None]:
# Both calls return the targets as NumPy arrays, but neither result is assigned,
# so y_train and y_val are left unchanged. Only the last expression (y_val as an
# array) becomes the cell output.
y_train.to_numpy()
y_val.to_numpy()

In [None]:
# Sanity check: show the shapes of the training and validation targets.
y_train.shape, y_val.shape

In [None]:
# Fit the preprocessing on the training split only ...
X_train = preprocessor.fit_transform(df_train)
# ... and apply the fitted transformation to the other two splits.
X_val, X_test = (preprocessor.transform(frame) for frame in (df_val, df_test))

In [None]:
from matplotlib import pyplot as plt

def plot_evaluate(y_true, y_pred, title='XGBoost'):
    """Scatter predictions against true values with a y = x reference line.

    Args:
        y_true: Ground-truth target values (x axis).
        y_pred: Predicted target values (y axis), same length as ``y_true``.
        title: Plot title. Defaults to ``'XGBoost'`` so existing calls are
            unaffected; pass the model name when evaluating another model.
    """
    plt.plot(y_true, y_pred, 'b.')
    # Red diagonal across the range of the true values: a perfect prediction
    # would put every blue point on this line.
    diagonal = [np.min(y_true), np.max(y_true)]
    plt.plot(diagonal, diagonal, 'r')
    plt.title(title)
    plt.xlabel('Reality')
    plt.ylabel('Predict')
    plt.show()

# Multi-layer Perceptron
So sánh dựa trên các tiêu chí: Số tầng ẩn, Learning rate (Adam), Số lượng epoch, Hàm kích hoạt, Dropout rate

## Số tầng ẩn

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Standardise the target with statistics learned from the training prices only;
# the validation prices reuse the same fitted scaler.
scaler_y = StandardScaler().fit(y_train.values.reshape(-1, 1))
y_train_scaled = scaler_y.transform(y_train.values.reshape(-1, 1))
y_val_scaled = scaler_y.transform(y_val.values.reshape(-1, 1))

# float32 tensors for the preprocessed feature matrices ...
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(matrix, dtype=torch.float32) for matrix in (X_train, X_val, X_test)
)

# ... and for the scaled targets.
y_train_tensor, y_val_tensor = (
    torch.tensor(column, dtype=torch.float32) for column in (y_train_scaled, y_val_scaled)
)

def set_seed(seed):
    """Seed the torch, NumPy and ``random`` generators with ``seed``.

    When CUDA is available, all CUDA devices are seeded as well and cuDNN is
    switched to deterministic, non-benchmark mode.
    """
    for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seed(69)

class FlexibleMLP(nn.Module):
    """Fully connected regression network with a configurable hidden stack.

    Every hidden layer is ``Linear -> LeakyReLU(0.01) -> Dropout``; a final
    ``Linear`` layer maps the last hidden width to a single output.

    Args:
        input_dim: Number of input features.
        hidden_sizes: Width of each hidden layer, in order. An empty sequence
            yields a single linear layer.
        dropout_rate: Dropout probability applied after every hidden
            activation. Defaults to 0.3, the value that was hard-coded before.
    """

    def __init__(self, input_dim, hidden_sizes, dropout_rate=0.3):
        super().__init__()
        layers = []
        in_features = input_dim
        for hidden_size in hidden_sizes:
            layers.extend([
                nn.Linear(in_features, hidden_size),
                nn.LeakyReLU(negative_slope=0.01),
                nn.Dropout(dropout_rate),
            ])
            in_features = hidden_size
        layers.append(nn.Linear(in_features, 1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.model(x)
    
def init_weights(m):
    """Re-initialise a linear layer: Xavier-uniform weights and zero bias.

    Modules that are not ``nn.Linear`` are left untouched, so the function can
    be passed to ``Module.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0)

def train_and_evaluate(hidden_sizes):
    """Train a FlexibleMLP with early stopping and report RMSE on the original scale.

    Uses the module-level tensors ``X_train_tensor``, ``y_train_tensor``,
    ``X_val_tensor`` and ``y_val_tensor`` and the fitted ``scaler_y``. Training
    minimises L1 loss with Adam (lr=0.001) over mini-batches of 64 taken in
    fixed order, for at most 100 epochs, and stops once the validation loss has
    not improved for 10 consecutive epochs.

    Args:
        hidden_sizes: Width of each hidden layer.

    Returns:
        Tuple ``(model, train_rmse, val_rmse, train_losses, val_losses)``.
        ``model`` holds the weights of the epoch with the lowest validation
        loss; the RMSE values are computed after inverse-transforming with
        ``scaler_y``; the two lists hold the per-epoch losses.
    """
    model = FlexibleMLP(X_train_tensor.shape[1], hidden_sizes)
    model.apply(init_weights)
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    patience = 10
    batch_size = 64
    epochs = 100

    best_val_loss = float('inf')
    # state_dict() hands back references to the live parameter tensors, so the
    # checkpoint must be cloned; otherwise it silently follows later updates and
    # restoring it is a no-op. Starting from the initial weights also guarantees
    # there is always something to restore.
    best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
    patience_counter = 0
    train_losses = []
    val_losses = []

    n_train = len(X_train_tensor)
    for epoch in range(epochs):
        model.train()
        epoch_train_loss = 0.0
        for start in range(0, n_train, batch_size):
            X_batch = X_train_tensor[start:start + batch_size]
            y_batch = y_train_tensor[start:start + batch_size]
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample mean.
            epoch_train_loss += loss.item() * len(X_batch)
        epoch_train_loss /= n_train

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val_tensor), y_val_tensor).item()
        train_losses.append(epoch_train_loss)
        val_losses.append(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    model.load_state_dict(best_state)
    model.eval()

    with torch.no_grad():
        y_val_pred = scaler_y.inverse_transform(model(X_val_tensor).numpy())
        y_val_true = scaler_y.inverse_transform(y_val_tensor.numpy())
        y_train_pred = scaler_y.inverse_transform(model(X_train_tensor).numpy())
        y_train_true = scaler_y.inverse_transform(y_train_tensor.numpy())

    train_rmse = np.sqrt(mean_squared_error(y_train_true, y_train_pred))
    val_rmse = np.sqrt(mean_squared_error(y_val_true, y_val_pred))
    return model, train_rmse, val_rmse, train_losses, val_losses

# Bookkeeping for the sweep over the number of hidden layers.
best_rmse = float('inf')
best_model = None
best_test_pred = None
results = []
all_train_losses = []
all_val_losses = []

# Networks with 1 to 5 hidden layers; the width starts at 256 and halves per layer.
for n_layers in range(1, 6):
    hidden_sizes = [int(256 / (2 ** depth)) for depth in range(n_layers)]
    print(f"\nTraining MLP with {n_layers} hidden layer(s), sizes = {hidden_sizes}")
    model, train_rmse, val_rmse, train_losses, val_losses = train_and_evaluate(hidden_sizes)
    print(f"RMSE: {val_rmse:.4f}")
    results.append((n_layers, hidden_sizes, train_rmse, val_rmse))
    all_train_losses.append(train_losses)
    all_val_losses.append(val_losses)
    if not (val_rmse < best_rmse):
        continue
    # New best on validation: remember it and refresh the test predictions.
    best_rmse = val_rmse
    best_model = model
    with torch.no_grad():
        y_test_pred_scaled = best_model(X_test_tensor)
        y_test_pred = scaler_y.inverse_transform(y_test_pred_scaled.numpy())

# Write the best model's test predictions as the submission file.
mlp_submit = pd.DataFrame({
    'Id': df_test.index,
    'TARGET': y_test_pred.flatten()
})
mlp_submit.to_csv('data/mlp_model_pytorch_best.csv', index=False)

print(f"\nMô hình tốt nhất: {best_rmse:.4f}")
n_layers_list = [entry[0] for entry in results]
train_rmse_list = [entry[2] for entry in results]
val_rmse_list = [entry[3] for entry in results]

# Train vs validation RMSE as a function of depth.
plt.figure(figsize=(8, 5))
plt.plot(n_layers_list, train_rmse_list, label='Train RMSE')
plt.plot(n_layers_list, val_rmse_list, label='Validation RMSE')
plt.title('So sánh Train & Validation RMSE theo số tầng ẩn trong MLP')
plt.xlabel('Số tầng ẩn (hidden layers)')
plt.ylabel('RMSE')
plt.legend()
plt.tight_layout()
plt.show()

## Learning rate (Adam)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Standardise the target with statistics learned from the training prices only;
# the validation prices reuse the same fitted scaler.
scaler_y = StandardScaler().fit(y_train.values.reshape(-1, 1))
y_train_scaled = scaler_y.transform(y_train.values.reshape(-1, 1))
y_val_scaled = scaler_y.transform(y_val.values.reshape(-1, 1))

# float32 tensors for the preprocessed feature matrices ...
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(matrix, dtype=torch.float32) for matrix in (X_train, X_val, X_test)
)

# ... and for the scaled targets.
y_train_tensor, y_val_tensor = (
    torch.tensor(column, dtype=torch.float32) for column in (y_train_scaled, y_val_scaled)
)

def set_seed(seed):
    """Seed the torch, NumPy and ``random`` generators with ``seed``.

    When CUDA is available, all CUDA devices are seeded as well and cuDNN is
    switched to deterministic, non-benchmark mode.
    """
    for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seed(69)

class FlexibleMLP(nn.Module):
    """Fully connected regression network with a configurable hidden stack.

    Every hidden layer is ``Linear -> LeakyReLU(0.01) -> Dropout(0.3)``; a final
    ``Linear`` layer maps the last hidden width to a single output.
    """

    def __init__(self, input_dim, hidden_sizes):
        super().__init__()
        widths = [input_dim, *hidden_sizes]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [
                nn.Linear(fan_in, fan_out),
                nn.LeakyReLU(negative_slope=0.01),
                nn.Dropout(0.3),
            ]
        # Output layer
        stack.append(nn.Linear(widths[-1], 1))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.model(x)
    
def init_weights(m):
    """Re-initialise a linear layer: Xavier-uniform weights and zero bias.

    Modules that are not ``nn.Linear`` are left untouched, so the function can
    be passed to ``Module.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0)

def train_and_evaluate_lr(hidden_sizes, lr):
    """Train a FlexibleMLP with the given Adam learning rate and report RMSE.

    Uses the module-level tensors ``X_train_tensor``, ``y_train_tensor``,
    ``X_val_tensor`` and ``y_val_tensor`` and the fitted ``scaler_y``. Training
    minimises L1 loss over mini-batches of 64 taken in fixed order, for at most
    100 epochs, and stops once the validation loss has not improved for 10
    consecutive epochs. The weights of the best validation epoch are restored
    before evaluation.

    Args:
        hidden_sizes: Width of each hidden layer.
        lr: Learning rate passed to Adam.

    Returns:
        Tuple ``(train_rmse, val_rmse)`` computed after inverse-transforming
        predictions and targets with ``scaler_y``.
    """
    model = FlexibleMLP(X_train_tensor.shape[1], hidden_sizes)
    model.apply(init_weights)
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    patience = 10
    batch_size = 64
    epochs = 100

    best_val_loss = float('inf')
    # state_dict() hands back references to the live parameter tensors, so the
    # checkpoint must be cloned; otherwise it silently follows later updates and
    # restoring it is a no-op. Starting from the initial weights also guarantees
    # there is always something to restore.
    best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        for start in range(0, len(X_train_tensor), batch_size):
            X_batch = X_train_tensor[start:start + batch_size]
            y_batch = y_train_tensor[start:start + batch_size]
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val_tensor), y_val_tensor).item()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
        else:
            patience_counter += 1
            if patience_counter >= patience:
                break

    model.load_state_dict(best_state)
    model.eval()

    with torch.no_grad():
        y_val_pred = scaler_y.inverse_transform(model(X_val_tensor).numpy())
        y_val_true = scaler_y.inverse_transform(y_val_tensor.numpy())
        y_train_pred = scaler_y.inverse_transform(model(X_train_tensor).numpy())
        y_train_true = scaler_y.inverse_transform(y_train_tensor.numpy())

    train_rmse = np.sqrt(mean_squared_error(y_train_true, y_train_pred))
    val_rmse = np.sqrt(mean_squared_error(y_val_true, y_val_pred))
    return train_rmse, val_rmse

# Sweep ten evenly spaced Adam learning rates on a fixed 3-hidden-layer network.
learning_rates = np.linspace(0.0001, 0.01, 10)
train_rmses = []
val_rmses = []

hidden_sizes = [256, 128, 64]
for lr in learning_rates:
    print(f"\nTraining with learning rate = {lr:.5f}")
    train_rmse, val_rmse = train_and_evaluate_lr(hidden_sizes, lr)
    train_rmses.append(train_rmse)
    val_rmses.append(val_rmse)
    print(f"Train RMSE = {train_rmse:.4f}, Val RMSE = {val_rmse:.4f}")

# No submission file is written here: this experiment never computes test
# predictions (train_and_evaluate_lr returns only the two RMSE values). The
# previous code re-saved `y_test_pred`, a variable left over from another cell,
# which raised NameError on a fresh kernel or overwrote the CSV with stale data.

# Train vs validation RMSE as a function of the learning rate (log-scaled x axis).
plt.figure(figsize=(9, 5))
plt.plot(learning_rates, train_rmses, label='Train RMSE')
plt.plot(learning_rates, val_rmses, label='Validation RMSE')
plt.xscale('log')
plt.xlabel('Learning Rate (Adam)')
plt.ylabel('RMSE')
plt.title('Ảnh hưởng của Learning Rate trong thuật toán tối ưu Adam đến RMSE')
plt.legend()
plt.tight_layout()
plt.show()

## Số lượng epoch

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Standardise the target with statistics learned from the training prices only;
# the validation prices reuse the same fitted scaler.
scaler_y = StandardScaler().fit(y_train.values.reshape(-1, 1))
y_train_scaled = scaler_y.transform(y_train.values.reshape(-1, 1))
y_val_scaled = scaler_y.transform(y_val.values.reshape(-1, 1))

# float32 tensors for the preprocessed feature matrices ...
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(matrix, dtype=torch.float32) for matrix in (X_train, X_val, X_test)
)

# ... and for the scaled targets.
y_train_tensor, y_val_tensor = (
    torch.tensor(column, dtype=torch.float32) for column in (y_train_scaled, y_val_scaled)
)

def set_seed(seed):
    """Seed the torch, NumPy and ``random`` generators with ``seed``.

    When CUDA is available, all CUDA devices are seeded as well and cuDNN is
    switched to deterministic, non-benchmark mode.
    """
    for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seed(69)

class FlexibleMLP(nn.Module):
    """Fully connected regression network with a configurable hidden stack.

    Every hidden layer is ``Linear -> LeakyReLU(0.01) -> Dropout(0.3)``; a final
    ``Linear`` layer maps the last hidden width to a single output.
    """

    def __init__(self, input_dim, hidden_sizes):
        super().__init__()
        widths = [input_dim, *hidden_sizes]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [
                nn.Linear(fan_in, fan_out),
                nn.LeakyReLU(negative_slope=0.01),
                nn.Dropout(0.3),
            ]
        stack.append(nn.Linear(widths[-1], 1))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.model(x)
    
def init_weights(m):
    """Re-initialise a linear layer: Xavier-uniform weights and zero bias.

    Modules that are not ``nn.Linear`` are left untouched, so the function can
    be passed to ``Module.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0)

def train_and_evaluate(hidden_sizes):
    """Train a FlexibleMLP for the full 100 epochs and report RMSE on the original scale.

    Unlike the early-stopping variant defined in an earlier cell, this version
    always runs every epoch so that the complete loss curves can be plotted.
    Uses the module-level tensors ``X_train_tensor``, ``y_train_tensor``,
    ``X_val_tensor`` and ``y_val_tensor`` and the fitted ``scaler_y``. Training
    minimises L1 loss with Adam (lr=0.001) over mini-batches of 64 taken in
    fixed order.

    Args:
        hidden_sizes: Width of each hidden layer.

    Returns:
        Tuple ``(model, train_rmse, val_rmse, train_losses, val_losses)``.
        ``model`` holds the weights of the epoch with the lowest validation
        loss; the RMSE values are computed after inverse-transforming with
        ``scaler_y``; the two lists hold the per-epoch losses.
    """
    model = FlexibleMLP(X_train_tensor.shape[1], hidden_sizes)
    model.apply(init_weights)
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    batch_size = 64
    epochs = 100

    best_val_loss = float('inf')
    # state_dict() hands back references to the live parameter tensors, so the
    # checkpoint must be cloned; otherwise it silently follows later updates and
    # restoring it is a no-op. Starting from the initial weights also guarantees
    # there is always something to restore.
    best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
    train_losses = []
    val_losses = []

    n_train = len(X_train_tensor)
    for epoch in range(epochs):
        model.train()
        epoch_train_loss = 0.0
        for start in range(0, n_train, batch_size):
            X_batch = X_train_tensor[start:start + batch_size]
            y_batch = y_train_tensor[start:start + batch_size]
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample mean.
            epoch_train_loss += loss.item() * len(X_batch)
        epoch_train_loss /= n_train

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val_tensor), y_val_tensor).item()
        train_losses.append(epoch_train_loss)
        val_losses.append(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    model.load_state_dict(best_state)
    model.eval()

    with torch.no_grad():
        y_val_pred = scaler_y.inverse_transform(model(X_val_tensor).numpy())
        y_val_true = scaler_y.inverse_transform(y_val_tensor.numpy())
        y_train_pred = scaler_y.inverse_transform(model(X_train_tensor).numpy())
        y_train_true = scaler_y.inverse_transform(y_train_tensor.numpy())

    train_rmse = np.sqrt(mean_squared_error(y_train_true, y_train_pred))
    val_rmse = np.sqrt(mean_squared_error(y_val_true, y_val_pred))
    return model, train_rmse, val_rmse, train_losses, val_losses

# Bookkeeping, kept in the same shape as the hidden-layer experiment.
best_rmse = float('inf')
best_model = None
best_test_pred = None
results = []
all_train_losses = []
all_val_losses = []

# Single run of the 3-hidden-layer network to record the per-epoch losses.
hidden_sizes = [256, 128, 64]
print(f"\nTraining MLP with 3 hidden layer(s), sizes = {hidden_sizes}")
model, train_rmse, val_rmse, train_losses, val_losses = train_and_evaluate(hidden_sizes)
print(f"RMSE: {val_rmse:.4f}")

results.append((3, hidden_sizes, train_rmse, val_rmse))
all_train_losses.append(train_losses)
all_val_losses.append(val_losses)

if val_rmse < best_rmse:
    best_rmse, best_model = val_rmse, model
    with torch.no_grad():
        y_test_pred_scaled = best_model(X_test_tensor)
        y_test_pred = scaler_y.inverse_transform(y_test_pred_scaled.numpy())

# Write this model's test predictions to the submission file.
mlp_submit = pd.DataFrame({
    'Id': df_test.index,
    'TARGET': y_test_pred.flatten()
})
mlp_submit.to_csv('data/mlp_model_pytorch_best.csv', index=False)

# Per-epoch training and validation loss curves.
plt.figure(figsize=(8, 5))
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.title('Training vs Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

## Hàm kích hoạt
Bao gồm ReLU, LeakyReLU với hệ số 0.01, ELU

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Standardise the target with statistics learned from the training prices only;
# the validation prices reuse the same fitted scaler.
scaler_y = StandardScaler().fit(y_train.values.reshape(-1, 1))
y_train_scaled = scaler_y.transform(y_train.values.reshape(-1, 1))
y_val_scaled = scaler_y.transform(y_val.values.reshape(-1, 1))

# float32 tensors for the preprocessed feature matrices and the scaled targets.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(matrix, dtype=torch.float32) for matrix in (X_train, X_val, X_test)
)
y_train_tensor, y_val_tensor = (
    torch.tensor(column, dtype=torch.float32) for column in (y_train_scaled, y_val_scaled)
)

def set_seed(seed):
    """Seed the torch, NumPy and ``random`` generators with ``seed``.

    When CUDA is available, all CUDA devices are seeded as well and cuDNN is
    switched to deterministic, non-benchmark mode.
    """
    for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seed(69)

class FlexibleMLP(nn.Module):
    """Fully connected regression network with a pluggable activation.

    Every hidden layer is ``Linear -> activation_fn() -> Dropout(0.3)``; a final
    ``Linear`` layer maps the last hidden width to a single output.
    ``activation_fn`` is called with no arguments once per hidden layer and
    must return a new activation module.
    """

    def __init__(self, input_dim, hidden_sizes, activation_fn):
        super().__init__()
        widths = [input_dim, *hidden_sizes]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), activation_fn(), nn.Dropout(0.3)]
        stack.append(nn.Linear(widths[-1], 1))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.model(x)

def init_weights(m):
    """Re-initialise a linear layer: Xavier-uniform weights and zero bias.

    Modules that are not ``nn.Linear`` are left untouched, so the function can
    be passed to ``Module.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0)

def train_and_evaluate_activation(hidden_sizes, activation_fn):
    """Train a FlexibleMLP with the given activation and report RMSE.

    Uses the module-level tensors ``X_train_tensor``, ``y_train_tensor``,
    ``X_val_tensor`` and ``y_val_tensor`` and the fitted ``scaler_y``. Training
    minimises L1 loss with Adam (lr=0.001) over mini-batches of 64 taken in
    fixed order and always runs the full 100 epochs (no early stopping). The
    weights of the best validation epoch are restored before evaluation.

    Args:
        hidden_sizes: Width of each hidden layer.
        activation_fn: Zero-argument callable returning a new activation module.

    Returns:
        Tuple ``(train_rmse, val_rmse)`` computed after inverse-transforming
        predictions and targets with ``scaler_y``.
    """
    model = FlexibleMLP(X_train_tensor.shape[1], hidden_sizes, activation_fn)
    model.apply(init_weights)
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    batch_size = 64
    epochs = 100

    best_val_loss = float('inf')
    # state_dict() hands back references to the live parameter tensors, so the
    # checkpoint must be cloned; otherwise it silently follows later updates and
    # restoring it is a no-op. Starting from the initial weights also guarantees
    # there is always something to restore.
    best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    for epoch in range(epochs):
        model.train()
        for start in range(0, len(X_train_tensor), batch_size):
            X_batch = X_train_tensor[start:start + batch_size]
            y_batch = y_train_tensor[start:start + batch_size]
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val_tensor), y_val_tensor).item()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    model.load_state_dict(best_state)
    model.eval()

    with torch.no_grad():
        y_val_pred = scaler_y.inverse_transform(model(X_val_tensor).numpy())
        y_val_true = scaler_y.inverse_transform(y_val_tensor.numpy())
        y_train_pred = scaler_y.inverse_transform(model(X_train_tensor).numpy())
        y_train_true = scaler_y.inverse_transform(y_train_tensor.numpy())

    val_rmse = np.sqrt(mean_squared_error(y_val_true, y_val_pred))
    train_rmse = np.sqrt(mean_squared_error(y_train_true, y_train_pred))
    return train_rmse, val_rmse

# Candidate activations; each value is a zero-argument factory for a new module.
activation_functions = {
    "ReLU": nn.ReLU,
    "LeakyReLU": lambda: nn.LeakyReLU(negative_slope=0.01),
    "ELU": nn.ELU
}

train_rmses = []
val_rmses = []

# Same 3-hidden-layer architecture for every activation.
for name, act_fn in activation_functions.items():
    print(f"\nTraining with activation function = {name}")
    hidden_sizes = [256, 128, 64]
    train_rmse, val_rmse = train_and_evaluate_activation(hidden_sizes, act_fn)
    print(f"Train RMSE = {train_rmse:.4f}, Val RMSE = {val_rmse:.4f}")
    train_rmses.append(train_rmse)
    val_rmses.append(val_rmse)

# Train vs validation RMSE, one x position per activation function.
x = np.arange(len(activation_functions))
plt.figure(figsize=(9, 5))
plt.plot(x, train_rmses, marker='o', label='Train RMSE')
plt.plot(x, val_rmses, marker='o', label='Validation RMSE')
plt.xticks(x, list(activation_functions))
plt.title('RMSE theo Hàm Kích Hoạt (Activation Function)')
plt.ylabel('RMSE')
plt.legend()
plt.tight_layout()
plt.show()

## Dropout rate

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Standardise the target with statistics learned from the training prices only;
# the validation prices reuse the same fitted scaler.
scaler_y = StandardScaler().fit(y_train.values.reshape(-1, 1))
y_train_scaled = scaler_y.transform(y_train.values.reshape(-1, 1))
y_val_scaled = scaler_y.transform(y_val.values.reshape(-1, 1))

# float32 tensors for the preprocessed feature matrices and the scaled targets.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(matrix, dtype=torch.float32) for matrix in (X_train, X_val, X_test)
)
y_train_tensor, y_val_tensor = (
    torch.tensor(column, dtype=torch.float32) for column in (y_train_scaled, y_val_scaled)
)

def set_seed(seed):
    """Seed the torch, NumPy and ``random`` generators with ``seed``.

    When CUDA is available, all CUDA devices are seeded as well and cuDNN is
    switched to deterministic, non-benchmark mode.
    """
    for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
        seed_fn(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seed(69)

class FlexibleMLP(nn.Module):
    """Fully connected regression network with a configurable dropout rate.

    Every hidden layer is ``Linear -> LeakyReLU(0.01) -> Dropout(dropout_rate)``;
    a final ``Linear`` layer maps the last hidden width to a single output.
    ``dropout_rate`` defaults to 0.3.
    """

    def __init__(self, input_dim, hidden_sizes, dropout_rate=0.3):
        super().__init__()
        widths = [input_dim, *hidden_sizes]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [
                nn.Linear(fan_in, fan_out),
                nn.LeakyReLU(negative_slope=0.01),
                nn.Dropout(dropout_rate),
            ]
        stack.append(nn.Linear(widths[-1], 1))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the network output for the input batch ``x``."""
        return self.model(x)

def init_weights(m):
    """Re-initialise a linear layer: Xavier-uniform weights and zero bias.

    Modules that are not ``nn.Linear`` are left untouched, so the function can
    be passed to ``Module.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    if m.bias is None:
        return
    nn.init.constant_(m.bias, 0)

def train_and_evaluate_dropout(hidden_sizes, dropout_rate):
    """Train a FlexibleMLP with the given dropout rate and report RMSE.

    Uses the module-level tensors ``X_train_tensor``, ``y_train_tensor``,
    ``X_val_tensor`` and ``y_val_tensor`` and the fitted ``scaler_y``. Training
    minimises L1 loss with Adam (lr=0.001) over mini-batches of 64 taken in
    fixed order and always runs the full 100 epochs (no early stopping). The
    weights of the best validation epoch are restored before evaluation.

    Args:
        hidden_sizes: Width of each hidden layer.
        dropout_rate: Dropout probability passed to FlexibleMLP.

    Returns:
        Tuple ``(train_rmse, val_rmse)`` computed after inverse-transforming
        predictions and targets with ``scaler_y``.
    """
    model = FlexibleMLP(X_train_tensor.shape[1], hidden_sizes, dropout_rate)
    model.apply(init_weights)
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    batch_size = 64
    epochs = 100

    best_val_loss = float('inf')
    # state_dict() hands back references to the live parameter tensors, so the
    # checkpoint must be cloned; otherwise it silently follows later updates and
    # restoring it is a no-op. Starting from the initial weights also guarantees
    # there is always something to restore.
    best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    for epoch in range(epochs):
        model.train()
        for start in range(0, len(X_train_tensor), batch_size):
            X_batch = X_train_tensor[start:start + batch_size]
            y_batch = y_train_tensor[start:start + batch_size]
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val_tensor), y_val_tensor).item()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    model.load_state_dict(best_state)
    model.eval()

    with torch.no_grad():
        y_val_pred = scaler_y.inverse_transform(model(X_val_tensor).numpy())
        y_val_true = scaler_y.inverse_transform(y_val_tensor.numpy())
        y_train_pred = scaler_y.inverse_transform(model(X_train_tensor).numpy())
        y_train_true = scaler_y.inverse_transform(y_train_tensor.numpy())

    val_rmse = np.sqrt(mean_squared_error(y_val_true, y_val_pred))
    train_rmse = np.sqrt(mean_squared_error(y_train_true, y_train_pred))
    return train_rmse, val_rmse

# Sweep ten evenly spaced dropout rates between 0.1 and 0.5.
dropout_rates = np.linspace(0.1, 0.5, 10)
train_rmses = []
val_rmses = []

# Same 3-hidden-layer architecture for every dropout rate.
for dropout in dropout_rates:
    print(f"\nTraining with dropout rate = {dropout:.2f}")
    hidden_sizes = [256, 128, 64]
    train_rmse, val_rmse = train_and_evaluate_dropout(hidden_sizes, dropout)
    print(f"Train RMSE = {train_rmse:.4f}, Val RMSE = {val_rmse:.4f}")
    train_rmses.append(train_rmse)
    val_rmses.append(val_rmse)

# Train vs validation RMSE as a function of the dropout rate.
plt.figure(figsize=(9, 5))
plt.plot(dropout_rates, train_rmses, label='Train RMSE')
plt.plot(dropout_rates, val_rmses, label='Validation RMSE')
plt.title('Ảnh hưởng của Dropout Rate đến RMSE')
plt.xlabel('Dropout Rate')
plt.ylabel('RMSE')
plt.legend()
plt.tight_layout()
plt.show()