# Python for Data Science
## Lab 5:
### Nguyen Xuan Viet Duc - 22280012

In [52]:
import torch
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

In [53]:
# Load the Iris dataset. The Bunch object itself stays bound to `iris`
# because the classification report reads `iris.target_names`.
iris = load_iris()
X, Y = iris.data, iris.target

In [54]:
# First split: hold out 33% of the samples, the rest is the training set.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    X, Y, test_size=0.33, random_state=42
)
# Second split: 15% of the held-out samples become the test set, the
# remaining 85% the validation set. Both splits use the same fixed seed.
x_val, x_test, y_val, y_test = train_test_split(
    x_holdout, y_holdout, test_size=0.15, random_state=42
)

In [55]:
# Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Dataset Class
class Data(Dataset):
    """In-memory dataset pairing a feature matrix with integer class labels.

    Args:
        X: features, one sample per row. NumPy arrays, nested lists and
            tensors are accepted; values are stored as ``float32``.
        y: class labels, one per sample; stored as ``int64`` (``long``).

    Raises:
        ValueError: if ``X`` and ``y`` do not contain the same number of
            samples.
    """

    def __init__(self, X, y):
        features = torch.as_tensor(X, dtype=torch.float32)
        targets = torch.as_tensor(y, dtype=torch.long)
        # Fail early: a mismatch would otherwise surface as an IndexError in
        # the middle of an epoch (or silently ignore surplus labels).
        if features.shape[0] != targets.shape[0]:
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {features.shape[0]} and {targets.shape[0]}"
            )
        self.x = features
        self.y = targets
        self.len = features.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

In [57]:
# Neural network model
class Net(nn.Module):
    """Feed-forward network with one sigmoid-activated hidden layer.

    Args:
        D_in: number of input features.
        H: number of hidden units.
        D_out: number of output scores.
    """

    def __init__(self, D_in, H, D_out):
        super().__init__()
        self.linear1 = nn.Linear(D_in, H)   # input -> hidden
        self.linear2 = nn.Linear(H, D_out)  # hidden -> output

    def forward(self, x):
        """Return the raw output of the second linear layer for ``x``."""
        hidden = torch.sigmoid(self.linear1(x))
        return self.linear2(hidden)
    

In [58]:
# Build the model and move its parameters to the selected device.
input_dim, hidden_dim, output_dim = 4, 25, 3
model = Net(input_dim, hidden_dim, output_dim)
model = model.to(device)

In [59]:
# Training routine
def train(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train ``model`` and print train/validation metrics after every epoch.

    Each epoch runs one optimisation pass over ``train_loader``, then scores
    the model on ``val_loader`` via ``evaluate``. Batches are moved to the
    module-level ``device`` before the forward pass.

    Args:
        model: network to optimise (updated in place).
        train_loader: iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser bound to ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        None. Metrics are reported with ``print``.
    """
    for epoch in range(num_epochs):
        model.train()  # switch the model to training mode
        running_loss = 0.0
        n_seen = 0
        train_preds = []
        train_labels = []

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight the batch loss by the batch size so the epoch figure is a
            # per-sample average (assumes `criterion` returns a batch mean).
            batch_size = inputs.size(0)
            running_loss += loss.item() * batch_size
            n_seen += batch_size

            # Predicted class = index of the largest output score.
            _, predicted = torch.max(outputs, 1)
            train_preds.extend(predicted.cpu().numpy())
            train_labels.extend(labels.cpu().numpy())

            # Backward pass and parameter update.
            loss.backward()
            optimizer.step()

        # Normalise by the samples actually iterated, not by the dataset
        # size: the two differ when the loader drops or subsamples batches.
        train_loss = running_loss / n_seen
        train_accuracy = accuracy_score(train_labels, train_preds)
        # zero_division=0 yields the same values as the default but without an
        # UndefinedMetricWarning every epoch in which a class is never predicted.
        train_precision = precision_score(train_labels, train_preds, average='weighted', zero_division=0)
        train_recall = recall_score(train_labels, train_preds, average='weighted', zero_division=0)
        train_f1 = f1_score(train_labels, train_preds, average='weighted', zero_division=0)

        # Score the current weights on the validation set.
        val_loss, val_accuracy, val_precision, val_recall, val_f1 = evaluate(model, val_loader, criterion)

        # Log the metrics for this epoch.
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, '
              f'Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}')
        print(f'Validation Loss: {val_loss:.4f}, Validation Acc: {val_accuracy:.4f}, '
              f'Validation Precision: {val_precision:.4f}, Validation Recall: {val_recall:.4f}, Validation F1: {val_f1:.4f}')


In [60]:
# Evaluation routine
def evaluate(model, data_loader, criterion):
    """Score ``model`` on ``data_loader`` without computing gradients.

    Batches are moved to the module-level ``device`` before the forward pass.
    The model is left in evaluation mode when the function returns.

    Args:
        model: network to evaluate.
        data_loader: iterable of ``(inputs, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``.

    Returns:
        tuple: ``(loss, accuracy, precision, recall, f1)`` where ``loss`` is
        the per-sample average and precision/recall/F1 are support-weighted
        averages over the classes.
    """
    model.eval()  # switch the model to evaluation mode
    total_loss = 0.0
    n_seen = 0
    preds = []
    labels = []

    with torch.no_grad():
        for inputs, label in data_loader:
            inputs, label = inputs.to(device), label.to(device)
            outputs = model(inputs)

            # Weight by batch size so the final figure is a per-sample average
            # (assumes `criterion` returns a batch mean).
            batch_size = inputs.size(0)
            total_loss += criterion(outputs, label).item() * batch_size
            n_seen += batch_size

            # Predicted class = index of the largest output score.
            _, predicted = torch.max(outputs, 1)
            preds.extend(predicted.cpu().numpy())
            labels.extend(label.cpu().numpy())

    # Normalise by the samples actually iterated, not by the dataset size:
    # the two differ when the loader drops or subsamples batches.
    loss = total_loss / n_seen
    accuracy = accuracy_score(labels, preds)
    # zero_division=0 yields the same values as the default but without an
    # UndefinedMetricWarning whenever a class is never predicted.
    precision = precision_score(labels, preds, average='weighted', zero_division=0)
    recall = recall_score(labels, preds, average='weighted', zero_division=0)
    f1 = f1_score(labels, preds, average='weighted', zero_division=0)

    return loss, accuracy, precision, recall, f1

In [61]:
# Inference routine
def inference(model, data_loader):
    """Predict every batch of ``data_loader`` and print a classification report.

    Inputs are moved to the module-level ``device``; labels are read on the
    CPU as yielded by the loader. Class names come from the module-level
    ``iris.target_names``.

    Args:
        model: trained network.
        data_loader: iterable of ``(inputs, labels)`` batches.

    Returns:
        None. The report is written with ``print``.
    """
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for x_batch, y_batch in data_loader:
            x_batch = x_batch.to(device)
            outputs = model(x_batch)
            # Predicted class = index of the largest output score.
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y_batch.numpy())

    # Pass the full label set explicitly: without it the report raises
    # ValueError when a small split (or a collapsed model) leaves fewer
    # distinct classes in labels/predictions than there are target names.
    class_ids = list(range(len(iris.target_names)))
    report = classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=iris.target_names,
        zero_division=0,  # score never-predicted classes as 0 without warnings
    )

    # Print the classification report.
    print("\nClassification Report:\n", report)

In [62]:
# Set up the loss, optimiser and data loaders, then train.
criterion = nn.CrossEntropyLoss()  # cross-entropy loss for multi-class classification
learning_rate = 0.01
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# The loaders are not defined anywhere else in this file, so build them here
# from the existing splits; without this the calls below raise NameError.
# NOTE(review): batch size and shuffling are assumptions; the values used for
# the recorded run are not shown. Confirm before comparing results.
batch_size = 16
train_loader = DataLoader(Data(x_train, y_train), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(Data(x_val, y_val), batch_size=batch_size)
test_loader = DataLoader(Data(x_test, y_test), batch_size=batch_size)

num_epochs = 20
train(model, train_loader, val_loader, criterion, optimizer, num_epochs)

# Run inference on the test set and print the classification report.
inference(model, test_loader)

Epoch [1/20], Train Loss: 1.1119, Train Acc: 0.4700, Train Precision: 0.4452, Train Recall: 0.4700, Train F1: 0.4399
Validation Loss: 1.1190, Validation Acc: 0.5000, Validation Precision: 0.3695, Validation Recall: 0.5000, Validation F1: 0.4149
Epoch [2/20], Train Loss: 1.1106, Train Acc: 0.5200, Train Precision: 0.4381, Train Recall: 0.5200, Train F1: 0.4635
Validation Loss: 1.1181, Validation Acc: 0.4762, Validation Precision: 0.3452, Validation Recall: 0.4762, Validation F1: 0.3937
Epoch [3/20], Train Loss: 1.1094, Train Acc: 0.5400, Train Precision: 0.4169, Train Recall: 0.5400, Train F1: 0.4660
Validation Loss: 1.1173, Validation Acc: 0.5000, Validation Precision: 0.3326, Validation Recall: 0.5000, Validation F1: 0.3971
Epoch [4/20], Train Loss: 1.1081, Train Acc: 0.5600, Train Precision: 0.4212, Train Recall: 0.5600, Train F1: 0.4773
Validation Loss: 1.1164, Validation Acc: 0.5000, Validation Precision: 0.3326, Validation Recall: 0.5000, Validation F1: 0.3971
Epoch [5/20], Train 