# Python for Data Science
## Lab 5:
### Nguyen Xuan Viet Duc - 22280012

In [1]:
from sklearn.datasets import load_iris

iris = load_iris()
X = iris.data
Y = iris.target

In [2]:
from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(X, Y, test_size=0.33, random_state=42)
x_val, x_test, y_val, y_test = train_test_split(x_val, y_val, test_size=0.15, random_state=42)

In [3]:
import torch
from torch.utils.data import DataLoader, Dataset

class Data(Dataset):
    def __init__(self, X, y):
        self.x=torch.from_numpy(X)
        self.y=torch.from_numpy(y)
        self.len=self.x.shape[0]
    def __getitem__(self,index):
        return self.x[index], self.y[index]
    def __len__(self):
        return self.len

In [4]:
train_dataset = Data(x_train, y_train)
train_loader = DataLoader(dataset=train_dataset,batch_size=64)

val_dataset = Data(x_train, y_train)
val_loader = DataLoader(dataset=val_dataset,batch_size=64)

test_dataset = Data(x_train, y_train)
test_loader = DataLoader(dataset=test_dataset,batch_size=64)

In [5]:
import torch.nn as nn

class Net(nn.Module):
    def __init__(self,D_in,H,D_out):
        super(Net,self).__init__()
        self.linear1=nn.Linear(D_in,H)
        self.linear2=nn.Linear(H,D_out)

    def forward(self,x):
        x=torch.sigmoid(self.linear1(x))
        x=self.linear2(x)
        return x

In [6]:
input_dim = 4
hidden_dim = 25
output_dim = 3
model = Net(input_dim,hidden_dim,output_dim)

In [7]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import torch

# Hàm huấn luyện (Train)
def train(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    for epoch in range(num_epochs):
        model.train()  # Đặt chế độ train cho mô hình
        train_loss = 0.0
        train_preds = []
        train_labels = []
        
        for inputs, labels in train_loader:
            # Đặt gradient về 0
            optimizer.zero_grad()
            
            # Forward
            outputs = model(inputs.float())
            loss = criterion(outputs, labels.long())
            train_loss += loss.item() * inputs.size(0)
            
            # Dự đoán và lưu lại các giá trị dự đoán
            _, predicted = torch.max(outputs, 1)
            train_preds.extend(predicted.cpu().numpy())
            train_labels.extend(labels.cpu().numpy())
            
            # Backward và tối ưu hóa
            loss.backward()
            optimizer.step()
        
        # Tính toán loss và các chỉ số cho tập train
        train_loss /= len(train_loader.dataset)
        train_accuracy = accuracy_score(train_labels, train_preds)
        train_precision = precision_score(train_labels, train_preds, average='weighted')
        train_recall = recall_score(train_labels, train_preds, average='weighted')
        train_f1 = f1_score(train_labels, train_preds, average='weighted')
        
        # Đánh giá trên tập validation
        val_loss, val_accuracy, val_precision, val_recall, val_f1 = evaluate(model, val_loader, criterion)
        
        # Log các chỉ số cho mỗi epoch
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, '
              f'Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, Train F1: {train_f1:.4f}')
        print(f'Validation Loss: {val_loss:.4f}, Validation Acc: {val_accuracy:.4f}, '
              f'Validation Precision: {val_precision:.4f}, Validation Recall: {val_recall:.4f}, Validation F1: {val_f1:.4f}')



In [8]:
# Hàm đánh giá (Evaluate)
def evaluate(model, data_loader, criterion):
    model.eval()  # Đặt chế độ eval cho mô hình
    loss = 0.0
    preds = []
    labels = []
    
    with torch.no_grad():  # Tắt gradient
        for inputs, label in data_loader:
            outputs = model(inputs.float())
            loss += criterion(outputs, label.long()).item() * inputs.size(0)
            
            _, predicted = torch.max(outputs, 1)
            preds.extend(predicted.cpu().numpy())
            labels.extend(label.cpu().numpy())
    
    loss /= len(data_loader.dataset)
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds, average='weighted')
    recall = recall_score(labels, preds, average='weighted')
    f1 = f1_score(labels, preds, average='weighted')
    
    return loss, accuracy, precision, recall, f1

In [9]:
# Hàm suy luận (Inference)
def inference(model, data_loader):
    model.eval()
    preds = []
    
    with torch.no_grad():
        for inputs, _ in data_loader:
            outputs = model(inputs.float())
            _, predicted = torch.max(outputs, 1)
            preds.extend(predicted.cpu().numpy())
    
    return preds

In [10]:
import torch.optim as optim
import warnings

warnings.filterwarnings("ignore")

criterion = nn.CrossEntropyLoss()  # Sử dụng CrossEntropyLoss cho bài toán phân loại nhiều lớp
learning_rate = 0.01
optimizer = optim.SGD(model.parameters(), lr=learning_rate)  # Sử dụng SGD, hoặc thay bằng Adam nếu cần

num_epochs = 20
train(model, train_loader, val_loader, criterion, optimizer, num_epochs)

# Thực hiện suy luận trên tập test
test_preds = inference(model, test_loader)
print("Predictions on Test Set:", test_preds)


Epoch [1/20], Train Loss: 1.1431, Train Acc: 0.3500, Train Precision: 0.1225, Train Recall: 0.3500, Train F1: 0.1815
Validation Loss: 1.1368, Validation Acc: 0.3500, Validation Precision: 0.1225, Validation Recall: 0.3500, Validation F1: 0.1815
Epoch [2/20], Train Loss: 1.1359, Train Acc: 0.3500, Train Precision: 0.1225, Train Recall: 0.3500, Train F1: 0.1815
Validation Loss: 1.1304, Validation Acc: 0.3500, Validation Precision: 0.1225, Validation Recall: 0.3500, Validation F1: 0.1815
Epoch [3/20], Train Loss: 1.1297, Train Acc: 0.3500, Train Precision: 0.1225, Train Recall: 0.3500, Train F1: 0.1815
Validation Loss: 1.1249, Validation Acc: 0.3500, Validation Precision: 0.1225, Validation Recall: 0.3500, Validation F1: 0.1815
Epoch [4/20], Train Loss: 1.1243, Train Acc: 0.3500, Train Precision: 0.1225, Train Recall: 0.3500, Train F1: 0.1815
Validation Loss: 1.1201, Validation Acc: 0.3500, Validation Precision: 0.1225, Validation Recall: 0.3500, Validation F1: 0.1815
Epoch [5/20], Train 