In [5]:
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
from PIL import Image
import os

# Preprocessing shared by every dataset: resize each image to 128x128, convert
# it to a tensor, then normalize every RGB channel as (x - 0.5) / 0.5.
transform = transforms.Compose(
    [
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# Dataset locations (relative paths, resolved against the current working directory)
original_train_path = '../datasets/images/original/train'
original_test_path = '../datasets/images/original/test'
synthetic_train_path = '../datasets/images/generated/train'

# Original train/test images and the generated (synthetic) training images,
# all loaded through the same preprocessing pipeline.
train_dataset = ImageFolder(original_train_path, transform=transform)
test_dataset = ImageFolder(original_test_path, transform=transform)
synthetic_train_dataset = ImageFolder(synthetic_train_path, transform=transform)

# Report the size of each dataset
print(f"Broj slika u originalnom trening setu: {len(train_dataset)}")
print(f"Broj slika u originalnom test setu: {len(test_dataset)}")
print(f"Broj slika u sintetičkom trening setu: {len(synthetic_train_dataset)}")


Broj slika u originalnom trening setu: 4352
Broj slika u originalnom test setu: 1088
Broj slika u sintetičkom trening setu: 1000


In [6]:
class CombinedDataset(Dataset):
    """Original image samples plus a capped share of same-class synthetic samples.

    For every class of ``original_dataset`` all original samples are kept, and
    up to ``int(n_original_in_class * synthetic_percentage)`` samples of the
    class with the same name are taken from ``synthetic_dataset`` (fewer when
    the synthetic dataset does not hold that many; none for a negative
    percentage). Synthetic samples are taken in dataset order.

    Both datasets must expose ``classes`` (a list of class names) and
    ``samples`` (a list of ``(path, class_index)`` pairs).

    Labels in the combined dataset always use the class indices of
    ``original_dataset``: synthetic samples are matched by class *name* and
    re-labelled, so a synthetic dataset that contains only a subset of the
    classes (and therefore numbers them differently) is still labelled
    correctly.

    Args:
        original_dataset: Dataset providing the real samples and the label space.
        synthetic_dataset: Dataset providing the generated samples.
        synthetic_percentage: Synthetic-to-original ratio per class
            (``0.5`` means "add up to 50% as many synthetic samples").
        transform: Optional callable applied to each loaded RGB image.
    """

    def __init__(self, original_dataset, synthetic_dataset, synthetic_percentage, transform=None):
        self.original_dataset = original_dataset
        self.synthetic_dataset = synthetic_dataset
        self.synthetic_percentage = synthetic_percentage
        self.transform = transform

        self.original_classes = original_dataset.classes
        self.synthetic_classes = synthetic_dataset.classes

        self.data = self.combine_datasets()

    def __len__(self):
        """Return the number of combined (original + selected synthetic) samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Load the image at ``index`` as RGB and return ``(image, label)``."""
        img_path, label = self.data[index]
        # convert() produces an independent image, so the file handle can be
        # released right away instead of waiting for garbage collection.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

    @staticmethod
    def _group_samples_by_class(dataset):
        """Map each class name to its ``(path, class_index)`` samples, in dataset order."""
        grouped = {}
        for sample in dataset.samples:
            _, label = sample
            grouped.setdefault(dataset.classes[label], []).append(sample)
        return grouped

    def combine_datasets(self):
        """Build the list of ``(path, label)`` pairs backing this dataset.

        Classes are visited in the order of ``original_dataset.classes``; for
        each class the original samples come first, followed by the selected
        synthetic samples re-labelled with the original class index.
        """
        # One pass over each dataset instead of rescanning all samples per class.
        original_by_class = self._group_samples_by_class(self.original_dataset)
        synthetic_by_class = self._group_samples_by_class(self.synthetic_dataset)

        combined_data = []
        for original_label, class_name in enumerate(self.original_classes):
            original_samples = original_by_class.get(class_name, [])
            synthetic_samples = synthetic_by_class.get(class_name, [])

            num_synthetic = int(len(original_samples) * self.synthetic_percentage)
            # Clamp to [0, available]: a negative count would otherwise turn the
            # slice below into "all but the last k" synthetic samples.
            num_synthetic = max(0, min(num_synthetic, len(synthetic_samples)))

            combined_data.extend(original_samples)
            # Use the ORIGINAL dataset's index for this class name; the synthetic
            # dataset may number its classes differently.
            combined_data.extend(
                (path, original_label) for path, _ in synthetic_samples[:num_synthetic]
            )

        return combined_data


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import pandas as pd

# Definicija modela
class CNNModel(nn.Module):
    """Small CNN classifier: three conv + ReLU + max-pool stages and two linear layers.

    The first linear layer expects ``64 * 16 * 16`` features, i.e. a 3-channel
    input of 128x128 pixels (each of the three 2x2 poolings halves the spatial
    size). The network outputs raw logits for two classes.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, 2)

    def forward(self, x):
        """Return class logits of shape ``(batch, 2)``.

        Args:
            x: Image tensor of shape ``(batch, 3, 128, 128)``; a single
                unbatched ``(3, 128, 128)`` image is treated as a batch of one.

        Raises:
            RuntimeError: If the spatial size does not produce
                ``64 * 16 * 16`` features per sample.
        """
        if x.dim() == 3:
            # Keep accepting a single (C, H, W) image, as a batch of one.
            x = x.unsqueeze(0)

        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        # Flatten per sample. Reshaping with view(-1, 64 * 16 * 16) would fold
        # surplus features of a wrongly sized image into the batch dimension
        # and silently return several predictions per image.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Funkcija za obuku i evaluaciju modela
def train_and_evaluate(model, train_loader, test_loader, num_epochs=10):
    """Train ``model`` with Adam and cross-entropy, then evaluate it on both loaders.

    The model is moved to CUDA when available, otherwise it stays on the CPU.
    Progress is printed every 100 training steps.

    Args:
        model: Network to train (modified in place).
        train_loader: Loader yielding ``(images, labels)`` training batches.
        test_loader: Loader yielding ``(images, labels)`` test batches.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        A 9-tuple ``(train_metrics, test_metrics, train_y_true, train_y_pred,
        train_y_score, test_y_true, test_y_pred, test_y_score, train_losses)``
        where the metrics, labels, predictions and scores come from
        ``evaluate`` and ``train_losses`` holds, per epoch, the mean of the
        batch losses.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_losses = []

    for epoch in range(num_epochs):
        model.train()
        num_batches = len(train_loader)
        running_loss = 0.0
        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Read the scalar once and reuse it for both the running total and
            # the progress message.
            batch_loss = loss.item()
            running_loss += batch_loss

            # Report the current epoch and loss every 100 steps
            if (batch_idx + 1) % 100 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{num_batches}], Loss: {batch_loss:.4f}')

        train_losses.append(running_loss / num_batches)

    train_metrics, train_y_true, train_y_pred, train_y_score = evaluate(model, train_loader, device)
    test_metrics, test_y_true, test_y_pred, test_y_score = evaluate(model, test_loader, device)

    return train_metrics, test_metrics, train_y_true, train_y_pred, train_y_score, test_y_true, test_y_pred, test_y_score, train_losses

def evaluate(model, data_loader, device):
    """Run ``model`` over ``data_loader`` without gradients and score its predictions.

    The model is switched to evaluation mode and left in that mode. Labels are
    converted with ``.numpy()`` without a device transfer, so the loader is
    assumed to yield CPU label tensors.

    Args:
        model: Network producing one row of class logits per image.
        data_loader: Loader yielding ``(images, labels)`` batches.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        ``(metrics, labels, predictions, scores)`` where ``metrics`` maps
        ``'Accuracy'``, ``'Precision'``, ``'Recall'`` and ``'F1-score'`` to
        their values (the last three weighted-averaged over classes),
        ``predictions`` are the arg-max classes and ``scores`` are the softmax
        probabilities of class index 1.
    """
    model.eval()
    predictions, targets, positive_scores = [], [], []

    with torch.no_grad():
        for batch_images, batch_labels in data_loader:
            logits = model(batch_images.to(device))
            predicted_classes = logits.argmax(dim=1)
            # Probability assigned to class index 1 (treated as the positive class)
            class_one_probability = torch.softmax(logits, dim=1)[:, 1]

            predictions.extend(predicted_classes.cpu().numpy())
            targets.extend(batch_labels.numpy())
            positive_scores.extend(class_one_probability.cpu().numpy())

    metrics = {
        'Accuracy': accuracy_score(targets, predictions),
        'Precision': precision_score(targets, predictions, average='weighted'),
        'Recall': recall_score(targets, predictions, average='weighted'),
        'F1-score': f1_score(targets, predictions, average='weighted'),
    }
    return metrics, targets, predictions, positive_scores

In [8]:
# DataLoader for the test set (no shuffling needed for evaluation)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Synthetic-to-original ratios to evaluate
synthetic_percentages = [0.0, 0.5, 1.0]  # 0%, 50%, 100%
all_results = []

# Seed re-applied before every run so that weight initialisation and batch
# shuffling start from the same random state for each percentage.
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
SEED = 42


def _plot_confusion_matrix(ax, cm, title, cmap):
    """Draw confusion matrix ``cm`` on ``ax`` with a colorbar and 0/1 class ticks."""
    image = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.set_title(title, fontsize=10)
    ax.figure.colorbar(image, ax=ax)
    tick_marks = range(2)
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(['0', '1'], rotation=45)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(['0', '1'])
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    ax.grid(False)


def _plot_roc_curve(ax, fpr, tpr, label, title, color):
    """Draw one ROC curve plus the chance diagonal on ``ax``."""
    ax.plot(fpr, tpr, color=color, lw=2, label=label)
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(title, fontsize=10)
    ax.legend(loc="lower right")


# One row per synthetic percentage; columns: train CM, test CM, train ROC, test ROC.
# A single grid replaces the previous mix of 3x4 and 3x2 subplot indices, which
# requested positions beyond the 3x2 grid from the second percentage onwards.
fig, axes = plt.subplots(len(synthetic_percentages), 4, figsize=(24, 15), squeeze=False)

for idx, percentage in enumerate(synthetic_percentages):
    print(f"\nEvaluacija sa {percentage*100}% sintetičkih podataka u trening setu:")

    torch.manual_seed(SEED)

    combined_train_dataset = CombinedDataset(train_dataset, synthetic_train_dataset, percentage, transform)
    train_loader = DataLoader(combined_train_dataset, batch_size=32, shuffle=True)

    print(f"Broj slika u kombinovanom trening setu: {len(combined_train_dataset)}")

    model = CNNModel()
    train_metrics, test_metrics, train_y_true, train_y_pred, train_y_score, test_y_true, test_y_pred, test_y_score, train_losses = train_and_evaluate(model, train_loader, test_loader)

    # Confusion matrices for the training and test sets
    cm_train = confusion_matrix(train_y_true, train_y_pred)
    cm_test = confusion_matrix(test_y_true, test_y_pred)

    # ROC curves for the training and test sets
    fpr_train, tpr_train, _ = roc_curve(train_y_true, train_y_score)
    roc_auc_train = auc(fpr_train, tpr_train)

    fpr_test, tpr_test, _ = roc_curve(test_y_true, test_y_score)
    roc_auc_test = auc(fpr_test, tpr_test)

    ax_cm_train, ax_cm_test, ax_roc_train, ax_roc_test = axes[idx]

    _plot_confusion_matrix(
        ax_cm_train, cm_train,
        f"Confusion matrix (Training) - {percentage*100}% synthetic",
        plt.cm.Blues,
    )
    _plot_confusion_matrix(
        ax_cm_test, cm_test,
        f"Confusion matrix (Test) - {percentage*100}% synthetic",
        plt.cm.Greens,
    )
    _plot_roc_curve(
        ax_roc_train, fpr_train, tpr_train,
        f'Train ROC curve (area = {roc_auc_train:.2f})',
        f'Receiver Operating Characteristic (Train) - {percentage*100}% synthetic',
        'darkorange',
    )
    _plot_roc_curve(
        ax_roc_test, fpr_test, tpr_test,
        f'Test ROC curve (area = {roc_auc_test:.2f})',
        f'Receiver Operating Characteristic (Test) - {percentage*100}% synthetic',
        'darkgreen',
    )

    all_results.append({
        'Synthetic_Percentage': percentage,
        'Train_Metrics': train_metrics,
        'Test_Metrics': test_metrics,
        'Train_Losses': train_losses
    })

fig.tight_layout()
plt.show()


Evaluacija sa 0.0% sintetičkih podataka u trening setu:
Broj slika u kombinovanom trening setu: 4352
Epoch [1/10], Step [100/136], Loss: 0.6159
Epoch [2/10], Step [100/136], Loss: 0.5180
Epoch [3/10], Step [100/136], Loss: 0.4002
Epoch [4/10], Step [100/136], Loss: 0.0995
Epoch [5/10], Step [100/136], Loss: 0.0668
Epoch [6/10], Step [100/136], Loss: 0.1576
Epoch [7/10], Step [100/136], Loss: 0.0027
Epoch [8/10], Step [100/136], Loss: 0.0481
Epoch [9/10], Step [100/136], Loss: 0.0117
Epoch [10/10], Step [100/136], Loss: 0.0098

Evaluacija sa 50.0% sintetičkih podataka u trening setu:
Broj slika u kombinovanom trening setu: 5352
Epoch [1/10], Step [100/168], Loss: 0.4395
Epoch [2/10], Step [100/168], Loss: 0.3012
Epoch [3/10], Step [100/168], Loss: 0.1643
Epoch [4/10], Step [100/168], Loss: 0.0576
Epoch [5/10], Step [100/168], Loss: 0.0950
Epoch [6/10], Step [100/168], Loss: 0.0230
Epoch [7/10], Step [100/168], Loss: 0.0174
Epoch [8/10], Step [100/168], Loss: 0.0029
Epoch [9/10], Step [