In [2]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

class BSDS500Dataset(Dataset):
    """Denoising dataset pairing every JPEG in a folder with a noisy copy.

    Each item is a ``(noisy_image, image)`` tuple of tensors: ``image`` is the
    RGB picture after ``transform`` and ``noisy_image`` is the same tensor with
    additive Gaussian noise, clamped to ``[0, 1]``. The noise is drawn afresh
    on every ``__getitem__`` call, so requesting the same index twice yields
    two different noisy inputs for the same clean target.

    Args:
        root_dir: Directory scanned (non-recursively) for JPEG files.
        transform: Optional callable applied to the loaded PIL image. If it is
            omitted, or does not return a tensor, the image is converted with
            ``transforms.ToTensor()`` so that noise can be added.
        noise_std: Standard deviation of the Gaussian noise (default ``0.1``).
    """

    # Extensions accepted as images; compared case-insensitively.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None, noise_std=0.1):
        self.root_dir = root_dir
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable between runs and machines.
        self.image_paths = [
            os.path.join(root_dir, f)
            for f in sorted(os.listdir(root_dir))
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        ]
        self.transform = transform
        self.noise_std = noise_std

    def __len__(self):
        """Return the number of image files found in ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(noisy_image, clean_image)``."""
        img_path = self.image_paths[idx]
        # convert() loads the pixel data, so the file can be closed right away.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        # torch.randn_like needs a tensor; fall back to ToTensor when the
        # caller supplied no transform (or one that still returns a PIL image).
        if not isinstance(image, torch.Tensor):
            image = transforms.ToTensor()(image)

        noisy_image = image + torch.randn_like(image) * self.noise_std
        # Keep the corrupted input inside the valid [0, 1] intensity range.
        noisy_image = torch.clamp(noisy_image, 0., 1.)

        return noisy_image, image

# Folder holding the training JPEGs (absolute, machine-specific path).
train_dir = r'C:\Users\soham\OneDrive\Documents\Voice cloning\images\train'

# Preprocessing applied to every image: resize to a fixed 128x128 so samples
# can be batched, then convert the PIL image to a float tensor.
transform = transforms.Compose(
    [
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ]
)

# Shuffled mini-batches of 16 (noisy, clean) pairs.
train_dataset = BSDS500Dataset(root_dir=train_dir, transform=transform)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
)


In [3]:
import torch
import torch.nn as nn

class DenoisingAutoencoder(nn.Module):
    """Convolutional encoder/decoder that maps a noisy RGB image to a clean one.

    The encoder stacks four 3x3 convolution stages (3 -> 64 -> 128 -> 256 -> 512
    channels), each followed by batch norm and LeakyReLU(0.2); three of the
    stages end in a 2x2 max-pool, so the spatial size shrinks by a factor of 8.
    The decoder mirrors this with three stride-2 transposed convolutions that
    each double the spatial size, then a final 3-channel transposed convolution
    and a sigmoid, so outputs lie in [0, 1]. For inputs whose height and width
    are multiples of 8 the output has the same shape as the input.
    """

    def __init__(self):
        super().__init__()

        def down_stage(c_in, c_out, pool):
            # 3x3 same-padding conv -> BN -> LeakyReLU, optionally halving H and W.
            stage = [
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2),
            ]
            if pool:
                stage.append(nn.MaxPool2d(2, 2))
            return stage

        def up_stage(c_in, c_out):
            # Stride-2 transposed conv (output_padding=1 makes it an exact 2x upsample).
            return [
                nn.ConvTranspose2d(c_in, c_out, kernel_size=3, stride=2, padding=1, output_padding=1),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2),
            ]

        # (in_channels, out_channels, ends_with_pool) for each encoder stage.
        encoder_spec = ((3, 64, True), (64, 128, True), (128, 256, False), (256, 512, True))
        encoder_layers = []
        for c_in, c_out, pool in encoder_spec:
            encoder_layers.extend(down_stage(c_in, c_out, pool))
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = []
        for c_in, c_out in ((512, 256), (256, 128), (128, 64)):
            decoder_layers.extend(up_stage(c_in, c_out))
        # Project back to 3 channels and squash into the [0, 1] image range.
        decoder_layers.append(nn.ConvTranspose2d(64, 3, kernel_size=3, stride=1, padding=1))
        decoder_layers.append(nn.Sigmoid())
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the bottleneck and decode it back to image space."""
        return self.decoder(self.encoder(x))

# Build the denoising network; its weights are the freshly initialised ones
# (nothing is loaded from a checkpoint here).
model = DenoisingAutoencoder()


In [None]:
import torch.optim as optim


# --- Training configuration -------------------------------------------------
epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model to its target device *before* building the optimizer, as the
# torch.optim documentation recommends, so the optimizer tracks the parameters
# that are actually trained.
model = model.to(device)

# Pixel-wise reconstruction loss between the denoised output and the clean image.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# The visualisation cell puts the model in eval mode; switch back explicitly so
# re-running this cell still trains with batch-norm in training mode.
model.train()

for epoch in range(epochs):
    running_loss = 0.0   # sum of per-sample losses over the epoch
    samples_seen = 0

    for noisy_imgs, clean_imgs in train_loader:
        noisy_imgs, clean_imgs = noisy_imgs.to(device), clean_imgs.to(device)

        outputs = model(noisy_imgs)
        loss = criterion(outputs, clean_imgs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        batch_size = noisy_imgs.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    if samples_seen == 0:
        raise RuntimeError(
            "train_loader produced no batches; check that the training directory contains images"
        )

    # Report the mean loss over the whole epoch rather than the last batch only.
    epoch_loss = running_loss / samples_seen
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")


In [None]:
import torchsummary


# torchsummary pushes a random dummy batch through the network to record layer
# shapes. Run it in eval mode so that batch does not update the BatchNorm
# running statistics of the trained model, and tell it which device the model
# is on instead of relying on its default.
_summary_device = next(model.parameters()).device.type
_was_training = model.training
model.eval()
try:
    torchsummary.summary(model, (3, 128, 128), device=_summary_device)
finally:
    # Restore whichever mode the model was in before the summary.
    model.train(_was_training)

In [None]:
import matplotlib.pyplot as plt


# NOTE(review): this is the same folder as `train_dir`, so the pictures shown
# below are training images — confirm whether a held-out folder was intended.
test_dir = r'C:\Users\soham\OneDrive\Documents\Voice cloning\images\train'
test_dataset = BSDS500Dataset(root_dir=test_dir, transform=transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

# Visualize results: show noisy / denoised / clean side by side for the first
# five samples of the loader.
model.eval()
with torch.no_grad():
    count = 0
    for count, (noisy_img, clean_img) in enumerate(test_loader, start=1):
        noisy_img = noisy_img.to(device)
        clean_img = clean_img.to(device)
        denoised_img = model(noisy_img)

        # Convert tensors to images: drop the batch dimension and move the
        # channel axis last (C, H, W -> H, W, C), which is what imshow expects.
        noisy_img, denoised_img, clean_img = (
            t.cpu().squeeze().permute(1, 2, 0).numpy()
            for t in (noisy_img, denoised_img, clean_img)
        )

        # Plot the three panels in one row.
        plt.figure(figsize=(15, 5))
        panels = (
            ("Noisy Image", noisy_img),
            ("Denoised Image", denoised_img),
            ("Clean Image", clean_img),
        )
        for position, (title, picture) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            plt.title(title)
            plt.imshow(picture)
        plt.show()

        # Stop after five figures.
        if count == 5:
            break
        
