In [20]:
import zipfile
from PIL import Image
import torch
from torchvision import transforms
from io import BytesIO
from torch.utils.data import DataLoader, TensorDataset
import numpy as np


device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("GPU is", "available" if device else "NOT AVAILABLE")

# Caminho para o arquivo ZIP
zip_path1 = 'bird.zip'
zip_path2 = 'not-bird.zip'

# Transforms para redimensionar e converter para tensor
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),  # Converte para [C x H x W]
])

# Lista para armazenar os tensores
image_in_tensors = []
image_out_tensors = []


# Abre o ZIP e processa as imagens diretamente
def loadImages(zip_path,label,max):
    count = 0
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for file_name in zip_ref.namelist():
            if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                with zip_ref.open(file_name) as file:
                    image = Image.open(BytesIO(file.read())).convert('RGB')
                    tensor = transform(image)  # Shape: [3, 256, 256]
                    image_in_tensors.append(tensor)
                    image_out_tensors.append([label])
                count+=1
                if count%1000 == 0:
                    print(count)
                    
                if count >= max:
                    return

# Empilha os tensores em um batch
loadImages(zip_path1,1,1000)
loadImages(zip_path2,0,1000)

print(f'Total de imagens carregadas: {len(image_in_tensors)}')

t_x = torch.stack(image_in_tensors)
t_y = torch.tensor(image_out_tensors,dtype=torch.float32)

shuffler = np.random.permutation(len(t_x))

x_shuffled = t_x[shuffler]
y_shuffled = t_y[shuffler]

t_xt = x_shuffled.to(device)
t_yt = y_shuffled.to(device)

dataset = TensorDataset(t_xt, t_yt)
#batch_tensor = torch.stack(image_in_tensors)  # Shape: [N, 3, 256, 256]


print(f'Shape do batch: {t_xt.shape}')
print(f'Shape dos rótulos: {t_yt.shape}')



GPU is available
1000
1000
Total de imagens carregadas: 2000
Shape do batch: torch.Size([2000, 3, 32, 32])
Shape dos rótulos: torch.Size([2000, 1])


In [17]:
import torch;
import torch.nn as nn

class RedeCnnBirdNotBird(nn.Module):
    def __init__(self):
        super(RedeCnnBirdNotBird, self).__init__()

        self.conv1 = nn.Conv2d(3, 6, 3, stride=1)
        self.conv2 = nn.Conv2d(6, 12, 3, stride=1)
        self.conv3 = nn.Conv2d(12, 24, 5, stride=1)
        self.poll1 = nn.MaxPool2d(2,2)
        self.poll2 = nn.MaxPool2d(2,2)
        
        self.linear1 = nn.Linear(864,256)
        self.linear2 = nn.Linear(256,1)
        #self.linear3 = nn.Linear(164,164)
        #self.linear4 = nn.Linear(512,1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.poll1(x)
        x = self.poll2(x)
        
        #print("x poll1: ",x.size())
        
        x = torch.flatten(x, start_dim=1)
        
        #print("x flatten: ",x.size())
        
        x = torch.relu(self.linear1(x))
        x = torch.relu(self.linear2(x))
        
        #x = torch.relu(self.linear1(x))
        #x = torch.relu(self.linear2(x))
        #x = torch.tanh(self.linear3(x))
        #x = torch.relu(self.linear4(x))
        return x

In [23]:
import time

def train(cnn,dataset, epochs=10):
    opt = torch.optim.Adam(cnn.parameters(),lr=0.000001)#0.00001  #0.0000001

    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=64, shuffle=True
    )

    for epoch in range(epochs):
        totalloss = 0
        batch = 0
        start_time = time.time()
        opt.zero_grad()

        for inputs, targets in train_loader:
            
            #inputs = x[:, :insize]    
            #targets = x[:, insize:] 
            #x = x.to(device) # GPU
            x_hat = cnn(inputs)
            #print(inputs.size()," ",targets.size()," ",x_hat.size())
            loss = ((targets - x_hat)**2).sum()

            loss.backward(retain_graph=True)
            totalloss+=loss
            batch+=1
 
            opt.step()
            opt.zero_grad()
            #print("step: ")

        end_time = time.time()
        print(epoch," Total Loss: ",(totalloss/len(dataset))," time ",(end_time-start_time))
        #if epoch%250==0:
        #    torch.save(cnn, "backup_training_gam_v02.pth")
    return cnn


In [None]:
cnn = RedeCnnBirdNotBird().to(device)

cnn = train(cnn, dataset,epochs=100)





0  Total Loss:  tensor(0.4887, device='cuda:0', grad_fn=<DivBackward0>)  time  0.1792278289794922
1  Total Loss:  tensor(0.4787, device='cuda:0', grad_fn=<DivBackward0>)  time  0.16898512840270996
2  Total Loss:  tensor(0.4685, device='cuda:0', grad_fn=<DivBackward0>)  time  0.16802430152893066
3  Total Loss:  tensor(0.4582, device='cuda:0', grad_fn=<DivBackward0>)  time  0.17541193962097168
4  Total Loss:  tensor(0.4478, device='cuda:0', grad_fn=<DivBackward0>)  time  0.17531943321228027
5  Total Loss:  tensor(0.4374, device='cuda:0', grad_fn=<DivBackward0>)  time  0.18264102935791016
6  Total Loss:  tensor(0.4269, device='cuda:0', grad_fn=<DivBackward0>)  time  0.20350003242492676
7  Total Loss:  tensor(0.4165, device='cuda:0', grad_fn=<DivBackward0>)  time  0.17789268493652344
8  Total Loss:  tensor(0.4061, device='cuda:0', grad_fn=<DivBackward0>)  time  0.16866326332092285
9  Total Loss:  tensor(0.3958, device='cuda:0', grad_fn=<DivBackward0>)  time  0.17922258377075195
10  Total L

In [None]:
tabela_verdade = [2,2]

train_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True
)


for inputs, targets in train_loader:
    x_hat = cnn(inputs)
    classe = 0
    if x_hat[0] > 0.5:
        classe = 1
        
    #print(targets)
    tabela_verdade[classe][int(targets[0][0])]+=1
    
print(tabela_verdade)

tensor([[0.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[1.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([[0.]], device='cuda:0')
tensor([