In [41]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import pandas as pd
from sklearn.model_selection import train_test_split
import os


In [55]:
class SpectrogramDataset(Dataset):
    def __init__(self, img_dir, transform=None, annotations_file=None):
        self.img_dir = img_dir
        self.transform = transform
        if annotations_file:
            self.img_labels = pd.read_csv(annotations_file)
            self.has_labels = True
        else:
            self.img_labels = [{'idx': img} for img in os.listdir(img_dir) if img.endswith('.png')]
            self.has_labels = False

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_info = self.img_labels[idx]
        img_name = img_info['idx']
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        
        if self.has_labels:
            label = img_info['class']
            return image, label, img_name  # Devolver también el nombre del archivo
        return image, torch.tensor(0), img_name  # Devolver un tensor dummy si no hay etiquetas y el nombre del archivo


In [57]:
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Carga de datos de entrenamiento
train_img_dir = './data-images/train'
train_csv_file = 'data/train.csv'
train_data = SpectrogramDataset(annotations_file=train_csv_file, img_dir=train_img_dir, transform=transform)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

# Carga de datos de prueba (sin etiquetas)
test_img_dir = './data-images/test'
test_data = SpectrogramDataset(img_dir=test_img_dir, transform=transform)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)


In [44]:
model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

for name, child in model.named_children():
    if name in ['layer4', 'fc']:
        for param in child.parameters():
            param.requires_grad = True

model.fc = nn.Linear(model.fc.in_features, 2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [45]:
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
criterion = nn.CrossEntropyLoss()


In [46]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=2):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, labels in dataloaders['train']:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * inputs.size(0)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(dataloaders['train'].dataset)}")

In [47]:
dataloaders = {'train': train_loader, 'test': test_loader}
train_model(model, dataloaders, criterion, optimizer, num_epochs=2)

Epoch 1/2, Loss: 0.2596386714240675
Epoch 2/2, Loss: 0.176141044514389


In [58]:
def get_predicted_labels(model, dataloader):
    model.eval()  # Poner el modelo en modo evaluación
    predictions = []
    with torch.no_grad():  # Desactivar el cálculo de gradientes
        for images, _, filenames in dataloader:  # Ajustar para recibir nombres de archivo
            images = images.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            predictions.extend(zip(filenames, predicted.cpu().numpy()))  # Emparejar nombre de archivo con predicción
    
    return predictions

# Luego, para llamar a esta función y obtener las etiquetas predichas junto con los nombres de archivo:
predicted_labels = get_predicted_labels(model, test_loader)
for filename, label in predicted_labels:
    print(f"{filename}: {'RightWhale' if label == 1 else 'NoWhale'}")


1157.png: NoWhale
426.png: NoWhale
1525.png: NoWhale
189.png: NoWhale
51.png: NoWhale
1456.png: RightWhale
358.png: NoWhale
1199.png: NoWhale
1538.png: RightWhale
1773.png: RightWhale
570.png: RightWhale
899.png: RightWhale
1509.png: NoWhale
725.png: RightWhale
276.png: RightWhale
1163.png: NoWhale
171.png: NoWhale
1213.png: NoWhale
1906.png: NoWhale
974.png: NoWhale
321.png: NoWhale
1810.png: NoWhale
564.png: RightWhale
382.png: NoWhale
898.png: NoWhale
255.png: NoWhale
1885.png: NoWhale
1318.png: RightWhale
14.png: RightWhale
1778.png: NoWhale
567.png: NoWhale
391.png: RightWhale
1095.png: RightWhale
1747.png: NoWhale
1919.png: RightWhale
313.png: NoWhale
350.png: NoWhale
1265.png: NoWhale
735.png: RightWhale
1535.png: NoWhale
765.png: RightWhale
841.png: NoWhale
519.png: NoWhale
572.png: NoWhale
1606.png: RightWhale
862.png: RightWhale
668.png: NoWhale
584.png: NoWhale
299.png: RightWhale
1473.png: NoWhale
1303.png: NoWhale
909.png: RightWhale
1349.png: RightWhale
1755.png: NoWhale


In [63]:
df = pd.DataFrame(predicted_labels, columns=['idx', 'class'])
df['idx'] = df['idx'].str.replace('.png', '', regex=False)

df['class'] = df['class'].map({0: 'NoWhale', 1: 'RightWhale'})  # Mapear los valores numéricos a etiquetas

df.to_csv('./predicted_labels.csv', index=False)