In [20]:
import os
import scipy.io as sio
import json

def count_people_in_shanghaitech(images_dir, annotations_dir, output_json_path):
    """
    Przetwarza zbiór ShanghaiTech, zapisując liczbę osób na każdym zdjęciu w pliku JSON.
    
    Args:
        images_dir (str): Ścieżka do folderu z obrazami (.jpg).
        annotations_dir (str): Ścieżka do folderu z adnotacjami (.mat).
        output_json_path (str): Ścieżka do pliku JSON, który zapisze {nazwa_zdjęcia: liczba_osób}.
    """
    image_counts = {}
    
    for img_name in os.listdir(images_dir):
        if not img_name.endswith('.jpg'):
            continue
        
        # Wczytaj adnotacje (punkty osób)
        mat_path = os.path.join(annotations_dir, 'GT_' + img_name.replace('.jpg', '.mat')).replace("\\", "/")
        mat = sio.loadmat(mat_path)
        points = mat['image_info'][0][0][0][0][0]  # Struktura plików .mat w ShanghaiTech
        
        # Liczba osób = liczba punktów
        num_people = len(points)
        image_counts[img_name] = num_people
    
    # Zapisz wynik w JSON
    with open(output_json_path, 'w') as f:
        json.dump(image_counts, f, indent=4)
    
    print(f"Zapisano liczbę osób dla {len(image_counts)} zdjęć w: {output_json_path}")

# Przykład użycia:
count_people_in_shanghaitech(
    images_dir="ShanghaiTech/part_B/train_data/images",
    annotations_dir="ShanghaiTech/part_B/train_data/ground-truth",
    output_json_path="ShanghaiTech_train_people_counts.json"
)

count_people_in_shanghaitech(
    images_dir="ShanghaiTech/part_B/test_data/images",
    annotations_dir="ShanghaiTech/part_B/test_data/ground-truth",
    output_json_path="ShanghaiTech_test_people_counts.json"
)

Zapisano liczbę osób dla 400 zdjęć w: ShanghaiTech_train_people_counts.json
Zapisano liczbę osób dla 316 zdjęć w: ShanghaiTech_test_people_counts.json


In [22]:
import torch
import torchvision
from torch.utils.data import Dataset
from PIL import Image
import json
import os

class ShanghaiTechDataset(Dataset):
    def __init__(self, images_dir, json_counts_path, transform=None):
        self.images_dir = images_dir
        self.transform = transform
        
        with open(json_counts_path, 'r') as f:
            self.counts = json.load(f)
        
        self.image_names = list(self.counts.keys())
    
    def __len__(self):
        return len(self.image_names)
    
    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        img_path = os.path.join(self.images_dir, img_name)
        
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        
        num_people = self.counts[img_name]
        return image, torch.tensor(num_people, dtype=torch.float32)  # float32 dla regresji

In [23]:
# Transformacje (można dodać augmentację)
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Dopasuj rozmiar
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalizacja ImageNet
])

# Załaduj dataset
train_dataset = ShanghaiTechDataset(
    images_dir="ShanghaiTech/part_B/train_data/images",
    json_counts_path="ShanghaiTech_train_people_counts.json",
    transform=transform
)

val_dataset = ShanghaiTechDataset(
    images_dir="ShanghaiTech/part_B/test_data/images",
    json_counts_path="ShanghaiTech_test_people_counts.json",
    transform=transform
)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [24]:
import torch.nn as nn
import torchvision.models as models

class CrowdCounter(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = models.resnet18(pretrained=True)  # Transfer learning
        self.backbone.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 1)  # 1 wyjście (liczba osób)
        )
    
    def forward(self, x):
        return self.backbone(x).squeeze(1)  # Usuń wymiar [batch, 1] -> [batch]

model = CrowdCounter()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\jakub/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100.0%


CrowdCounter(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tra

In [25]:
criterion = nn.L1Loss()  # MAE (mniej wrażliwe na outliers niż MSE)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [26]:
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        
        for images, counts in train_loader:
            images = images.to(device)
            counts = counts.to(device)
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, counts)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
        
        # Walidacja
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, counts in val_loader:
                images = images.to(device)
                counts = counts.to(device)
                
                outputs = model(images)
                val_loss += criterion(outputs, counts).item()
        
        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss/len(train_loader):.4f} | Val Loss: {val_loss/len(val_loader):.4f}")

train_model(model, train_loader, val_loader, criterion, optimizer, epochs=20)

Epoch 1/20 | Train Loss: 121.3960 | Val Loss: 122.2054
Epoch 2/20 | Train Loss: 118.8840 | Val Loss: 119.4229
Epoch 3/20 | Train Loss: 116.4743 | Val Loss: 115.6548
Epoch 4/20 | Train Loss: 112.6863 | Val Loss: 111.0445


Exception ignored in: <function WeakKeyDictionary.__init__.<locals>.remove at 0x00000263CEAE7560>
Traceback (most recent call last):
  File "C:\Users\jakub\anaconda3\envs\cnn_e\Lib\weakref.py", line 370, in remove
    self = selfref()
           ^^^^^^^^^
KeyboardInterrupt: 


KeyboardInterrupt: 