# CNN pro zpracování obrazu

https://github.com/jeffheaton/app_deep_learning/blob/main/t81_558_class_05_1_python_images.ipynb

In [1]:
# Default server base URL. The next cell reassigns `serverpath` from a value
# entered through getpass.
serverpath = "https://needtoknow.cz"

In [20]:
import getpass
# getpass reads the server name without echoing it, so the value does not end up
# in the saved notebook output.
server = getpass.getpass()
# Build the base URL from the entered name.
# NOTE(review): `serverpath` is not referenced by any later visible cell — confirm it is still needed.
serverpath = f"https://{server}.cz"

 ········


## Dataset

Zpracování sady obrázků sponek.

In [3]:
# Download URL of the zipped paperclips data (GitHub release asset); fetched by the next cell.
fullurl = "https://github.com/jeffheaton/data-mirror/releases/download/v1/paperclips.zip"

In [None]:
import os

import requests
from io import BytesIO

# Where the downloaded archive is stored; the extraction cell reads the same path.
DOWNLOAD_PATH = "./datasource/data.zip"
# Seconds to wait for the connection / for each read before giving up.
DOWNLOAD_TIMEOUT_S = 60

# No other visible cell creates the target directory, so make sure it exists.
os.makedirs(os.path.dirname(DOWNLOAD_PATH), exist_ok=True)

# stream=True lets iter_content() pull the body chunk by chunk instead of
# buffering the whole archive in memory before the first write.
with requests.get(fullurl, stream=True, timeout=DOWNLOAD_TIMEOUT_S) as r:
    # Raises requests.HTTPError with the status and URL on 4xx/5xx; unlike
    # `assert`, this check is not stripped when Python runs with -O.
    r.raise_for_status()

    # The `with` block closes the file; no explicit close() is needed.
    with open(DOWNLOAD_PATH, "wb") as f:
        for chunk in r.iter_content(chunk_size=512 * 1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)


In [None]:
import zipfile

# Unpack the downloaded archive; a damaged or non-zip file is reported, not raised.
# NOTE(review): extractall() without a path unpacks into the current working
# directory, while later cells read from ./datasource/data/paperclips — confirm
# the archive layout produces that directory.
archive_path = './datasource/data.zip'
try:
    # ZipFile opens in read mode by default.
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall()
except zipfile.BadZipFile:
    print('Not a zip file or a corrupted zip file')

In [17]:
import os

# Canonical absolute path of the dataset directory; later cells read the CSV
# files and the images from here.
SOURCE = os.path.realpath("./datasource/data/paperclips")
SOURCE

'/home/jovyan/work/ais/datasource/data/paperclips'

Načtení CSV souborů obsahujících popis obrázků, manuální přednastavení množin. Transformace datových položek.

In [27]:
import os
import pandas as pd

# Load the train/test metadata tables that describe the images.
csvfilename = os.path.join(SOURCE, "train.csv")
print(csvfilename)
df_train = pd.read_csv(csvfilename, na_values=['NA', '?'])
df_test = pd.read_csv(os.path.join(SOURCE, "test.csv"), na_values=['NA', '?'])

# Derive each frame's image file names from ITS OWN ids. The previous code read
# an undefined `df`, which fails on a fresh kernel or silently uses stale data.
df_train['filename'] = "clips-" + df_train["id"].astype(str) + ".jpg"
df_test['filename'] = "clips-" + df_test["id"].astype(str) + ".jpg"

# The regression target is cast to float32.
df_train["clip_count"] = df_train["clip_count"].astype("float32")
# Cast the test labels from the test frame itself; the previous code copied the
# training labels into df_test. Skip the cast if the test CSV has no label column.
if "clip_count" in df_test.columns:
    df_test["clip_count"] = df_test["clip_count"].astype("float32")
df_train

/home/jovyan/work/ais/datasource/data/paperclips/train.csv


Unnamed: 0,id,clip_count,filename
0,30001,11.0,clips-30001.jpg
1,30002,2.0,clips-30002.jpg
2,30003,26.0,clips-30003.jpg
3,30004,41.0,clips-30004.jpg
4,30005,49.0,clips-30005.jpg
...,...,...,...
19995,49996,35.0,clips-49996.jpg
19996,49997,54.0,clips-49997.jpg
19997,49998,72.0,clips-49998.jpg
19998,49999,24.0,clips-49999.jpg


Rozdělení dat (trénovací vs. validační množina)

In [28]:
# Fraction of the labelled rows kept for training; the remainder is validation.
TRAIN_PCT = 0.9
TRAIN_CUT = int(TRAIN_PCT * len(df_train))

# Positional split without shuffling: leading rows train, trailing rows validate.
# NOTE: `df_train` is rebound to its own prefix, so re-running this cell alone
# shrinks it again — re-run the loading cell first.
df_train, df_validate = df_train.iloc[:TRAIN_CUT], df_train.iloc[TRAIN_CUT:]

for split_label, split_frame in (("Training", df_train), ("Validate", df_validate)):
    print(f"{split_label} size: {len(split_frame)}")

Training size: 18000
Validate size: 2000


In [29]:
import json
# Serialise the validation frame to a JSON string (one object per row), then
# parse it back into a list of plain Python dicts.
validate_records_json = df_validate.to_json(orient="records")
dfjson = json.loads(validate_records_json)
# Preview the first ten records.
dfjson[:10]

[{'id': 48001, 'clip_count': 29.0, 'filename': 'clips-48001.jpg'},
 {'id': 48002, 'clip_count': 29.0, 'filename': 'clips-48002.jpg'},
 {'id': 48003, 'clip_count': 33.0, 'filename': 'clips-48003.jpg'},
 {'id': 48004, 'clip_count': 14.0, 'filename': 'clips-48004.jpg'},
 {'id': 48005, 'clip_count': 43.0, 'filename': 'clips-48005.jpg'},
 {'id': 48006, 'clip_count': 33.0, 'filename': 'clips-48006.jpg'},
 {'id': 48007, 'clip_count': 32.0, 'filename': 'clips-48007.jpg'},
 {'id': 48008, 'clip_count': 2.0, 'filename': 'clips-48008.jpg'},
 {'id': 48009, 'clip_count': 73.0, 'filename': 'clips-48009.jpg'},
 {'id': 48010, 'clip_count': 4.0, 'filename': 'clips-48010.jpg'}]

Vytvoření objektu, který se chová jako dynamický list: má délku a k jeho prvkům lze přistupovat přes index.

In [None]:
from torch.utils.data import Dataset

def createDataset(dataframe, root_dir, transform=None):
    """Wrap an image-metadata frame as a map-style PyTorch dataset.

    Args:
        dataframe: pandas DataFrame read positionally — column 0 is the image
            id and column 1 is the clip count.
        root_dir: Directory that contains the ``clips-<id>.jpg`` files.
        transform: Optional callable applied to every loaded image.

    Returns:
        A ``Dataset`` whose items are ``{"image": ..., "clip_count": ...}``
        dicts and whose length is the number of rows in ``dataframe``.
    """

    class ClipCountDataset(Dataset):
        """Indexable view over one metadata frame and its image directory."""

        def __init__(self, data, root_dir, transform):
            # Previously there was no __init__ and the class was instantiated
            # without arguments, so these attributes did not exist and both
            # len() and indexing raised AttributeError.
            self.data = data
            self.root_dir = root_dir
            self.transform = transform

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            # `Image` was never imported in the notebook. Import it here,
            # at first use, so building the dataset does not need Pillow.
            from PIL import Image

            img_name = os.path.join(
                self.root_dir, "clips-" + str(self.data.iloc[idx, 0]) + ".jpg"
            )
            image = Image.open(img_name)
            clip_count = self.data.iloc[idx, 1]
            sample = {"image": image, "clip_count": clip_count}
            if self.transform is not None:
                sample["image"] = self.transform(sample["image"])
            return sample

    return ClipCountDataset(dataframe, root_dir, transform)

Příprava transformací, které zabezpečí optimalizaci procesu učení.
- změna rozměru
- normalizace barev (gauss / sigma, ...)

In [30]:
# Echo the resolved dataset directory.
SOURCE

'/home/jovyan/work/ais/datasource/data/paperclips'

In [32]:
!pip install torchvision

Collecting torchvision
  Downloading torchvision-0.16.1-cp310-cp310-manylinux1_x86_64.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m758.8 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting torch==2.1.1
  Downloading torch-2.1.1-cp310-cp310-manylinux1_x86_64.whl (670.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m670.2/670.2 MB[0m [31m572.9 kB/s[0m eta [36m0:00:00[0m00:01[0m00:27[0m
[0mCollecting nvidia-cusparse-cu12==12.1.0.106
  Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m196.0/196.0 MB[0m [31m917.6 kB/s[0m eta [36m0:00:00[0m00:01[0m00:06[0m
[?25hCollecting nvidia-cusolver-cu12==11.4.5.107
  Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.2/124.2 MB[0m [31m751.5 kB/s[0m eta [36m0:00:00

In [34]:
!pip install torch



In [33]:
from torch.utils.data import DataLoader
from torchvision import transforms

# Per-channel normalisation statistics
# (presumably the standard ImageNet values — TODO confirm).
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: fixed 256x256 size, tensor conversion,
# then channel-wise normalisation.
preprocessing_steps = [
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
]
data_transform = transforms.Compose(preprocessing_steps)

# One dataset per split, all reading images from SOURCE with the same transform.
train_dataset, val_dataset, test_dataset = (
    createDataset(split_frame, SOURCE, transform=data_transform)
    for split_frame in (df_train, df_validate, df_test)
)

# Train/validation batches are shuffled; the test loader yields one sample at a
# time in frame order.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_dataloader = DataLoader(val_dataset, shuffle=True, batch_size=32)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=1)

  warn(


ImportError: cannot import name 'vmap_impl' from 'torch._functorch.vmap' (/opt/conda/lib/python3.10/site-packages/torch/_functorch/vmap.py)

Příprava modelu

In [None]:
import torch
from torch import nn

# No visible cell defines `device`. Reuse one chosen earlier if it exists,
# otherwise fall back to CUDA when available and CPU when not.
if "device" not in globals():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Regression CNN with a single output value.
# Spatial size for the 256x256 inputs produced by the Resize transform:
# 256 -> 254 (3x3 conv, no padding) -> 127 (pool) -> 125 (conv) -> 62 (pool),
# which is why the first Linear layer expects 64 * 62 * 62 features.
model = nn.Sequential(
    nn.Conv2d(3, 64, 3),  # 3 input channels, 64 output channels, 3x3 kernel
    nn.ReLU(),
    nn.MaxPool2d(2, 2),  # 2x2 pooling kernel with stride 2
    nn.Conv2d(64, 64, 3), # 64 input channels, 64 output channels, 3x3 kernel
    nn.ReLU(),
    nn.MaxPool2d(2, 2),  # 2x2 pooling kernel with stride 2
    nn.Flatten(),       # Flattening the tensor for the fully connected layers
    nn.Linear(64 * 62 * 62, 512), # 64 * 62 * 62 input features, 512 output features
    nn.ReLU(),
    nn.Linear(512, 1)    # 512 input features, 1 output feature
)
# Compile the model and move it to the selected device.
model = torch.compile(model,backend="aot_eager").to(device)

Trénování modelu

In [None]:
import torch
import tqdm
from torch import nn
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Mean-squared-error regression loss, Adam optimiser, and a scheduler that
# lowers the learning rate when the monitored loss stops decreasing.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
scheduler = ReduceLROnPlateau(optimizer, 'min')

EPOCHS = 1

print("Training")
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    # Iterate the loader lazily. Wrapping it in list(...) first loaded and kept
    # every batch of the epoch in memory before the first optimisation step.
    for data in tqdm.tqdm(train_dataloader):
        inputs, labels = data['image'].to(device).float(), data['clip_count'].to(device).float()
        optimizer.zero_grad()
        outputs = model(inputs)
        # Flatten both sides so the (batch, 1) outputs line up with the labels.
        loss = criterion(outputs.view(-1), labels.view(-1))
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean batch loss over the epoch. The previous log line reported only the
    # last batch and failed with NameError when the loader was empty.
    epoch_loss = running_loss / max(len(train_dataloader), 1)
    scheduler.step(epoch_loss)
    print(f"Epoch {epoch + 1}/{EPOCHS}, loss: {epoch_loss}")

print('Finished Training')

In [None]:


import torch
import tqdm

# Testing: predict one clip count per test image and write the submission file.
model.eval()
predictions = []
with torch.no_grad():
    for data in tqdm.tqdm(test_dataloader):
        # The loader already yields tensors; wrapping them in torch.tensor(...)
        # made a needless copy and triggers a UserWarning.
        images = data['image'].to(device)
        outputs = model(images)
        # .item() requires a single-element output, i.e. a batch size of 1.
        predictions.append(outputs.item())

df_submit = pd.DataFrame({'id': df_test['id'], 'clip_count': predictions})
df_submit.to_csv("submit.csv", index=False)

