In [None]:
# custom class om DataLoader te kunnen gebruiken

from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

import os
from dotenv import load_dotenv

# Load environment variables from the project's .env file.
# NOTE(review): the absolute dotenv path below is machine-specific — confirm
# it before running on another machine.
# load_dotenv()  # alternative: let python-dotenv locate the .env file itself
load_dotenv(dotenv_path="C:/Users/peta GamePC/Documents/UvA KI/matrixian/Pangolinis/.env")

# Prefix that is prepended (by plain string concatenation) to the CSV name
# below and to every image path read by the dataset class.
file_path = os.getenv('FILE_PATH')
if file_path is None:
    # Fail here with a clear message instead of a TypeError on `None + str`.
    raise RuntimeError(
        "Environment variable FILE_PATH is not set; "
        "define it in the .env file or in the environment."
    )

# Every column is read as a pandas string column.
df = pd.read_csv(file_path + "Full_preprocessed_detailed_house.csv", dtype="string")

# Label string -> integer class index, one index per distinct 'woningtype' value.
label_to_int = {label: idx for idx, label in enumerate(df['woningtype'].unique())}


class InputImages(Dataset):
    """Dataset yielding ``(image, label)`` pairs from a dataframe of house records.

    Each row must provide a 'frontview_url' entry (appended to the module-level
    ``file_path`` prefix to locate the image) and a 'woningtype' entry (the
    label string).

    Args:
        dataframe: table with one row per sample.
        transform: optional callable applied to the loaded RGB PIL image.
        label_to_int: mapping from label string to integer class index;
            defaults to the module-level mapping built from the full CSV.
    """

    def __init__(self, dataframe, transform=None, label_to_int=label_to_int):
        self.dataframe = dataframe
        self.transform = transform
        self.label_to_int = label_to_int

    def __len__(self):
        """Return the number of samples (rows in the dataframe)."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` if one was given) and ``label`` is an ``int``.

        Raises:
            KeyError: if the row's 'woningtype' is missing from the mapping.
        """
        row = self.dataframe.iloc[idx]
        image_path = file_path + row['frontview_url']

        # Close the underlying file as soon as the pixel data has been copied;
        # convert() returns a new, fully loaded image.
        with Image.open(image_path) as img:
            image = img.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        # Label string -> int for the cross-entropy loss. Use the mapping given
        # to this instance, not the module-level one, so a custom mapping
        # passed to the constructor is actually honoured.
        label = int(self.label_to_int[row['woningtype']])

        return image, label

In [None]:
# Image preprocessing pipeline

img_preprocess = transforms.Compose(
    [
        # Bring every image to a fixed 224x224 size.
        transforms.Resize(size=(224, 224)),
        # Convert the PIL image to a tensor.
        transforms.ToTensor(),
        # Normalisation; the right mean/std depend on the model being used.
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)


In [None]:
# DataLoaders: define the train and validation sets

# Two-step split of the full table: 70% train, then the remaining 30% is
# halved into validation and test (15% each). The seed is fixed so the split
# is identical on every Restart & Run All.
# NOTE(review): 70/15/15 is an assumed default — replace these two lines with
# the project's actual train/val/test frames if they are produced elsewhere.
train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

train_dataset = InputImages(train_df, transform=img_preprocess)
val_dataset = InputImages(val_df, transform=img_preprocess)

# Shuffle only the training data; keep validation order stable.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [None]:
# simple CNN 

class SimpleCNN(nn.Module):
    """Small CNN: two conv+pool stages followed by three linear layers.

    The first linear layer is sized for 3-channel 224x224 inputs; other input
    sizes will fail with a shape mismatch at ``fc1``.

    Args:
        num_classes: number of output logits produced by the last layer.
            Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # in_channels=3 (RGB), out_channels=16, kernel_size=3; padding keeps H and W.
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # No padding here, so H and W shrink by 2.
        self.conv2 = nn.Conv2d(16, 32, 3)
        # Spatial size: 224 -conv1-> 224 -pool-> 112 -conv2-> 110 -pool-> 55,
        # so the flattened feature vector has 32 * 55 * 55 entries.
        self.fc1 = nn.Linear(32 * 55 * 55, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


# Instantiate the network with its default constructor arguments.
# NOTE(review): the size of the final layer must cover every class index in
# label_to_int, otherwise the cross-entropy loss will reject the targets — confirm.
net = SimpleCNN()

In [None]:
# Loss function and optimizer

# Cross-entropy over the raw logits returned by the network.
criterion = nn.CrossEntropyLoss()

# Plain SGD over all network parameters.
optimizer = optim.SGD(
    params=net.parameters(),
    lr=0.001,
    momentum=0.9,  # momentum can be tuned
)

In [None]:
# Training loop

num_epochs = 10  # for ~7k rows, 10-20 epochs
# Mini-batches between progress reports. With ~7k rows and batch_size=32 an
# epoch has only a few hundred batches, so the interval must be well below that
# or nothing is ever printed.
log_every = 50

for epoch in range(num_epochs):
    # Make sure the model is in training mode even if an earlier cell called eval().
    net.train()

    running_loss = 0.0   # loss accumulated since the last progress report
    epoch_loss = 0.0     # loss accumulated over the whole epoch
    num_batches = 0

    for i, (inputs, labels) in enumerate(train_loader):
        # Zero the parameter gradients.
        optimizer.zero_grad()

        # Forward + backward + optimize.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Statistics.
        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        num_batches += 1

        if (i + 1) % log_every == 0:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / log_every:.3f}')
            running_loss = 0.0

    # Always report once per epoch, however few batches there were.
    if num_batches:
        print(f'[{epoch + 1}] mean epoch loss: {epoch_loss / num_batches:.3f}')

print('Finished Training')