In [63]:
import pytest
from torchvision import transforms
import torch
from torch.utils.data import DataLoader, TensorDataset
import mlops_finalproject.models.predict_model
import os
from PIL import Image
import pandas as pd

# Absolute location of the project's data directory.
# NOTE(review): this is a machine-specific Windows path, so it will not exist
# on other machines or in CI. In the cells visible here it is only referenced
# by the skipif guard of test_load_test_data; every load uses the relative
# "../../data/..." paths instead. Consider deriving it from a configurable
# location -- TODO confirm nothing else depends on this exact value.
_PATH_DATA = r"C:/Users/alexc/Documents/DTU - First Semester/mlops/mlops-finalReport/data"

In [50]:
# Both guards must point at exactly the files the test loads; otherwise the
# test can be skipped (or run and crash) based on an unrelated path.
@pytest.mark.skipif(not os.path.exists("../../data/processed/images.pt"), reason="Data files not found")
@pytest.mark.skipif(not os.path.exists("../../data/processed/labels.pt"), reason="Data files not found")
def test_load_train_data():
    """Check that the processed training set has the expected number of entries.

    Loads the processed image and label tensors and asserts that each of
    them contains 39209 entries.

    Raises:
        AssertionError: if either tensor has a different number of entries.
    """
    train_images = torch.load("../../data/processed/images.pt")
    train_labels = torch.load("../../data/processed/labels.pt")
    assert len(train_images) == 39209, "Train imgs size have an incorrect number of entries"
    assert len(train_labels) == 39209,  "Train labels size have an incorrect number of entries"


# Direct invocation so the check also runs when the cell is executed in the
# notebook (the skipif markers only take effect under pytest collection).
test_load_train_data()

In [49]:

# Guards check the same relative paths that the test body loads from.
@pytest.mark.skipif(not os.path.exists("../../data/processed/images.pt"), reason="Data files not found")
@pytest.mark.skipif(not os.path.exists("../../data/processed/labels.pt"), reason="Data files not found")
def test_load_data_shape():
    """Check that every processed image has the expected shape.

    Wraps the processed tensors in a ``TensorDataset``/``DataLoader`` and
    asserts that each image delivered by the loader has shape
    ``torch.Size([2700])``.

    Raises:
        AssertionError: if any image has a different shape.
    """
    images = torch.load("../../data/processed/images.pt")
    labels = torch.load("../../data/processed/labels.pt")

    train_dataset = TensorDataset(images, labels)
    # Ordering is irrelevant for a shape check, so no shuffling is requested;
    # this keeps the test deterministic.
    trainloader = DataLoader(train_dataset, batch_size=64)

    expected_shape = torch.Size([2700])
    for batch, _ in trainloader:
        # All images in a batch tensor share the same trailing dimensions, so
        # one comparison per batch covers every image in it.
        assert batch.shape[1:] == expected_shape, "Img have a incorrect size"


# Direct invocation so the check also runs when the cell is executed in the
# notebook (the skipif markers only take effect under pytest collection).
test_load_data_shape()

In [47]:


# Only the labels file is needed by this test, and the guard checks the same
# relative path that the body loads from.
@pytest.mark.skipif(not os.path.exists("../../data/processed/labels.pt"), reason="Data files not found")
def test_load_data_labels():
    """Check that every class label from 0 to 42 occurs in the processed labels.

    Raises:
        AssertionError: if at least one label in ``range(43)`` is absent.
    """
    labels = torch.load("../../data/processed/labels.pt")

    # Compare as plain Python sets: one pass over the unique values instead of
    # 43 separate tensor membership scans.
    present = set(torch.unique(labels).tolist())
    missing = set(range(0, 43)) - present
    assert not missing, "data not contains all the labels"


# Direct invocation so the check also runs when the cell is executed in the
# notebook (the skipif marker only takes effect under pytest collection).
test_load_data_labels()

In [69]:
# The guard checks the same relative directory that is handed to get_data.
# (Gluing a relative path onto _PATH_DATA without a separator produced a path
# that does not match the one loaded, so the guard said nothing about whether
# the data was actually present.)
@pytest.mark.skipif(not os.path.exists("../../data/raw/German"), reason="Data files not found")
def test_load_test_data():
    """Check that the raw test set yields the expected number of entries.

    Loads images and labels through ``get_data`` and asserts that both have
    exactly 12630 entries.

    Raises:
        AssertionError: if the image and label counts differ from each other
            or from 12630.
    """
    test_images, test_labels = get_data( "../../data/raw/German")
    assert len(test_images) == len(test_labels) == 12630 , "Test size have an incorrect number of entries"


# NOTE(review): get_data is resolved at call time, so the cell defining it
# must have been executed before this call; in file order that cell comes
# later, which breaks a fresh "Restart & Run All".
test_load_test_data()

12630


In [67]:
def get_data(path: str) -> list:
    """Load the images and labels listed in ``<path>/Test.csv``.

    The CSV is expected to provide a ``Path`` column (image locations
    relative to ``path``) and a ``ClassId`` column (labels). Each image is
    resized to 30x30 and converted to a tensor.

    Args:
        path: Directory that contains ``Test.csv`` and the referenced images.

    Returns:
        A two-element list ``[images, labels]`` where ``images`` is the
        stacked tensor of all transformed images and ``labels`` is a tensor
        built from the ``ClassId`` column.
    """
    test = pd.read_csv(os.path.join(path, "Test.csv"))
    paths = test["Path"].values
    test_labels = test["ClassId"].values

    transform = transforms.Compose([transforms.Resize([30, 30]), transforms.ToTensor()])

    test_imgs = []
    for img_path in paths:
        fullpath = os.path.join(path, img_path)
        # Context manager guarantees the underlying file handle is released
        # for every image instead of relying on garbage collection.
        with Image.open(fullpath) as img:
            # Resize and convert to tensor while the file is still open.
            test_imgs.append(transform(img))

    output = [
        torch.stack(test_imgs),
        torch.tensor(test_labels),
    ]

    return output