In [1]:
import mlflow
import torch
from torch.utils.data import DataLoader, random_split
from glob import glob
import os
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

from components.mymodel import load_model, Models
from components.helper import train
from components.dataset import *

In [2]:
# Parameters
# Everything a reader might tune for one run lives in this cell; the same
# values are collected into `params` below so they can be logged with the run.
image_set = Imageset.k
device = Devices.all
environment = Environments.all
model_name = Models.alexnet
clip_target = True
normalize_target = True
# NOTE: this must be a plain int. A trailing comma here (`200,`) would make
# `epochs` the tuple `(200,)`, which is then what gets logged and passed on.
epochs = 200
lr = 0.001
batch_size = 200
# Flat mapping of the run configuration; enum members are stored by `.value`.
params:dict = {
    'epochs': epochs,
    'lr': lr,
    'batch_size': batch_size,
    'Imageset': image_set.value,
    'Device': device.value,
    'Environment': environment.value,
    'model_name': model_name.value,
    'clip_target': clip_target,
    'normalize_target': normalize_target
}

In [3]:
# Build the dataset from the parameters chosen above, using the training
# preprocessing option.
full_train_dataset = SoilDataset_bigset(
    imageset=image_set,
    device=device,
    environment=environment,
    preprocessing=Preprocessing.training,
    clip_target=clip_target,
    normalize_target=normalize_target,
)

# A generator with a fixed seed makes the 80/20 train/validation split
# reproducible from run to run.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset=full_train_dataset,
    lengths=[0.8, 0.2],
    generator=split_generator,
)

Found 9463 images in ./dataset/bigset/K/*/*.


In [8]:
# Display the `preprocessing` attribute of the dataset that the training split
# wraps (`random_split` returns subsets that keep a reference to it as `.dataset`).
train_dataset.dataset.preprocessing

Compose(
    ToPILImage()
    Resize(size=350, interpolation=bilinear, max_size=None, antialias=warn)
    CenterCrop(size=(224, 224))
    RandomHorizontalFlip(p=0.5)
    RandomVerticalFlip(p=0.5)
    ToTensor()
    Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
)

In [None]:
# Point MLflow at the tracking server and select the 'Soil' experiment, then
# open a run and record the configuration from `params` on it.
# The run is left open on purpose: a later cell logs an artifact to it and
# calls `mlflow.end_run()`.
mlflow.set_tracking_uri("https://web-mlflow.akraradets.duckdns.org")
mlflow.set_experiment(experiment_name='Soil')
mlflow.start_run()
mlflow.log_params(params)

In [3]:
def train_model(model, dataset:SoilDataset_bigset, epochs:int, lr:float, batch_size:int, num_workers:int=-1):
    """Train ``model`` on ``dataset`` and plot the losses reported by ``train``.

    Args:
        model: model instance handed to ``train``.
        dataset: dataset wrapped in a shuffled ``DataLoader``; its
            ``signature`` attribute is used as the plot title.
        epochs: forwarded to ``train``.
        lr: forwarded to ``train``.
        batch_size: batch size of the ``DataLoader``.
        num_workers: number of loader worker processes. A negative value
            (the default) means "one worker per CPU core" and is resolved with
            ``os.cpu_count()``; ``DataLoader`` itself raises ``ValueError``
            when given a negative ``num_workers``.

    Returns:
        The ``(model, train_losses)`` pair returned by ``train``.
    """
    if num_workers < 0:
        # os.cpu_count() may return None; fall back to loading in the main process.
        num_workers = os.cpu_count() or 0
    loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    # DEVICE is a notebook-level global; it is looked up when the function is called.
    model, train_losses = train(model, loader, epochs, lr, DEVICE)
    plt.plot(train_losses)
    plt.title(dataset.signature)
    plt.show()
    return model, train_losses

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")
# DEVICE = 'cpu'
print("Device: ", DEVICE)

Device:  cuda:0


In [4]:
# from PIL import Image

# img = Image.open(dataset.imgs[100])
# # dataset.imgs[100]
# img
# plt.imshow( img_array )

In [5]:
# Training-time image pipeline: resize, take the central 224x224 crop, flip
# horizontally at random, then convert to a tensor and normalize each channel
# with mean 0.5 / std 0.5.
# Alternatives tried earlier: transforms.RandomCrop(224) instead of the centre
# crop, and Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]).
train_steps = [
    transforms.ToPILImage(),
    transforms.Resize(350),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]
preprocess = transforms.Compose(train_steps)

# NOTE(review): `normalize_target` is recorded in `params` but is not passed
# here, and this call uses `transform=` where the earlier dataset cell uses
# `preprocessing=` — confirm against SoilDataset_bigset that this is intended.
dataset = SoilDataset_bigset(
    imageset=image_set,
    device=device,
    environment=environment,
    clip_target=clip_target,
    transform=preprocess,
)

Found 9463 images in ./dataset/bigset/K/*/*.


In [6]:
# Instantiate the configured architecture, then train it with the
# hyper-parameters stored in `params` (the same values logged for the run).
model = load_model(model_name=model_name)
model, train_loss = train_model(
    model,
    dataset=dataset,
    epochs=params['epochs'],
    lr=params['lr'],
    batch_size=params['batch_size'],
)

97.70276832580566 0 tensor(17535314.)
save model!!




# Inference

In [None]:
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd

# Run inference on the CPU, and switch the model to evaluation mode so that
# layers which behave differently while training (e.g. dropout, batch norm)
# give deterministic predictions.
model.to('cpu')
model.eval()
# Same pipeline as training but without random augmentation.
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(350),
    transforms.CenterCrop(224),
    # transforms.RandomCrop(224),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# NOTE(review): unlike the training dataset, `clip_target` is not passed here —
# confirm that comparing predictions against unclipped targets is intended.
dataset = SoilDataset_bigset(imageset=image_set, device=device, environment=environment, transform=preprocess)
# shuffle=False keeps targets, predictions and image names aligned batch by batch.
loader = DataLoader(dataset=dataset, batch_size=params['batch_size'], shuffle=False, num_workers=1)
ys = []
yhats = []
pic_names = []
# No gradients are needed for prediction.
with torch.no_grad():
    for X,y,pic_name in tqdm(loader):
        yhat = model(X)
        pic_names.append(list(pic_name))
        # Flatten each batch to 1-D so the batches can be concatenated later.
        ys.append(y.reshape(-1))
        yhats.append(yhat.reshape(-1))

In [None]:
# Concatenate the per-batch results into one table indexed by image name.
# Building the frame from named columns keeps 'Target' and 'Predict' numeric
# instead of turning everything into object dtype via a transposed list.
df = pd.DataFrame(
    {
        'Target': np.hstack(ys),
        'Predict': np.hstack(yhats),
    },
    index=pd.Index(np.hstack(pic_names), name='Image name'),
)
artifact_name:str = os.path.join('artifact','inference.csv')
# `to_csv` does not create missing directories, so make sure the folder exists.
os.makedirs(os.path.dirname(artifact_name), exist_ok=True)
df.to_csv(artifact_name)
# Upload the CSV to the active MLflow run, close the run, then delete the
# local temporary copy.
mlflow.log_artifact(artifact_name)
mlflow.end_run()
os.remove(artifact_name)