In [1]:
import os

# Every environment variable below is set BEFORE `import torch`, so the values
# are already in place when torch loads its CUDA / OpenMP runtimes.
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Hide all GPUs from CUDA (CPU-only run)
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # Use PCI bus order
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Force CUDA to run synchronously for better profiling

# OpenMP runtime tuning (KMP_* variables are read by the Intel OpenMP runtime).
# NOTE(review): these only influence thread wait time / thread placement; they
# do not restrict execution to a particular CPU vendor.
os.environ["KMP_BLOCKTIME"] = "0"
os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"

import torch

# With the GPUs hidden above this branch is expected to be skipped; it is kept
# so the notebook stays reproducible if CUDA_VISIBLE_DEVICES is changed.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True  # Set deterministic mode
    torch.backends.cudnn.benchmark = False  # Disable cudnn benchmarking for reproducibility

# Re-applies the thread count torch already reports; replace the argument with
# an explicit number to cap intra-op CPU parallelism.
torch.set_num_threads(torch.get_num_threads())


In [2]:
# Switch to True to work on a small subset of the data while testing.
DEBUG = False

In [3]:
import os
from PIL import Image
import numpy as np
from fastprogress.fastprogress import progress_bar
from fastai.vision.all import *
import params

import wandb

In [4]:
# Directory that holds the images and their label (.txt) files.
data_path = "./dataset/data/"

In [18]:
def get_classes_per_image(txt_data, class_labels):
    """
    Flag which classes are present in an image.

    Args:
        txt_data: Iterable of label lines. The first whitespace-separated
            token of each line is read as the integer class index.
        class_labels: Mapping (or sequence) indexed by that integer class
            index, giving the class name.

    Returns:
        dict mapping every class name that occurs in ``txt_data`` to ``1``.
        The value is a presence flag, not an occurrence count; classes that
        do not occur are omitted.

    Raises:
        ValueError: If the first token of a non-blank line is not an integer.
        KeyError / IndexError: If the class index is not in ``class_labels``.
    """
    present = {}
    for entry in txt_data:
        fields = entry.split()
        if not fields:
            # Blank (or whitespace-only) line: there is no class index to read.
            continue
        present[class_labels[int(fields[0])]] = 1
    return present


def _create_table(image_files, class_labels):
    """
    Create a wandb.Table with one row per labelled object in the dataset.

    For each image, the label file with the same path but a ``.txt``
    extension is read. Every non-blank line is expected to hold a class
    index followed by exactly four numbers: x_cen, y_cen, w, h.

    Args:
        image_files: Sized collection of image file paths.
        class_labels: Mapping (or sequence) from integer class index to
            class name.

    Returns:
        wandb.Table with columns
        ``image_name, object, x_cen, y_cen, w, h``.

    Raises:
        FileNotFoundError: If an image has no matching ``.txt`` file.
        ValueError: If a non-blank line does not contain an integer class
            index followed by exactly four float values.
    """
    box_columns = ['object', 'x_cen', 'y_cen', 'w', 'h']
    table = wandb.Table(columns=["image_name"] + box_columns)

    for image_file in progress_bar(image_files, total=len(image_files)):
        label_file = os.path.splitext(image_file)[0] + '.txt'
        with open(label_file) as f:
            txt_data = f.readlines()

        image_name = Path(image_file).name
        for raw_line in txt_data:
            fields = raw_line.split()
            if not fields:
                # Skip blank lines (e.g. a trailing empty line in the label
                # file) instead of failing on a missing class index.
                continue
            obj_class = class_labels[int(fields[0])]
            x_cen, y_cen, w, h = (float(value) for value in fields[1:])
            table.add_data(image_name, obj_class, x_cen, y_cen, w, h)

    return table

In [6]:
# Open a W&B run for the upload job.
run = wandb.init(
    project=params.WANDB_PROJECT,
    entity=params.ENTITY,
    job_type="upload",
)
# Artifact named after params.RAW_DATA_AT.
raw_data_at = wandb.Artifact(name=params.RAW_DATA_AT, type="raw_data")

[34m[1mwandb[0m: Currently logged in as: [33mmnitin59[0m ([33mdevhive[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
from pathlib import Path
import wandb

# NOTE(review): this rebinds `raw_data_at`, replacing the artifact created in
# the earlier cell from params.RAW_DATA_AT (type "raw_data"). Confirm which
# name/type the uploaded artifact is supposed to carry.
raw_data_at = wandb.Artifact("raw_data", type="dataset")

# specify data path
data_path = Path('./dataset/data')

# Both spellings are accepted: the EDA table elsewhere in this notebook is
# built from '.jpeg' files, which a plain '*.jpg' glob would never pick up.
_image_suffixes = {'.jpg', '.jpeg'}

# Walk the directory once, in sorted order so the upload is reproducible.
for data_entry in sorted(data_path.iterdir()):
    if not data_entry.is_file():
        continue
    suffix = data_entry.suffix.lower()
    if suffix in _image_suffixes:
        # add image files to artifact
        raw_data_at.add_file(str(data_entry), name=f"images/{data_entry.name}")
    elif suffix == '.txt':
        # add label files to artifact
        raw_data_at.add_file(str(data_entry), name=f"labels/{data_entry.name}")


In [15]:
# get image and label files
# os.listdir returns entries in arbitrary order, so sort them to keep the
# resulting lists (and anything built from them) reproducible between runs.
data_file = [os.path.join(data_path, entry) for entry in sorted(os.listdir(data_path))]
# NOTE(review): only '.jpeg' files are treated as images here.
images = [path for path in data_file if path.endswith('.jpeg')]
labels = [path for path in data_file if path.endswith('.txt')]


In [19]:
# Build the per-object table from the image list and the class names in params.
table = _create_table(
    image_files=images,
    class_labels=params.BDD_CLASSES,
)

In [20]:
# Attach the table to the artifact under the name "EDA_table".
raw_data_at.add(table, name="EDA_table")

ArtifactManifestEntry(path='EDA_table.table.json', digest='j7B2jPkEN9tzT2fvEQNzTQ==', ref=None, birth_artifact_id=None, size=144596, extra={}, local_path='C:\\Users\\mniti\\AppData\\Local\\wandb\\wandb\\artifacts\\staging\\tmprqs3dyla')

In [21]:
# Log the artifact to the run, then close the run.
run.log_artifact(artifact_or_path=raw_data_at)
run.finish()