An overview of the training data.

In [1]:
import os
import sys
from pathlib import Path
from functools import reduce

import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Root of the extracted dataset. Set the PAMR_DATA_DIR environment variable to
# point the notebook at another copy of the data; when it is unset, the
# original local path below is used.
data_dir = Path(
    os.environ.get(
        "PAMR_DATA_DIR",
        r"H:\data\pamr\predict-ai-model-runtime\npz_all\npz",
    )
)

# Directory tree under the dataset root: layout/<collection>/<search>/train.
layout_dir = data_dir / "layout"
nlp_dir = layout_dir / "nlp"
xla_dir = layout_dir / "xla"

# Training-split directories. Naming: first letter is the collection
# (x = xla, n = nlp), second is the sub-directory (d = default, r = random),
# and the trailing "t" stands for the "train" folder.
xdt_dir = xla_dir / "default" / "train"
xrt_dir = xla_dir / "random" / "train"
ndt_dir = nlp_dir / "default" / "train"
nrt_dir = nlp_dir / "random" / "train"

## Files

In [3]:
def load_npzs(data_dir: "str | os.PathLike"):
    """Load every ``.npz`` file found directly inside ``data_dir``.

    Args:
        data_dir: Directory to scan, given as a string or any path-like
            object such as ``pathlib.Path``.

    Returns:
        A list holding the result of ``np.load`` for each entry whose name
        ends in ``.npz``, ordered by file name so that repeated runs return
        the archives in the same order.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``data_dir`` cannot be
            listed (for example, it does not exist).

    Note:
        Per the NumPy documentation, ``np.load`` on an ``.npz`` file returns a
        lazily-read archive object that should be closed by the caller once
        it is no longer needed.
    """
    # os.fspath lets callers pass a Path; plain "+" concatenation would raise
    # TypeError for anything that is not already a str.
    data_dir = os.fspath(data_dir)
    return [
        np.load(os.path.join(data_dir, name))
        # os.listdir yields entries in arbitrary order; sort for determinism.
        for name in sorted(os.listdir(data_dir))
        if name.endswith(".npz")
    ]


def load_all_layouts(layout_dir: "str | os.PathLike"):
    """Load the training archives of the four layout collections.

    Args:
        layout_dir: Root layout directory, given as a string or any
            path-like object such as ``pathlib.Path``. The four
            sub-directories listed below are looked up beneath it.

    Returns:
        A dict with the keys ``"ndt"``, ``"nrt"``, ``"xdt"`` and ``"xrt"``
        (in that order), each mapped to the list returned by ``load_npzs``
        for the matching ``<collection>/<search>/train`` sub-directory.
    """
    # Normalise to str so a Path argument works; "+" on a Path raises TypeError.
    root = os.fspath(layout_dir)
    train_subdirs = {
        "ndt": "nlp/default/train",
        "nrt": "nlp/random/train",
        "xdt": "xla/default/train",
        "xrt": "xla/random/train",
    }
    # Paths are assembled as plain strings, so load_npzs always receives a str.
    return {
        key: load_npzs(root + "/" + subdir)
        for key, subdir in train_subdirs.items()
    }

In [4]:
# Load every layout archive once, passing the layout root as a plain string.
files = load_all_layouts(str(layout_dir))

# Total number of loaded archives across all collections; the built-in sum
# replaces the hand-rolled reduce/lambda and also copes with an empty mapping.
num_files = sum(len(archives) for archives in files.values())
print(f"Number of files in layout: {num_files}")

Number of files in layout: 535


In [5]:
# File names present in the NLP "default" and "random" training folders.
nd_models = [entry.name for entry in ndt_dir.iterdir()]
nr_models = [entry.name for entry in nrt_dir.iterdir()]

# Report how many entries each folder holds.
print(
    f"Number of model in nlp default: {len(nd_models)}",
    f"Number of model in nlp random: {len(nr_models)}",
    sep="\n",
)

# The final expression is the cell's displayed value: names found in the
# random folder that have no counterpart in the default folder.
print("Models only in nlp random:")
set(nr_models).difference(nd_models)


Number of model in nlp default: 198
Number of model in nlp random: 207
Models only in nlp random:


{'small_bert_bert_en_uncased_L-10_H-128_A-2_batch_size_16_test.npz',
 'small_bert_bert_en_uncased_L-10_H-256_A-4_batch_size_16_train.npz',
 'small_bert_bert_en_uncased_L-12_H-128_A-2_batch_size_16_test.npz',
 'small_bert_bert_en_uncased_L-2_H-256_A-4_batch_size_64_test.npz',
 'small_bert_bert_en_uncased_L-4_H-128_A-2_batch_size_32_test.npz',
 'small_bert_bert_en_uncased_L-4_H-256_A-4_batch_size_16_test.npz',
 'small_bert_bert_en_uncased_L-6_H-128_A-2_batch_size_32_test.npz',
 'small_bert_bert_en_uncased_L-8_H-128_A-2_batch_size_16_test.npz',
 'small_bert_bert_en_uncased_L-8_H-256_A-4_batch_size_16_train.npz'}

In [6]:
# File names present in the XLA "default" and "random" training folders.
xd_models = [entry.name for entry in xdt_dir.iterdir()]
xr_models = [entry.name for entry in xrt_dir.iterdir()]

# Report how many entries each folder holds.
print(
    f"Number of model in xla default: {len(xd_models)}",
    f"Number of model in xla random: {len(xr_models)}",
    sep="\n",
)

# The final expression is the cell's displayed value: names found in the
# random folder that have no counterpart in the default folder.
print("Models only in xla random:")
set(xr_models).difference(xd_models)

Number of model in xla default: 61
Number of model in xla random: 69
Models only in xla random:


{'mlperf_maskrcnn_1_shard_batch_4.npz',
 'mlperf_maskrcnn_batch_2.npz',
 'mlperf_maskrcnn_batch_4.npz',
 'mlperf_nmt_1_shard_batch_8.npz',
 'mlperf_nmt_batch_8.npz',
 'openai_v0_rnn_natural.npz',
 'openai_v0_rnn_optimized.npz',
 'trax_lsh_attention.npz'}