In [1]:
from datasets import load_dataset
from tensorflow import keras
import time
import json
import pandas as pd
import glob
import pdb
import numpy as np
import tensorflow as tf
import os

In [2]:
active_range = 7
repeat_range = 400
input_dir = '../models/logs'
shuffle_time = 4
username = os.environ.get('USER') 
label_path = f'/scratch/{username}/mps_collect'
mig_path = '../models/logs/mig'
mig_slices = ['7g.40gb', '4g.20gb', '3g.20gb']

In [3]:
target_data = []
input_data = []

for i in range(1, active_range+1):
    for j in range(repeat_range):
        record = glob.glob(f'{input_dir}/active{i}_repeat{j}_*.csv')[0]
        data = pd.read_csv(record, header=None).to_numpy()
        
        ########### freature data ############
        norm_data = np.zeros(data.shape)
        for col in range(data.shape[1]):            
            norm_data[:,col] = np.round(data[:,col] / data[:,col].max(), 4)
        input_data.append(norm_data)
        
        ########### create label data ###########
        label_data = np.zeros(data.shape)
        model_str = record.split(f'_repeat{j}_')[1].split('.')[0]
        for col in range(data.shape[1]):
            model = glob.glob(f'{label_path}/active{i}_repeat{j}/{model_str}{col}_100pct*.json')[0]
            model = model.split('_100pct_')[1].split('.')[0] # mobilenet_train64
            
            slice_time = []
            for mig_slice in mig_slices:
                filename = f'{mig_path}/{mig_slice}_{model}.json'
#                 if os.path.isfile(filename):
                with open(filename) as f:
                    lat = json.load(f)
                mean_lat = []
                for key, val in lat.items():
                    mean_lat += val
                mean_lat = mean_lat[1:] # remove 1st element
                mean_lat = round(np.mean(mean_lat),4)
                slice_time.append(mean_lat)
            slice_time = np.asarray(slice_time)            
            slice_time = np.round(slice_time / slice_time.max(),4)            
            label_data[:,col] = slice_time
        target_data.append(label_data)                    
        
        for shuffle in range(shuffle_time):
            new_index = np.random.permutation(norm_data.shape[1])
            input_data.append(norm_data[:, new_index])
            target_data.append(label_data[:, new_index])
        
input_data = np.asarray(input_data)
target_data = np.asarray(target_data)

In [5]:
input_data.shape, target_data.shape

((14000, 3, 7), (14000, 3, 7))

In [6]:
if os.path.isfile('input_data.npy'):
    os.remove('input_data.npy')
if os.path.isfile('target_data.npy'):
    os.remove('target_data.npy')
np.save('input_data.npy', input_data)
np.save('target_data.npy', target_data)

In [3]:
load = np.load('input_data.npy')
load.min(), load.max()


(0.0879, 1.0)

In [7]:
target = np.load('target_data.npy')

In [8]:
load[0], target[0]

(array([[0.8667, 1.    , 1.    , 1.    , 1.    , 1.    , 1.    ],
        [0.8375, 1.    , 1.    , 1.    , 1.    , 1.    , 1.    ],
        [1.    , 1.    , 0.8889, 1.    , 1.    , 1.    , 1.    ]]),
 array([[0.9744, 0.8889, 0.8889, 0.8889, 0.8889, 0.8889, 0.8889],
        [0.9897, 1.    , 1.    , 1.    , 1.    , 1.    , 1.    ],
        [1.    , 0.8889, 0.8889, 0.8889, 0.8889, 0.8889, 0.8889]]))

In [8]:
train_dataset = tf.data.Dataset.from_tensor_slices((input_data,target_data))

2022-02-22 16:50:01.482277: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-22 16:50:02.019425: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10801 MB memory:  -> device: 0, name: Tesla K80, pci bus id: 0000:05:00.0, compute capability: 3.7


In [9]:
train_dataset

<TensorSliceDataset shapes: ((3, 7), (3, 7)), types: (tf.float64, tf.float64)>

In [10]:
train_dataset.shuffle(100).batch(8)
train_dataset

<TensorSliceDataset shapes: ((3, 7), (3, 7)), types: (tf.float64, tf.float64)>

In [11]:
tf.data.experimental.cardinality(train_dataset)

<tf.Tensor: shape=(), dtype=int64, numpy=14000>