In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import argparse
import pandas as pd
from sklearn.model_selection import train_test_split
import sys
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler as mm_scaler
from sklearn.preprocessing import StandardScaler as std_scaler
from torchsummary import summary
import numpy as np
import copy
from collections import OrderedDict

In [2]:
# Select the compute device once for the whole notebook: the first CUDA GPU
# when PyTorch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
class CompData(Dataset):
    """Dataset of fixed-size meta-learning sets carved out of tabular (X, y) data.

    The first ``num_sets * (meta_train_batch + meta_test_batch)`` rows of the
    (already shuffled) input are used. The first ``num_sets * meta_train_batch``
    of those rows are cut into ``num_sets`` consecutive meta-train chunks; the
    remaining rows are cut into ``num_sets`` consecutive meta-test chunks.
    Item ``i`` pairs meta-train chunk ``i`` with meta-test chunk ``i``.

    Args:
        X: 2-D feature table (anything ``np.column_stack`` accepts, e.g. a
            NumPy array or a pandas DataFrame).
        y: Target values, one per row of ``X``; stacked as the last column.
        train: When True the meta batch sizes are stored on the instance,
            otherwise ``test_batch`` is stored.
        scaler: When True, ``X`` is min-max scaled (fit on all of ``X``).
        task_num: Stored on the instance; not used when building the sets.
        num_sets: Number of (meta-train, meta-test) sets to create.
        meta_train_batch: Rows per meta-train chunk.
        meta_test_batch: Rows per meta-test chunk.
        test_batch: Stored on the instance when ``train`` is False.

    Raises:
        ValueError: If ``num_sets`` is not positive or the data has fewer rows
            than ``num_sets * (meta_train_batch + meta_test_batch)``.
    """

    def __init__(self, X, y, train=True, scaler=True, task_num=3, num_sets=100, meta_train_batch=10, meta_test_batch=10, test_batch=32):
        self.task_num = task_num
        self.scaler = scaler
        if train:
            self.meta_train_batch = meta_train_batch
            self.meta_test_batch = meta_test_batch
        else:
            self.test_batch = test_batch

        if scaler:
            X = mm_scaler().fit_transform(X)

        # Features and target side by side; the target ends up in the last column.
        entire_data = np.column_stack((X, y))

        if num_sets < 1:
            raise ValueError(f"num_sets must be a positive integer, got {num_sets}")

        total_rows_required = num_sets * (meta_train_batch + meta_test_batch)
        # Slicing past the end silently truncates, which would either make
        # np.vsplit fail with an opaque message or, worse, produce meta-test
        # chunks of the wrong size. Fail early with a clear message instead.
        if len(entire_data) < total_rows_required:
            raise ValueError(
                f"CompData needs {total_rows_required} rows "
                f"({num_sets} sets x ({meta_train_batch} + {meta_test_batch})), "
                f"but only {len(entire_data)} were provided"
            )

        # The caller is expected to pass shuffled data, so take the leading rows.
        total_train_rows = entire_data[:total_rows_required, :]
        split_at = num_sets * meta_train_batch
        train_rows = total_train_rows[:split_at, :]
        test_rows = total_train_rows[split_at:, :]

        target_col = entire_data.shape[1] - 1  # index of the last (target) column

        # Separate features from the target (kept 2-D as a single column), then
        # cut each block into num_sets equally sized consecutive chunks.
        train_rows_data = np.vsplit(train_rows[:, :target_col], num_sets)
        train_rows_runtime = np.vsplit(train_rows[:, target_col:], num_sets)
        test_rows_data = np.vsplit(test_rows[:, :target_col], num_sets)
        test_rows_runtime = np.vsplit(test_rows[:, target_col:], num_sets)

        # One entry per set: [train features, train target, test features, test target].
        self.final_sets = [
            [train_rows_data[i], train_rows_runtime[i], test_rows_data[i], test_rows_runtime[i]]
            for i in range(num_sets)
        ]

    def __len__(self):
        """Return the number of sets."""
        return len(self.final_sets)

    def __getitem__(self, index):
        """Return set ``index`` as four float32 tensors.

        Returns:
            Tuple ``(train_row_data, train_row_runtime, test_row_data,
            test_row_runtime)``; the ``*_runtime`` tensors have one column.
        """
        train_row_data, train_row_runtime, test_row_data, test_row_runtime = (
            # astype copies, so the returned tensors never alias the stored arrays.
            torch.from_numpy(np.asarray(part).astype(np.float32))
            for part in self.final_sets[index]
        )
        return train_row_data, train_row_runtime, test_row_data, test_row_runtime
        

In [4]:
# TODO: add explicit parameter initialization (Kaiming or Xavier); check C. B. Finn's work for reference.
class OffloadModel(torch.nn.Module):
    """Fully connected network with one residual connection and a sigmoid output.

    Layout: ``mod1`` (Linear + ReLU) -> ``mod2`` (Linear, ReLU, Dropout, Linear,
    ReLU) -> add the output of ``mod1`` -> ``mod3`` (Linear, ReLU, Dropout,
    Linear, Sigmoid).

    Args:
        ip_features: Number of input features.
        num_hidden: Width of the hidden representation (``mod2`` widens to
            ``2 * num_hidden`` internally).
        op_features: Number of output features.
    """

    def __init__(self, ip_features, num_hidden, op_features=1):
        super(OffloadModel, self).__init__()

        self.mod1 = nn.Sequential(OrderedDict([
            ('lin1', nn.Linear(ip_features, num_hidden)),
            ('relu1', nn.ReLU())
        ]))
        self.mod2 = nn.Sequential(OrderedDict([
            ('lin2', nn.Linear(num_hidden, num_hidden*2)),
            ('relu2', nn.ReLU()),
            ('drop1', nn.Dropout(p=0.25)),
            ('lin3', nn.Linear(num_hidden*2, num_hidden)),
            ('relu3', nn.ReLU())
        ]))
        self.mod3 = nn.Sequential(OrderedDict([
            ('lin4', nn.Linear(num_hidden, num_hidden)),
            ('relu4', nn.ReLU()),
            ('drop2', nn.Dropout(p=0.25)),
            ('lin5', nn.Linear(num_hidden, op_features)),
            ('sig1', nn.Sigmoid())
        ]))

    def forward(self, x):
        """Run the network using the module's own parameters."""
        op = self.mod1(x)
        x = self.mod2(op)
        # Out-of-place add: `x` is the output of a ReLU, which autograd keeps
        # for the backward pass; `x += op` would overwrite it and make
        # backward() raise an in-place modification error.
        x = x + op
        return self.mod3(x)

    def var_forward(self, x, weights):
        """Run the same computation as ``forward`` with externally supplied weights.

        Args:
            x: Input tensor whose last dimension is ``ip_features``.
            weights: Sequence of ten tensors ordered as ``self.parameters()``
                yields them: (weight, bias) for lin1, lin2, lin3, lin4, lin5.

        Returns:
            The sigmoid output computed from ``weights`` rather than from the
            module's registered parameters.
        """
        op = F.relu(F.linear(x, weights[0], weights[1]))
        x = F.relu(F.linear(op, weights[2], weights[3]))
        # F.dropout defaults to training=True; tie it to the module mode so
        # eval() disables dropout here exactly as it does in forward().
        x = F.dropout(x, p=0.25, training=self.training)
        x = F.relu(F.linear(x, weights[4], weights[5]))
        x = x + op  # out-of-place for the same autograd reason as in forward()
        x = F.relu(F.linear(x, weights[6], weights[7]))
        x = F.dropout(x, p=0.25, training=self.training)
        return torch.sigmoid(F.linear(x, weights[8], weights[9]))

In [14]:
# Build the network with 53 input features and a hidden width of 106,
# then echo it so the cell displays the layer layout.
mnet = OffloadModel(ip_features=53, num_hidden=106)
mnet

OffloadModel(
  (mod1): Sequential(
    (lin1): Linear(in_features=53, out_features=106, bias=True)
    (relu1): ReLU()
  )
  (mod2): Sequential(
    (lin2): Linear(in_features=106, out_features=212, bias=True)
    (relu2): ReLU()
    (drop1): Dropout(p=0.25, inplace=False)
    (lin3): Linear(in_features=212, out_features=106, bias=True)
    (relu3): ReLU()
  )
  (mod3): Sequential(
    (lin4): Linear(in_features=106, out_features=106, bias=True)
    (relu4): ReLU()
    (drop2): Dropout(p=0.25, inplace=False)
    (lin5): Linear(in_features=106, out_features=1, bias=True)
    (sig1): Sigmoid()
  )
)

In [15]:
# Place the network on the notebook-wide device, then print the per-layer
# output shapes and parameter counts for an input of shape (10, 53).
mnet = mnet.to(device)
summary(mnet, (10, 53))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1              [-1, 10, 106]           5,724
              ReLU-2              [-1, 10, 106]               0
            Linear-3              [-1, 10, 212]          22,684
              ReLU-4              [-1, 10, 212]               0
           Dropout-5              [-1, 10, 212]               0
            Linear-6              [-1, 10, 106]          22,578
              ReLU-7              [-1, 10, 106]               0
            Linear-8              [-1, 10, 106]          11,342
              ReLU-9              [-1, 10, 106]               0
          Dropout-10              [-1, 10, 106]               0
           Linear-11                [-1, 10, 1]             107
          Sigmoid-12                [-1, 10, 1]               0
Total params: 62,435
Trainable params: 62,435
Non-trainable params: 0
---------------------------------

In [6]:
# Columns dropped from the CSV before the feature/target split.
dr_columns = [
    'kernel', 'Compiler', 'Cluster', 'gpu_name',
    'outer', 'inner', 'var_decl', 'ref_expr',
    'int_literal', 'float_literal', 'mem_to', 'mem_from',
    'add_sub_int', 'add_sub_double', 'mul_int', 'mul_double',
    'div_int', 'div_double', 'assign_int', 'assign_double',
]

# Read the dataset and discard the unwanted columns in one step.
dataset_root = ""
df = pd.read_csv(dataset_root+"matrix_multiplication.csv").drop(columns=dr_columns)

# Every column except the last is a feature; the last column is the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Shuffled 60/40 split with a fixed seed so the split is reproducible.
train_eval_split = 0.6
split_seed = 43

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=train_eval_split,
    random_state=split_seed,
    shuffle=True,
)

# 20 sets, each with 10 meta-train rows and 5 meta-test rows; features are left unscaled.
train_sets = CompData(
    X_train,
    y_train,
    train=True,
    scaler=False,
    task_num=3,
    num_sets=20,
    meta_train_batch=10,
    meta_test_batch=5,
)

# print(train_sets.__getitem__(0))
#test_sets = CompData(X_test, y_test, train=False, test_batch=32)

In [None]:
#dls = DataLoader(train_sets, batch_size=4)

In [7]:
update_factor = 0.1


def train(model, x_train, y_train, x_test, y_test, num_tasks=None, inner_lr=None, loss_fn=None, second_order=False):
    """Compute the summed meta-test loss after one inner-loop step per task.

    For each task ``k`` the loss on the meta-train batch is differentiated with
    respect to the model parameters, one gradient step of size ``inner_lr`` is
    taken to obtain temporary ("fast") weights, and the loss of those fast
    weights on the meta-test batch is accumulated.

    The fast weights are derived directly from ``model.parameters()`` so the
    returned loss stays connected to the real parameters: calling
    ``.backward()`` on it fills ``model``'s ``.grad`` fields. (Building the
    graph on a ``copy.deepcopy`` of the model would send the gradients to the
    throwaway copy and leave the optimizer with nothing to apply.)

    Args:
        model: Module exposing ``var_forward(x, weights)`` where ``weights``
            follows the order of ``model.parameters()``.
        x_train, y_train: Per-task meta-train inputs/targets, indexed by task
            along the first dimension.
        x_test, y_test: Per-task meta-test inputs/targets, indexed the same way.
        num_tasks: Number of tasks to process; defaults to ``len(x_train)``.
        inner_lr: Inner-loop step size; defaults to the module-level
            ``update_factor``.
        loss_fn: Callable ``loss_fn(prediction, target)``; defaults to the
            notebook-level ``criterion``.
        second_order: When True the inner gradient is computed with
            ``create_graph=True`` so the outer gradient also differentiates
            through it; the default keeps the cheaper first-order behaviour.

    Returns:
        Scalar tensor: the sum of the per-task meta-test losses (the integer 0
        if no task is processed).
    """
    if num_tasks is None:
        num_tasks = len(x_train)
    if inner_lr is None:
        inner_lr = update_factor
    if loss_fn is None:
        loss_fn = criterion  # notebook-level default loss

    meta_params = list(model.parameters())
    loss_tasks = 0
    for k in range(num_tasks):
        # Inner step: gradient of the meta-train loss w.r.t. the current parameters.
        support_loss = loss_fn(model.var_forward(x_train[k], meta_params), y_train[k])
        grads = torch.autograd.grad(support_loss, meta_params, create_graph=second_order)

        # Temporary weights for this task only; the model itself is not modified.
        fast_weights = [w - inner_lr * g for w, g in zip(meta_params, grads)]

        # Evaluate the adapted weights on the task's meta-test batch.
        query_loss = loss_fn(model.var_forward(x_test[k], fast_weights), y_test[k])
        loss_tasks = loss_tasks + query_loss

    return loss_tasks


In [13]:
# Outer-loop (meta) training configuration.
outer_epochs = 10
task_num = 3
criterion = nn.MSELoss()

# Create the model directly on the target device, then hand its parameters to Adam.
global_model = OffloadModel(53,106).to(device)
meta_optim = torch.optim.Adam(global_model.parameters(), lr=1e-3)
#### add some lr decay: cosine or step or lambda

# The loader neither shuffles nor changes between epochs, so build it once.
# Each batch groups `task_num` sets; an incomplete final batch is dropped.
train_set_loader = DataLoader(train_sets, batch_size=task_num, drop_last=True)

for idx in range(outer_epochs):
    for i, batch in enumerate(train_set_loader):
        x_train, y_train, x_test, y_test = batch
        # Unpacking doubles as a check that the batch is 3-D.
        task_num_, set_size, cols = x_train.shape #<--verify

        x_train = x_train.to(device)
        y_train = y_train.to(device)
        x_test = x_test.to(device)
        y_test = y_test.to(device)

        # Summed meta-test loss over the tasks in this batch.
        total_loss = train(global_model, x_train, y_train, x_test, y_test)

        # One optimizer step on the meta-parameters.
        meta_optim.zero_grad()
        total_loss.backward()
        meta_optim.step()
    
        ### do some validation or call the actual test function? (diff data set & loader)
        ### no grad calculation in whatever that happens next

In [8]:
# Fresh model instance for inspection. Use the notebook-wide `device` instead
# of a hard-coded .cuda(), which raises on machines without a CUDA GPU.
mnet = OffloadModel(53,106)
mnet.to(device)

OffloadModel(
  (hidden1): Linear(in_features=53, out_features=106, bias=True)
  (hidden2): Linear(in_features=106, out_features=212, bias=True)
  (hidden3): Linear(in_features=212, out_features=106, bias=True)
  (hidden4): Linear(in_features=106, out_features=106, bias=True)
  (op_run): Linear(in_features=106, out_features=1, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

In [None]:
# Random (10, 53) input drawn from a standard normal; gradients are tracked for it.
xip = torch.randn(10, 53, requires_grad=True)

In [10]:
# Print the per-layer output shapes and parameter counts of `mnet` for a (10, 53) input.
summary(mnet, (10,53))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1              [-1, 10, 106]           5,724
            Linear-2              [-1, 10, 212]          22,684
           Dropout-3              [-1, 10, 212]               0
            Linear-4              [-1, 10, 106]          22,578
            Linear-5              [-1, 10, 106]          11,342
           Dropout-6              [-1, 10, 106]               0
            Linear-7                [-1, 10, 1]             107
Total params: 62,435
Trainable params: 62,435
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.24
Estimated Total Size (MB): 0.30
----------------------------------------------------------------




In [None]:
# NOTE(review): `a` and `b` are not defined anywhere in the visible notebook, so this
# scratch cell raises NameError on a fresh kernel — define them here or drop the cell.
# Stacks `a` and `b` side by side as columns of one 2-D array.
c = np.column_stack((a,b))

In [None]:
'''
#### Added to dataset_other on origin-local
import torch
from torch.utils.data import Dataset
from sklearn.preprocessing import MinMaxScaler as mm_scaler
import numpy as np



class CompData(Dataset):
    def __init__(self, X, y, train=True, scaler=True, task_num=3, num_sets=100. meta_train_batch=10, meta_test_batch=10, test_batch=32):
        self.task_num = task_num
        self.scaler = scaler
        if train:
            self.meta_train_batch = meta_train_batch
            self.meta_test_batch = meta_test_batch
        else:
            self.test_batch = test_batch

        if scaler:
            X = mm_scaler().fit_transform(X)

        ### create sets. support_x has 10000 sets of 5/ 25 images each. Total ~1000 sets (for 1000 iterations)
        ### create set with 32 rows (#meta_train_number) and 32 rows (#meta_test_number) and append to train and test lists
        #convert pandas dataframe to numpy array
        X = X.to_numpy()
        y = y.to_numpy()

        entire_data = np.hstack((X,y))

        #num_rows = len(X) # for index sampling
        #self.selected_sample_indices = np.random.choice(num_rows,\
        #                                           size=task_num*(meta_train_batch+meta_test_batch),\
        #                                           replace=False)

        total_rows_required = num_sets*(meta_train_batch+meta_test_batch)
        # we get shuffled data so directly pick total rows required
        total_train_rows = entire_data[:total_rows_required]

        train_rows = total_train_rows[:num_sets*meta_train_batch]
        test_rows = total_train_rows[num_threats*meta_train_batch:]

        train_rows = np.hsplit(train_rows, num_sets)
        test_rows = np.hsplit(test_rows, num_sets)

        num_features = len(train_rows[0])#### DEFINE

        ### final np arrays with data and runtimes for num_set rows
        train_rows_data = train_rows[:, :num_features-1]
        train_rows_runtime = train_rows[:,num_features-1:]

        test_rows_data = test_rows[:,:num_features-1]
        test_rows_runtime = test_rows[:,num_features-1:]
        #create sets here:
        final_sets = [] ## list of list. each list row will have train_rows_data/runtime, test_data/runtime
        for j in range(num_tasks):
            tr_row_data = list()
            tr_row_run = list()
            te_row_data = list()
            te_row_run - list()
            for i in range(num_sets):
                tr_row_data.append(train_rows_data[i])
                tr_row_run.append(train_rows_runtime[i])
                te_row_data.append(test_rows_data[i])
                te_row_run.append(test_rows_runtime[i])

            temp = [tr_row_data]+[tr_row_run]+[te_row_data]]+[te_row_run]
            final_sets.append(temp)

        self.final_sets = final_sets

    def __len__(self):
        return len(self.final_sets)

    def __getitem__(self,index):

        #zip sample without replacement from X
        #for i in range(self.num_tasks):
        train_row_data = self.final_sets[index,:,0]
        train_row_runtime = self.final_sets[index,:,1]
        test_row_data = self.final_sets[index,:,2]
        test_row_runtime = self.final_set[index,:,3]

        #for i in range(self.num_tasks):
        #if np.asarray(train_row_data) is train_row_data:
        train_row_data = np.asarray(train_row_data)
        train_row_runtime = np.asarray(train_row_runtime)
        test_row_data = np.asarray(test_row_data)
        test_row_runtime = np.asarray(test_row_runtime)

        ## convert numpy array to torch tensor
        #if not torch.is_tensor(X):
        train_row_data = torch.from_numpy(train_row_data)
        train_row_runtime = torch.from_numpy(train_row_runtime)
        test_row_data = torch.from_numpy(test_row_data)
        test_row_runtime = torch.from_numpy(test_row_runtime)
        #if not torch.is_tensor(y):
        #    self.train_y = torch.from_numpy(y)


        return train_row_data, train_row_runtime, test_row_data, test_row_runtime



'''