In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from utils.model_operations import *
from utils.image_operations import *

directory_path = os.getcwd()

In [28]:
def loadModelWeights(setName, percentage=1):
    """
    setname: either "train" or "test"
    
    returns dataframe with the weights of all models by layers
    """
    bias = ['0.02', '0.03', '0.04', '0.05']
    df = pd.DataFrame()
    modelId = 0
    for b in bias:
        if setName == "train":
            model_data = ModelDataset(bias=b, data_directory=directory_path+'/data/DigitWdb/train')
        elif setName == "test":
            model_data = ModelDataset(bias=b, data_directory=directory_path+'/data/DigitWdb/test')
        else:
            raise "set name must either be train or test"
            
        for modelNumber in tqdm(range(len(model_data)//percentage), desc="loading model weights with bias "+b):
            model = model_data[modelNumber]
            layerNumber = 0
            for layer in model.layers:
                if len(layer.get_weights()) != 0:
                    # weights
                    weights = layer.get_weights()[0]
                    # biases
                    biases = layer.get_weights()[1]

                    # wandb = np.ravel(weights) + np.ravel(biases)
                    
                    df = df.append({'modelId':modelId,'weights':np.ravel(weights),'biases':np.ravel(biases),'layer':layerNumber, 'bias':b}, ignore_index=True)
                    # df = df.append({'modelId':modelId,'wandb':wandb,'layer':layerNumber, 'bias':b}, ignore_index=True)
                    layerNumber = layerNumber + 1
            modelId += 1       
    return df


loading model weights with bias 0.02: 100%|██████████| 20/20 [00:05<00:00,  3.96it/s]
loading model weights with bias 0.03: 100%|██████████| 20/20 [00:02<00:00,  6.90it/s]
loading model weights with bias 0.04: 100%|██████████| 20/20 [00:02<00:00,  6.70it/s]
loading model weights with bias 0.05: 100%|██████████| 20/20 [00:02<00:00,  7.65it/s]
loading model weights with bias 0.02: 100%|██████████| 5/5 [00:00<00:00,  8.37it/s]
loading model weights with bias 0.03: 100%|██████████| 5/5 [00:01<00:00,  4.87it/s]
loading model weights with bias 0.04: 100%|██████████| 5/5 [00:01<00:00,  4.68it/s]
loading model weights with bias 0.05: 100%|██████████| 5/5 [00:01<00:00,  4.75it/s]


In [None]:
int(trainModelWeights['layer'].max()

In [None]:
trainModelWeights = loadModelWeights("train")
testModelWeights = loadModelWeights("test")

In [77]:
trainModelWeights

Unnamed: 0,modelId,weights,biases,layer,bias
0,0.0,"[-0.018486138, -0.03354981, -0.16535422, -0.06...","[-0.24564764, -0.081536554, -0.01958792, -0.11...",0.0,0.02
1,0.0,"[-0.26035, -0.031969644, 0.02034611, 0.0241116...","[-0.08751261, 0.14474091, -0.26124775, -0.2221...",1.0,0.02
2,0.0,"[-0.045246184, -0.022445709, -0.17450637, -0.0...","[0.18726698, -0.055855844, 0.10226409, -0.1979...",2.0,0.02
3,0.0,"[0.14023237, -0.0014608316, 0.17644557, 0.1682...","[-0.11819455, 0.109577455, -0.034734905, 0.126...",3.0,0.02
4,0.0,"[-0.25628987, 0.017059373, -0.14892107, 0.1850...","[0.04371279, 0.1927553, -0.0704351, 0.02792670...",4.0,0.02
...,...,...,...,...,...
395,79.0,"[0.037538417, -0.084343694, 0.00429407, -0.043...","[-0.011763463, -0.008365027, -0.14752248, -0.1...",0.0,0.05
396,79.0,"[0.20852585, -0.23954894, -0.0094635775, -0.06...","[0.06761229, -0.007432909, 0.024540491, -0.137...",1.0,0.05
397,79.0,"[0.2562213, 0.039079823, 0.041972984, 0.120924...","[-0.18877326, -0.028655905, -0.026531724, 8.70...",2.0,0.05
398,79.0,"[-0.042518232, 0.16675776, 0.5135789, -0.35607...","[-0.12601914, 0.22279127, 0.93562245, 0.575864...",3.0,0.05


In [87]:
X = trainModelWeights[trainModelWeights['modelId'] == 0][[feature]].values[:,0]

(1280,)

In [99]:
from sklearn.model_selection import train_test_split


def train_test(trainModelWeights, testModelWeights = None, feature='weights'):

    all_dfs = {}
    dataset = []
    
    train_ids = list(range(0, int(trainModelWeights['modelId'].max() + 1)))
    if testModelWeights is not None : 
        test_ids = list(range(0, int(testModelWeights['modelId'].max() + 1)))
    else : 
        train_ids, test_ids = train_test_split(train_ids, test_size=0.2)
    
    # TO DO : add support for the test set

    for modelid in train_ids :

        # extract weights and bias
        X_train = trainModelWeights[trainModelWeights['modelId'] == modelid][[feature]].values[:,0]
        y_train = trainModelWeights[trainModelWeights['modelId'] == modelid][['bias']].values[:,0][0]

        results = pd.DataFrame(columns=['layer','feature', 'bias'])
        layers = []
        for i, layer in enumerate(X_train) : 
            # For now, randomly reduce dimension
            # TO DO : CNN feature reduction 
            layer = np.random.choice(layer, size=100, replace=False) 
            results = results.append({'layer' : i, 'feature' : layer, 'bias' : y_train}, ignore_index=True)
            layers.append(layer)

        all_dfs[modelid] = results.set_index('layer')
        dataset.append(layers)

    # most important : dataset
    return train_dataset, all_dfs, train_ids, test_ids

In [100]:
dataset, all_dfs, train_ids, test_ids = train_test(trainModelWeights, feature='weights')

In [104]:
np.array(dataset).shape

(64, 5, 100)

In [109]:
# batch
batch = np.array(dataset)[:8,:,:]
batch.shape

(8, 5, 100)

In [112]:
torch.Tensor(batch).size()

torch.Size([8, 5, 100])

In [113]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # input_size = 100 (number of features), output_size = 10
        self.lstm = nn.LSTM(100, 10, 1, batch_first=True)
        # output_size = 1 (target price)
        self.dense = nn.Linear(10, 1)

    def forward(self, x):
        x = self.dense(self.lstm(x)[0])
        return x

model = Model()
# batch_input = torch.randn(16, 5, 100) # => batch_size = 16
y = model(torch.Tensor(batch)) # => torch.Size([16, 7, 1])

In [115]:
y[0]

tensor([[-0.0469],
        [-0.0699],
        [-0.0493],
        [-0.1235],
        [-0.0808]], grad_fn=<SelectBackward0>)

In [None]:
# batch_input = torch.randn(16, 7, 5) => batch_size = 16
# https://stackoverflow.com/questions/61856896/pytorch-lstm-data-dimension

In [97]:
all_dfs[23]

Unnamed: 0_level_0,feature,bias
layer,Unnamed: 1_level_1,Unnamed: 2_level_1
0,"[-0.031022916, -0.08982373, -0.086074, -0.0578...",0.03
1,"[0.083413705, -0.082515985, 0.051866587, 0.118...",0.03
2,"[0.15023884, -0.018405134, -0.03787086, -0.277...",0.03
3,"[-0.019644208, -0.0229431, -0.01770842, -0.196...",0.03
4,"[-0.16148698, 0.29211918, -0.118106246, -0.190...",0.03


In [None]:
# Right now, we have a dataset made of 
# 