In [43]:
import torch
import copy

import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc

import numpy as np
import glob, os
import string
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support

from torch import nn, optim
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import euclidean_distances
import torch.nn.functional as F

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import random
from pathlib import Path
from scipy import stats


In [44]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dir = '../models/estimate_size15/*'
print(dir)
model_files = glob.glob(dir)
device

../models/estimate_size15/*


device(type='cuda')

In [47]:
model_files[1]

'../models/estimate_size15/model_char_15data_MSE_epoch200.pth'

In [48]:
class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(Encoder, self).__init__()
        
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
        
        self.rnn1 = nn.LSTM(
          input_size=n_features,
          hidden_size=self.hidden_dim,
          num_layers=1,
          batch_first=True
        )
        self.rnn2 = nn.LSTM(
          input_size=self.hidden_dim,
          hidden_size=embedding_dim,
          num_layers=1,
          batch_first=True
        )
    def forward(self, x):

        x = x.reshape((1, self.seq_len, self.n_features))
        
        x, (_, _) = self.rnn1(x)
        x, (hidden_n, _) = self.rnn2(x)
        
        return hidden_n.reshape((self.seq_len, self.embedding_dim))

In [49]:
class Decoder(nn.Module):
    def __init__(self, seq_len, input_dim=64, n_features=1):
        super(Decoder, self).__init__()
        
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features
       
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        
        self.output_layer = nn.Linear(self.hidden_dim, n_features)
    def forward(self, x):
        x = x.repeat(self.seq_len, self.n_features)
        x = x.reshape((self.n_features, self.seq_len, self.input_dim))
        
        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, ce1ll_n) = self.rnn2(x)
        x = x.reshape((self.n_features, self.hidden_dim))
        return self.output_layer(x)

In [50]:
class RecurrentAutoencoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(RecurrentAutoencoder, self).__init__()
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [51]:
model = torch.load(model_files[1])
model

RecurrentAutoencoder(
  (encoder): Encoder(
    (rnn1): LSTM(45, 256, batch_first=True)
    (rnn2): LSTM(256, 128, batch_first=True)
  )
  (decoder): Decoder(
    (rnn1): LSTM(128, 128, batch_first=True)
    (rnn2): LSTM(128, 256, batch_first=True)
    (output_layer): Linear(in_features=256, out_features=45, bias=True)
  )
)

In [52]:
dir = '../datasets/estimate_size15/test/*'
# file read
all_names = []
all_data = []
sequence_length = []
alpha = list(string.ascii_uppercase)
data_length = len(glob.glob(dir))

files = glob.glob(dir)
files[0]

'../datasets/estimate_size15/test/3test89.txt'

In [53]:
all_names = []

for file in files:
    all_names.append(file.split('/')[-1].replace('.txt', ''))
all_names[0]


'3test89'

In [54]:
namesbysize = []
for _ in range(15):
    line = []
    namesbysize.append(line)
    

for names in all_names:
    namesbysize[int(names[:names.find('test')])].append(names)
namesbysize[1]

['1test14',
 '1test8',
 '1test12',
 '1test11',
 '1test1',
 '1test7',
 '1test2',
 '1test13',
 '1test0',
 '1test9',
 '1test4',
 '1test6',
 '1test10',
 '1test3',
 '1test5']

In [11]:
# onehot
alpha = list(string.ascii_uppercase)
chr2index = {alpha[i]:i for i in range(len(alpha))}
def chr2OH(alphabet):
    oh = [0 for i in range(len(alpha))]
    #ndex = chr2index[alphabet]
    oh[alphabet] = 1
    #rint(oh)
    return oh

In [55]:
# char
alpha = list(string.ascii_uppercase)
chr2index = {i:alpha[i] for i in range(len(alpha))}

def OH2char(alphabet):
    oh = [0 for i in range(len(alpha))]
    index = chr2index[alphabet]

    return index
chr2OH(1)

[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [12]:
def loadData_onehot(directory):
    all_names = []
    all_data = []
    sequence_length = []
    alpha = list(string.ascii_uppercase)
    data_length = len(glob.glob(directory))
    #file_predix = './datasets/estimate_size/seq'
    files = glob.glob(directory)

    for file in files:
        datasets = []
        #all_names.append(file.split('/')[-1].replace('.txt', ''))
        for rf in open(file, 'r'):
            (u, v, w) = rf[1:-2].split(', ')
            #print(u, v, w)
            datasets.append(chr2OH(int(u)) + chr2OH(int(v)) +[float(w)])
        if datasets != []:
            sequence_length.append(len(datasets))
            all_data.append(datasets)
            all_names.append(file.split('/')[-1].replace('.txt', ''))
   

    all_data = np.array([np.array(arr) for arr in all_data])

    return all_data, all_names, max(sequence_length)

In [17]:
#onehot
def loadData_onehot(directory):
    files = glob.glob(directory)
    all_data = []
    all_names = []
    sequence_length = []
    for file in files:
        all_names.append(file.split('/')[-1].replace('.txt', ''))
        datasets = []
        for rf in open(file, 'r'):
            (u, v, w) = rf[1:-2].split(', ')
            #print(u, v, w)
            datasets.append(chr2OH(int(u)) + chr2OH(int(v)) +[float(w)])
            #datasets.append([chr2index[u[1]], chr2index[v[1]], float(w)])
        sequence_length.append(len(datasets))
        all_data.append(datasets)

    all_data = np.array([np.array(arr) for arr in all_data])

    return all_data, all_names, max(sequence_length)

In [56]:
def loadData_char(directory):
    all_names = []
    all_data = []
    sequence_length = []
    alpha = list(string.ascii_uppercase)
    data_length = len(glob.glob(directory))
    #file_predix = './datasets/estimate_size/seq'
    files = glob.glob(directory)

    for file in files:
        datasets = []
        #all_names.append(file.split('/')[-1].replace('.txt', ''))
        for rf in open(file, 'r'):
            (u, v, w) = rf[1:-2].split(', ')
            #print(u, v, w)
            datasets.append([(float(u)) , (float(v)), float(w)])
        if datasets != []:
            sequence_length.append(len(datasets))
            all_data.append(datasets)
            all_names.append(file.split('/')[-1].replace('.txt', ''))
   

    all_data = np.array([np.array(arr) for arr in all_data])

    return all_data, all_names, max(sequence_length)

In [57]:
def preprocessData_char(all_data, max_sequence_length):
    #max_sequence_length = max(sequence_length)
    zeros = np.zeros(3)

    for ind, data in enumerate(all_data):

        if len(data) != max_sequence_length:
            for i in range(len(data), max_sequence_length):
                all_data[ind] = np.vstack([all_data[ind], zeros]) 
    all_d = []
    for ind, data in enumerate(all_data):
        all_d.append(data.flatten())
        #all_d[ind] = data.flat1ten()
 

    return all_d

In [13]:
def preprocessData_onehot(all_data, max_sequence_length):
    #max_sequence_length = max(sequence_length)
    zeros = np.zeros(53)

    for ind, data in enumerate(all_data):

        if len(data) != max_sequence_length:
            for i in range(len(data), max_sequence_length):
                all_data[ind] = np.vstack([all_data[ind], zeros]) 
    all_d = []
    for ind, data in enumerate(all_data):
        all_d.append(data.flatten())
        #all_d[ind] = data.flat1ten()
 

    return all_d

In [58]:
def create_dataset_1(nparrays):
    dataset = [torch.tensor(s).unsqueeze(0).float() for s in nparrays]
  #step_per_epoch, seq_len, llen, n_features = torch.stack(dataset).shape
    #n_seq, m ,seq_len, n_features = print(torch.stack(dataset).shape)
    print(torch.stack(dataset).shape)
    n_seq, seq_len, n_features = torch.stack(dataset).shape
    #print( n_seq, m , seq_len, n_features )
    return dataset, seq_len, n_features

# represent 0 graph

In [59]:
# char
rep0dir = '../datasets/estimate_size15/seq/0graph*'
rep0data, rep0names, rep0seqLength = loadData_char(rep0dir)
len(rep0names)
rep0d = preprocessData_char(rep0data, rep0seqLength)
rep0testdata, rep0seq_len , rep0n_features = create_dataset_1(rep0d)

torch.Size([1000, 1, 45])


In [15]:
# onehot
rep0dir = '../datasets/estimate_size15/seq/0graph*'
rep0data, rep0names, rep0seqLength = loadData_onehot(rep0dir)
len(rep0names)
rep0d = preprocessData_onehot(rep0data, rep0seqLength)
rep0testdata, rep0seq_len , rep0n_features = create_dataset_1(rep0d)


torch.Size([1000, 1, 689])


# represent 1 graph

In [60]:
# char
rep1dir = '../datasets/estimate_size15/seq/1graph*'
rep1data, rep1names, rep1seqLength = loadData_char(rep1dir)
len(rep1names)
rep1d = preprocessData_char(rep1data, rep1seqLength)
rep1testdata, rep1seq_len , rep1n_features = create_dataset_1(rep1d)

torch.Size([1000, 1, 45])


In [16]:
# onehot
rep1dir = '../datasets/estimate_size15/seq/1graph*'
rep1data, rep1names, rep1seqLength = loadData_onehot(rep1dir)
len(rep1names)
rep1d = preprocessData_onehot(rep1data, rep1seqLength)
rep1testdata, rep1seq_len , rep1n_features = create_dataset_1(rep1d)

torch.Size([1000, 1, 689])


# represent 2 graph

In [61]:
# char
rep2dir = '../datasets/estimate_size15/seq/2graph*'
rep2data, rep2names, rep2seqLength = loadData_char(rep2dir)
len(rep2names)
rep2d = preprocessData_char(rep2data, rep2seqLength)
rep2testdata, rep2seq_len , rep2n_features = create_dataset_1(rep2d)

torch.Size([1000, 1, 45])


In [17]:
# onehot
rep2dir = '../datasets/estimate_size15/seq/2graph*'
rep2data, rep2names, rep2seqLength = loadData_onehot(rep2dir)
len(rep2names)
rep2d = preprocessData_onehot(rep2data, rep2seqLength)
rep2testdata, rep2seq_len , rep2n_features = create_dataset_1(rep2d)

torch.Size([1000, 1, 689])


# represent 3 graph

In [62]:
# char
rep3dir = '../datasets/estimate_size15/seq/3graph*'
rep3data, rep3names, rep3seqLength = loadData_char(rep3dir)
len(rep3names)
rep3d = preprocessData_char(rep3data, rep3seqLength)
rep3testdata, rep3seq_len , rep3n_features = create_dataset_1(rep3d)

torch.Size([1000, 1, 45])


In [18]:
# onehot
rep3dir = '../datasets/estimate_size15/seq/3graph*'
rep3data, rep3names, rep3seqLength = loadData_onehot(rep3dir)
len(rep3names)
rep3d = preprocessData_onehot(rep3data, rep3seqLength)
rep3testdata, rep3seq_len , rep3n_features = create_dataset_1(rep3d)

torch.Size([1000, 1, 689])


# represent 4 graph

In [63]:
# char
rep4dir = '../datasets/estimate_size15/seq/4graph*'
rep4data, rep4names, rep4seqLength = loadData_char(rep4dir)
len(rep4names)
rep4d = preprocessData_char(rep4data, rep4seqLength)
rep4testdata, rep4seq_len , rep4n_features = create_dataset_1(rep4d)

torch.Size([1000, 1, 45])


In [None]:
# onehot
rep4dir = '../datasets/estimate_size15/seq/4graph*'
rep4data, rep4names, rep4seqLength = loadData_onehot(rep4dir)
len(rep4names)
rep4d = preprocessData_onehot(rep4data, rep4seqLength)
rep4testdata, rep4seq_len , rep4n_features = create_dataset_1(rep4d)

# represent 5 graph

In [64]:
# char
rep5dir = '../datasets/estimate_size15/seq/5graph*'
rep5data, rep5names, rep5seqLength = loadData_char(rep5dir)
len(rep5names)
rep5d = preprocessData_char(rep5data, rep5seqLength)
rep5testdata, rep5seq_len , rep5n_features = create_dataset_1(rep5d)

torch.Size([1000, 1, 45])


In [None]:
# onehot
rep5dir = '../datasets/estimate_size15/seq/5graph*'
rep5data, rep5names, rep5seqLength = loadData_onehot(rep5dir)
len(rep5names)
rep5d = preprocessData_onehot(rep5data, rep5seqLength)
rep5testdata, rep5seq_len , rep5n_features = create_dataset_1(rep5d)

# Test data

In [65]:
# char
testdir = '../datasets/estimate_size15/test/*'
testdata, testnames, testseqLength = loadData_char(testdir)
len(testnames)
testd = preprocessData_char(testdata, testseqLength+1)
testtestdata, testseq_len , testn_features = create_dataset_1(testd)
testtestdata[0]
testnames[0]

torch.Size([336, 1, 45])


  all_data = np.array([np.array(arr) for arr in all_data])


'3test89'

In [19]:
# onehot
testdir = '../datasets/estimate_size15/test/*'
testdata, testnames, testseqLength = loadData_onehot(testdir)

testd = preprocessData_onehot(testdata, testseqLength+1)
testtestdata, testseq_len , testn_features = create_dataset_1(testd)
testtestdata[0]
testnames[0]

torch.Size([296, 1, 689])


  all_data = np.array([np.array(arr) for arr in all_data])


'3test72'

In [66]:
namesbysize = []
indexsbysize = []


for _ in range(15):
    line = []
    namesbysize.append(line)

for _ in range(15):
    line = []
    indexsbysize.append(line)
    
for ind, names in enumerate(testnames):
    namesbysize[int(names[:names.find('test')])].append(names)
    indexsbysize[int(names[:names.find('test')])].append(ind)

testdatabysize = []    
testnamebysize = []
for ind, size in enumerate(indexsbysize):
    graphs = []
    names = []
    for indx, index in enumerate(size):
        graphs.append(testtestdata[index])
        names.append(testnames[index])
    testdatabysize.append(graphs)  
    testnamebysize.append(names)


# Predict

In [67]:
def predict(model, dataset):
    predictions, losses = [], []
    criterion = nn.MSELoss(reduction='mean').to(device)
    with torch.no_grad():
        model = model.eval()
        for seq_true in dataset:
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            #print(seq_pred.shape)
            loss = criterion(seq_pred, seq_true)
            #print(loss)
            #print(seq_pred.cpu().numpy().flatten())
            predictions.append(seq_pred.cpu().numpy().flatten())
            losses.append(loss.item())
    return predictions

In [68]:
predictbysize = []
for size in testdatabysize:
    predictbysize.append(predict(model, size))

  return F.mse_loss(input, target, reduction=self.reduction)


In [69]:
rep0predict = predict(model,rep0testdata)

In [70]:
rep1predict = predict(model,rep1testdata)

In [71]:
rep2predict = predict(model,rep2testdata)

In [72]:
rep3predict = predict(model,rep3testdata)

In [73]:
rep4predict = predict(model,rep4testdata)

In [74]:
rep5predict = predict(model,rep5testdata)

In [None]:
rep0predict = predict(model,rep0testdata)
rep1predict = predict(model,rep1testdata)
rep2predict = predict(model,rep2testdata)
rep3predict = predict(model,rep3testdata)
print(rep0predict[0].shape)
print(predictbysize[1][0])
print(len(predictbysize[1][0]))
euclidean_distances([rep0predict[0]], [predictbysize[1][0]])

In [75]:
rep0dist = []
for size in predictbysize:
    dist = []
    for testvec in size:
        for repvec in rep0predict[:10]:
            dist.append(euclidean_distances([testvec], [repvec]))
    rep0dist.append(dist)
rep0dist[1]

[array([[1766.4036]], dtype=float32),
 array([[1766.7454]], dtype=float32),
 array([[1778.0967]], dtype=float32),
 array([[1777.8389]], dtype=float32),
 array([[1766.884]], dtype=float32),
 array([[1777.29]], dtype=float32),
 array([[1777.9415]], dtype=float32),
 array([[1768.3362]], dtype=float32),
 array([[1776.7852]], dtype=float32),
 array([[1765.9915]], dtype=float32),
 array([[12698.965]], dtype=float32),
 array([[12699.139]], dtype=float32),
 array([[12716.228]], dtype=float32),
 array([[12715.962]], dtype=float32),
 array([[12699.334]], dtype=float32),
 array([[12715.024]], dtype=float32),
 array([[12715.499]], dtype=float32),
 array([[12700.26]], dtype=float32),
 array([[12714.431]], dtype=float32),
 array([[12703.464]], dtype=float32),
 array([[8595.761]], dtype=float32),
 array([[8595.918]], dtype=float32),
 array([[8612.236]], dtype=float32),
 array([[8611.977]], dtype=float32),
 array([[8596.093]], dtype=float32),
 array([[8613.585]], dtype=float32),
 array([[8613.959]], d

In [76]:
rep1dist = []
for size in predictbysize:
    dist = []
    for testvec in size:
        for repvec in rep1predict[:10]:
            dist.append(euclidean_distances([testvec], [repvec]))
    rep1dist.append(dist)
rep1dist[1]

[array([[1812.9735]], dtype=float32),
 array([[1791.8934]], dtype=float32),
 array([[1793.1322]], dtype=float32),
 array([[1799.5079]], dtype=float32),
 array([[1782.6183]], dtype=float32),
 array([[1799.6517]], dtype=float32),
 array([[1799.6061]], dtype=float32),
 array([[1792.9208]], dtype=float32),
 array([[1805.7905]], dtype=float32),
 array([[1806.6416]], dtype=float32),
 array([[12743.734]], dtype=float32),
 array([[12730.207]], dtype=float32),
 array([[12731.698]], dtype=float32),
 array([[12734.867]], dtype=float32),
 array([[12718.956]], dtype=float32),
 array([[12735.148]], dtype=float32),
 array([[12735.068]], dtype=float32),
 array([[12731.489]], dtype=float32),
 array([[12740.246]], dtype=float32),
 array([[12742.948]], dtype=float32),
 array([[8644.608]], dtype=float32),
 array([[8627.078]], dtype=float32),
 array([[8628.057]], dtype=float32),
 array([[8632.961]], dtype=float32),
 array([[8617.013]], dtype=float32),
 array([[8633.195]], dtype=float32),
 array([[8633.126]

In [77]:
rep2dist = []
for size in predictbysize:
    dist = []
    for testvec in size:
        for repvec in rep2predict[:10]:
            dist.append(euclidean_distances([testvec], [repvec]))
    rep2dist.append(dist)
rep2dist[1]

[array([[1009.5532]], dtype=float32),
 array([[1494.4829]], dtype=float32),
 array([[1293.4066]], dtype=float32),
 array([[1095.2976]], dtype=float32),
 array([[809.21875]], dtype=float32),
 array([[1494.5631]], dtype=float32),
 array([[1265.8127]], dtype=float32),
 array([[1454.6154]], dtype=float32),
 array([[1521.3591]], dtype=float32),
 array([[1522.8195]], dtype=float32),
 array([[11815.735]], dtype=float32),
 array([[11883.239]], dtype=float32),
 array([[11820.952]], dtype=float32),
 array([[11821.33]], dtype=float32),
 array([[11540.611]], dtype=float32),
 array([[11883.241]], dtype=float32),
 array([[11850.276]], dtype=float32),
 array([[11865.565]], dtype=float32),
 array([[11958.594]], dtype=float32),
 array([[11958.467]], dtype=float32),
 array([[7617.9087]], dtype=float32),
 array([[7811.0513]], dtype=float32),
 array([[7695.7495]], dtype=float32),
 array([[7607.9346]], dtype=float32),
 array([[7473.2354]], dtype=float32),
 array([[7811.0635]], dtype=float32),
 array([[7726

In [78]:
rep3dist = []
for size in predictbysize:
    dist = []
    for testvec in size:
        for repvec in rep3predict[:10]:
            dist.append(euclidean_distances([testvec], [repvec]))
    rep3dist.append(dist)
rep3dist[1]

[array([[11425.39]], dtype=float32),
 array([[11425.488]], dtype=float32),
 array([[11549.516]], dtype=float32),
 array([[11735.84]], dtype=float32),
 array([[11722.848]], dtype=float32),
 array([[11956.706]], dtype=float32),
 array([[11956.706]], dtype=float32),
 array([[11549.516]], dtype=float32),
 array([[12034.727]], dtype=float32),
 array([[11776.515]], dtype=float32),
 array([[8105.0767]], dtype=float32),
 array([[8105.1304]], dtype=float32),
 array([[9094.057]], dtype=float32),
 array([[9016.333]], dtype=float32),
 array([[8212.746]], dtype=float32),
 array([[9266.092]], dtype=float32),
 array([[9266.092]], dtype=float32),
 array([[9094.057]], dtype=float32),
 array([[9854.028]], dtype=float32),
 array([[9397.505]], dtype=float32),
 array([[6755.916]], dtype=float32),
 array([[6756.004]], dtype=float32),
 array([[7475.574]], dtype=float32),
 array([[8146.274]], dtype=float32),
 array([[7820.0317]], dtype=float32),
 array([[8762.855]], dtype=float32),
 array([[8762.855]], dtype=

In [79]:
rep4dist = []
for size in predictbysize:
    dist = []
    for testvec in size:
        for repvec in rep4predict[:10]:
            dist.append(euclidean_distances([testvec], [repvec]))
    rep4dist.append(dist)
rep4dist[1]

[array([[1381.3137]], dtype=float32),
 array([[1381.079]], dtype=float32),
 array([[1136.0881]], dtype=float32),
 array([[1452.7268]], dtype=float32),
 array([[1136.0887]], dtype=float32),
 array([[1391.5562]], dtype=float32),
 array([[1381.0795]], dtype=float32),
 array([[1381.079]], dtype=float32),
 array([[1391.6752]], dtype=float32),
 array([[1276.7393]], dtype=float32),
 array([[12119.284]], dtype=float32),
 array([[12119.308]], dtype=float32),
 array([[12031.063]], dtype=float32),
 array([[12169.666]], dtype=float32),
 array([[12031.064]], dtype=float32),
 array([[12066.298]], dtype=float32),
 array([[12119.309]], dtype=float32),
 array([[12119.307]], dtype=float32),
 array([[12066.023]], dtype=float32),
 array([[12036.426]], dtype=float32),
 array([[7974.1875]], dtype=float32),
 array([[7974.271]], dtype=float32),
 array([[7868.633]], dtype=float32),
 array([[8039.273]], dtype=float32),
 array([[7868.6343]], dtype=float32),
 array([[7957.151]], dtype=float32),
 array([[7974.271]

In [80]:
rep5dist = []
for size in predictbysize:
    dist = []
    for testvec in size:
        for repvec in rep5predict[:10]:
            dist.append(euclidean_distances([testvec], [repvec]))
    rep5dist.append(dist)
rep5dist[1]

[array([[8890.862]], dtype=float32),
 array([[9247.199]], dtype=float32),
 array([[9129.303]], dtype=float32),
 array([[9357.123]], dtype=float32),
 array([[9357.122]], dtype=float32),
 array([[8890.862]], dtype=float32),
 array([[9121.328]], dtype=float32),
 array([[9357.122]], dtype=float32),
 array([[9366.5625]], dtype=float32),
 array([[9357.122]], dtype=float32),
 array([[7939.882]], dtype=float32),
 array([[8849.907]], dtype=float32),
 array([[8235.791]], dtype=float32),
 array([[10271.674]], dtype=float32),
 array([[10271.675]], dtype=float32),
 array([[7939.882]], dtype=float32),
 array([[7689.071]], dtype=float32),
 array([[10271.676]], dtype=float32),
 array([[9242.18]], dtype=float32),
 array([[10271.675]], dtype=float32),
 array([[5223.2466]], dtype=float32),
 array([[6993.544]], dtype=float32),
 array([[6417.7754]], dtype=float32),
 array([[7503.306]], dtype=float32),
 array([[7503.3066]], dtype=float32),
 array([[5223.2466]], dtype=float32),
 array([[6321.7544]], dtype=fl

In [81]:
rep0flatten = []
rep1flatten = []
rep2flatten = []
rep3flatten = []
rep4flatten = []
rep5flatten = []

for size in rep0dist:
    distbysize = [] 
    for dist in size:
        distbysize.extend(dist[0])
    rep0flatten.append(distbysize)
    
for size in rep1dist:
    distbysize = [] 
    for dist in size:
        distbysize.extend(dist[0])
    rep1flatten.append(distbysize)
    
for size in rep2dist:
    distbysize = [] 
    for dist in size:
        distbysize.extend(dist[0])
    rep2flatten.append(distbysize)
    
for size in rep3dist:
    distbysize = [] 
    for dist in size:
        distbysize.extend(dist[0])
    rep3flatten.append(distbysize)
    
for size in rep4dist:
    distbysize = [] 
    for dist in size:
        distbysize.extend(dist[0])
    rep4flatten.append(distbysize)
    
for size in rep5dist:
    distbysize = [] 
    for dist in size:
        distbysize.extend(dist[0])
    rep5flatten.append(distbysize)

In [82]:
rep0flatten[12]

[267.00317,
 267.42548,
 252.51054,
 252.68445,
 267.3602,
 219.4108,
 218.67274,
 269.40265,
 219.94702,
 255.38817,
 334.865,
 335.2289,
 323.06567,
 323.2124,
 335.1798,
 294.0025,
 293.34686,
 337.00583,
 294.40588,
 324.6316,
 332.70126,
 333.06552,
 320.70734,
 320.85675,
 333.01517,
 291.6632,
 290.988,
 334.80362,
 292.07846,
 322.46216,
 118.83159,
 119.316925,
 104.39714,
 104.59255,
 119.23345,
 62.97867,
 61.565952,
 121.936,
 63.82047,
 106.34931,
 144.43875,
 144.89592,
 128.93585,
 129.14827,
 144.81142,
 90.80846,
 89.57085,
 147.14047,
 91.60954,
 132.66772,
 175.35674,
 175.81258,
 160.32693,
 160.52007,
 175.73712,
 122.852135,
 121.85365,
 178.1045,
 123.527954,
 162.93288,
 175.99712,
 176.44504,
 160.68112,
 160.88191,
 176.36801,
 124.95668,
 123.913956,
 178.53992,
 125.65265,
 164.44972,
 143.48605,
 143.96649,
 128.88817,
 129.0828,
 143.89197,
 88.47733,
 87.29451,
 146.35475,
 89.25205,
 131.23296]

In [84]:
rep0avg = []
rep1avg = []
rep2avg = []
rep3avg = []
rep4avg = []
rep5avg = []

for size in range(1,13):
    rep0avg.append(sum(rep0flatten[size])/len(rep0flatten[size]))
    rep1avg.append(sum(rep1flatten[size])/len(rep1flatten[size]))
    rep2avg.append(sum(rep2flatten[size])/len(rep2flatten[size]))
    rep3avg.append(sum(rep3flatten[size])/len(rep3flatten[size]))
    rep4avg.append(sum(rep4flatten[size])/len(rep4flatten[size]))
    rep5avg.append(sum(rep5flatten[size])/len(rep5flatten[size]))

print(rep0avg)
print(rep1avg)
print(rep2avg)
print(rep3avg)
print(rep4avg)
print(rep5avg)

[9350.609099934896, 1550.2758883158365, 1058.0068069045608, 560.031493100253, 293.9724005005576, 341.94621939886184, 85.79736250800056, 385.8061749903361, 251.34376049041748, 167.6277414560318, 276.05590875799004, 193.01222801208496]
[9377.303352050782, 1575.1238619486492, 1082.9736649796769, 579.7783428538929, 307.8201834938743, 321.9715767633347, 99.90895029531943, 360.030547320048, 212.93711718645963, 127.85658091306686, 223.71140726262874, 132.08589651584626]
[8556.110623779297, 1248.1552417755126, 794.3839776941248, 973.9610473632813, 1118.9230092135342, 1192.8241912841797, 1172.7078603383657, 1128.3531486002605, 1225.6528234308416, 1217.7025382995605, 1264.5491122159092, 1246.507794189453]
[8793.86685546875, 11893.35122680664, 12076.235692039696, 12538.997414328835, 12790.030690696023, 12846.819966052828, 12878.406698690878, 12759.7815625, 12906.714781605113, 12916.249145507812, 12945.970720880681, 12938.061767578125]
[8752.185812988282, 1166.3267491658528, 707.0306766819309, 714

In [85]:

rep0trimavg = []
rep1trimavg = []
rep2trimavg = []
rep3trimavg = []
rep4trimavg = []
rep5trimavg = []

for size in range(1, len(rep0flatten)):
    rep0trimavg.append(stats.trim_mean(rep0flatten[size], 0.2))
    rep1trimavg.append(stats.trim_mean(rep1flatten[size], 0.2))
    rep2trimavg.append(stats.trim_mean(rep2flatten[size], 0.2))
    rep3trimavg.append(stats.trim_mean(rep3flatten[size], 0.2))
    rep4trimavg.append(stats.trim_mean(rep4flatten[size], 0.2))
    rep5trimavg.append(stats.trim_mean(rep5flatten[size], 0.2))

print(rep0trimavg)
print(rep1trimavg)
print(rep2trimavg)
print(rep3trimavg)
print(rep4trimavg)
print(rep5trimavg)

[10829.359, 1083.1302, 1078.5424, 514.1001, 269.12036, 288.9539, 70.21236, 326.44498, 235.46394, 143.45284, 268.30444, 181.70451, 50.735535, 31.105202]
[10856.379, 1110.4565, 1104.3491, 534.32275, 281.86456, 260.8845, 85.74444, 293.9866, 188.97366, 92.687645, 210.71219, 118.45746, 48.810974, 98.91787]
[9945.075, 853.56573, 828.22974, 996.314, 1135.8556, 1244.9049, 1176.6528, 1176.097, 1241.1903, 1230.5809, 1268.5068, 1242.1681, 1196.454, 1170.8947]
[8756.049, 12042.776, 12073.7295, 12573.121, 12824.898, 12928.861, 12888.099, 12843.921, 12939.593, 12938.272, 12957.788, 12939.62, 12913.065, 12893.243]
[10181.938, 727.461, 710.04724, 714.58014, 847.98175, 925.9712, 876.3906, 873.85767, 921.8503, 919.4079, 945.001, 928.11993, 895.68115, 879.68]
[8228.631, 9505.703, 9524.88, 9987.915, 10238.022, 10326.937, 10289.627, 10239.076, 10330.222, 10333.3955, 10354.581, 10337.301, 10307.76, 10289.12]


In [None]:
tips = sns.load_dataset('tips')
print(tips.shape)
print(tips.head)


In [263]:
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 15 14:48:46 2020

@author: sookmyung
"""
import os 
import random
from pathlib import Path
import glob

path = str(Path(__file__).parent.parent) 
files = glob.glob(path +  '\\datasets\\estimate_size\\test\\*') 
all_names = []

for file in files:aa
    all_names.append(file.split('\\')[-1].replace('.txt', ''))

namesbysize = []
for _ in range(15):
    line = []
    namesbysize.append(line)
    

for names in all_names:
    namesbysize[int(names[:names.find('test')])].append(names)
    
predicts = []
for _ in range(15):
    line = []
    predicts.append(line)

precision = []    
for ind, name in enumerate(namesbysize):
    precision.extend(len(predicts[ind])/len(namesbysize))
    
print('Best size in test case: ', max(precision) )

IndentationError: unexpected indent (<ipython-input-263-7426b1d0bda2>, line 17)

In [None]:
model = torch.load(model_path)  