In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
import math
from torch.utils.data import Dataset, DataLoader
from sklearn import datasets
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.preprocessing import StandardScaler#para escalar caracteristicas
from sklearn.model_selection import train_test_split # separar mas facil la data de train y test
import pandas as pd
from sklearn.utils import shuffle
from ast import literal_eval
import time
import sys
import os
from scipy.signal import convolve2d
from functions.auxiliares import PaddingSameSize, MatrixFormat_To_Vector, RemoveOversizedMatrix, StandardSize_Padding, CNN_Features_Format, batch_format, KernelSize, StandardSize_InitialConvolution, StandardSize


In [None]:
# Select the compute device: CUDA when a GPU is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

In [None]:
# Size limits handed to RemoveOversizedMatrix:
#   max_r -> maximum number of sub-queries
#   max_c -> maximum number of sub-features
max_r, max_c = 40, 50

In [None]:
# Load the raw CSV, filter and convert it, pad every matrix to a standard size,
# and write a train/test split to disk.

# Directory that holds the input CSV and receives the generated CSVs.
DATA_DIR = 'D:/.Memoria/Test/Jupyter/'
# Fixed seed so the shuffle and the train/test split are reproducible across runs.
SPLIT_SEED = 42

df_raw = pd.read_csv(DATA_DIR + 'test_example.csv')
columns = list(df_raw.columns)
print(columns)

# Keep only rows whose 'ql_rt_msec' is strictly between 0 and 1e3, then shuffle them.
valid_runtime = (df_raw['ql_rt_msec'] > 0) & (df_raw['ql_rt_msec'] < 1e3)
df_clean = shuffle(df_raw[valid_runtime], random_state=SPLIT_SEED)
# This snapshot is written BEFORE the percent/matrix conversions below.
df_clean.to_csv(DATA_DIR + 'test_example_clean.csv', index=False)


def _percent_to_fraction(text):
    """Turn a percentage string such as '12.5%' into the float 0.125."""
    return float(text.strip('%')) / 100


# Columns stored as percentage strings that must become floats.
percent_columns = [
    'ql_rt_clocks',
    'ql_same_seg',
    'ql_same_page',
    'ql_cl_wait_clocks',
    'ql_c_clocks',
    'ql_c_cl_wait',
]
for _col in percent_columns:
    df_clean[_col] = df_clean[_col].apply(_percent_to_fraction)

# 'matrix_format' holds the string form of a nested list; parse it into a float32 array.
df_clean['matrix_format'] = df_clean['matrix_format'].apply(
    lambda x: np.asarray(literal_eval(x)).astype(np.float32)
)

# Shapes of the raw and the cleaned frames.
print(f'shape df_raw: {df_raw.shape}')
print(f'shape df_clean: {df_clean.shape}')

new_df_clean, max_new_r, max_new_c, min_new_r, min_new_c, mean_new_r, mean_new_c = RemoveOversizedMatrix(df_clean, max_r, max_c)

# Target matrix size fed to the network (alternative: max_new_r x max_new_c).
# NOTE(review): max_r/max_c may exceed this 20x36 target; confirm that
# StandardSize_Padding copes with matrices larger than the target size.
num_standard_rows = 20
num_standard_columns = 36

# Pad the FILTERED frame's own column. Padding df_clean and relying on index
# alignment would also process the rows that were just removed. Work on an
# explicit copy so the assignment never targets a view of df_clean.
new_df_clean = new_df_clean.copy()
new_df_clean['matrix_format'] = new_df_clean['matrix_format'].apply(
    lambda x: StandardSize_Padding(x, num_standard_rows, num_standard_columns)
)

print(f'shape new_df_clean: {new_df_clean.shape}')

# Random ~80/20 train/test split driven by a seeded generator.
_split_rng = np.random.default_rng(SPLIT_SEED)
msk = _split_rng.random(len(new_df_clean)) <= 0.8
df_train = new_df_clean[msk]
df_test = new_df_clean[~msk]

df_train.to_csv(DATA_DIR + 'test_example_clean_train.csv', index=False)
df_test.to_csv(DATA_DIR + 'test_example_clean_test.csv', index=False)



In [None]:
# Build the model inputs (features) and targets for the train and test splits.
features = ['matrix_format']

# Feature frames and their NumPy counterparts.
X_df_train, X_df_test = df_train[features], df_test[features]
X_numpy_train, X_numpy_test = X_df_train.to_numpy(), X_df_test.to_numpy()

# Targets: the 'ql_rt_msec' column as float32 arrays.
y_df_train, y_df_test = df_train['ql_rt_msec'], df_test['ql_rt_msec']
y_numpy_train = y_df_train.to_numpy().astype(np.float32)
y_numpy_test = y_df_test.to_numpy().astype(np.float32)

_rule = "-----------------------"
print(_rule)
for _label, _array in (
    ('X_numpy_train', X_numpy_train),
    ('X_numpy_test', X_numpy_test),
    ('y_numpy_train', y_numpy_train),
    ('y_numpy_test', y_numpy_test),
):
    print(f'shape {_label}: {_array.shape}')
print(_rule)

# Convert the features with the project helper CNN_Features_Format.
X_train = CNN_Features_Format(X_numpy_train)
X_test = CNN_Features_Format(X_numpy_test)

# Targets become torch tensors, reshaped from a flat vector to a column (N, 1).
y_train = torch.from_numpy(y_numpy_train).unsqueeze(1)
y_test = torch.from_numpy(y_numpy_test).unsqueeze(1)

for _label, _tensor in (
    ('X_train', X_train),
    ('X_test', X_test),
    ('y_train', y_train),
    ('y_test', y_test),
):
    print(f'shape {_label}: {_tensor.shape}')
print(_rule)

## Parámetros

In [None]:
# Training hyper-parameters.
learning_rate = 0.001
batches_size = 100
num_epochs = 10000

# Number of mini-batches needed to cover the training set, and the batches
# themselves as built by the project helper batch_format.
n_train_samples = X_train.shape[0]
num_batches = math.ceil(n_train_samples / batches_size)
batches = batch_format(X_train, y_train, batches_size)

# Sanity check: print the shape of every feature/label batch.
# The loop index `i` stays defined at module level after this loop.
for i in range(num_batches):
    print(batches["features"][i].shape, batches["labels"][i].shape, sep="\n")
print(X_train.shape, y_train.shape, sep="\n")

## Modelo

In [None]:
class ConvNet(nn.Module):
    """Small CNN regressor: two conv + max-pool stages followed by five linear layers.

    The first linear layer expects 12*3*6 = 216 features, which is what a
    single-channel 20x36 input produces:
    conv1 (3x5) -> 18x32, pool -> 9x16, conv2 (3x5) -> 7x12, pool -> 3x6,
    with 12 output channels.

    The final layer has a single output unit.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (no padding, stride 1) with a shared 2x2 max-pool.
        self.conv1 = nn.Conv2d(1, 6, (3, 5), stride=(1, 1))
        self.pool = nn.MaxPool2d((2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(6, 12, (3, 5), stride=(1, 1))
        # Fully connected head; layer sizes may be changed freely as long as
        # the last layer keeps the desired output size.
        self.fc1 = nn.Linear(12 * 3 * 6, 162)
        self.fc2 = nn.Linear(162, 84)
        self.fc3 = nn.Linear(84, 42)
        self.fc4 = nn.Linear(42, 20)
        self.fc5 = nn.Linear(20, 1)

    def forward(self, x):
        """Return one prediction per sample.

        Args:
            x: tensor of shape (N, 1, 20, 36); an unbatched (1, 20, 36) tensor
               is also accepted and treated as a batch of one.

        Returns:
            Tensor of shape (N, 1).

        Raises:
            RuntimeError: if the spatial size does not yield 216 features per
                sample (raised by ``fc1``).
        """
        # Keep accepting unbatched input, which the previous view(-1, 216) tolerated.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        x = self.pool(F.leaky_relu(self.conv1(x), negative_slope=0.01))
        x = self.pool(F.leaky_relu(self.conv2(x), negative_slope=0.01))

        # Flatten everything except the batch dimension. Unlike view(-1, 216),
        # this can never move values from one sample into another: a wrong
        # input size now fails loudly in fc1 instead of silently changing the
        # number of output rows.
        x = x.flatten(start_dim=1)

        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.leaky_relu(hidden(x), negative_slope=0.01)
        # No activation on the output layer.
        return self.fc5(x)


# Instantiate the network, then move its parameters to the selected device.
model = ConvNet()
model = model.to(device)

## Loss y Optimizer

In [None]:
# Adam over all model parameters, minimizing the mean squared error.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

## Training Loop

In [None]:
# Mini-batch training: one optimizer step per batch, num_epochs passes over all batches.
model.train()
for epoch in range(num_epochs):
    for bt in range(num_batches):
        # Index with the loop variable `bt`. Using a stale global index here
        # would train on the same single batch at every step.
        batch_features = batches["features"][bt].to(device)
        batch_labels = batches["labels"][bt].to(device)  # same device as the model output

        # Clear gradients first so nothing left over from an interrupted run accumulates.
        optimizer.zero_grad()
        y_hat = model(batch_features)
        loss = criterion(y_hat, batch_labels)
        loss.backward()
        optimizer.step()

        # Every 100 epochs, report the loss of the last batch of the epoch.
        if bt+1 == num_batches and (epoch+1) % 100 == 0:
            print(f'epoch: [{epoch+1}/{num_epochs}], loss = {loss.item()}')
            
        

### Testing

In [None]:
# Basic check: test-set shapes and the training batch size.
print(X_test.shape, y_test.shape, batches_size, sep="\n")



In [None]:

# Split the test set into small batches with the project helper batch_format.
batches_size_test = 5
num_batches_test = math.ceil(X_test.shape[0]/batches_size_test)
batches_test = batch_format(X_test,y_test,batches_size_test)

# Preview: predictions vs. targets for the first test batch.
# Inference only, so skip autograd bookkeeping and make sure the batch lives
# on the same device as the model.
with torch.no_grad():
    y_pred = model(batches_test["features"][0].to(device))
y = batches_test["labels"][0]
print(y_pred)

print(y)

print()



In [None]:
# Tolerance accuracy: a prediction counts as a hit when it is within `tol`
# of the target.
tol = 100 # tolerance in milliseconds
num_aciertos = 0
num_test = float(y_test.shape[0])

model.eval()  # inference mode for any mode-dependent layers
with torch.no_grad():
    for bt in range(num_batches_test):
        # Move the batch to the model's device; targets are only read via .item().
        y_pred_tensor = model(batches_test["features"][bt].to(device))
        y_tensor = batches_test["labels"][bt]
        for i in range(y_tensor.shape[0]):
            y_pred = y_pred_tensor[i].item()
            y = y_tensor[i].item()
            print(y, y_pred)
            if abs(y_pred-y) < tol:
                num_aciertos += 1

# Guard against an empty test set instead of raising ZeroDivisionError.
accuracy = num_aciertos/num_test if num_test > 0 else float('nan')

print(f'achuntes: [{num_aciertos}/{num_test}]')
print(f'accuracy: {accuracy*100}')
      
      

            