In [1]:
import numpy as np
import time

In [2]:
def readFile(fn):
    """Load a comma-separated numeric file and split it into features and labels.

    Parameters
    ----------
    fn : str or path-like
        Location of the file, passed straight to ``np.loadtxt``. Every row
        must hold the same number of numeric, comma-separated values.

    Returns
    -------
    datos : numpy.ndarray, shape (rows, columns - 1)
        Every column except the last one.
    resul : numpy.ndarray, shape (rows,)
        The last column of the file.
    """
    # ndmin=2 keeps a one-line file as a (1, columns) matrix; without it
    # loadtxt returns a 1-D array and the 2-D slicing below raises IndexError.
    todo = np.loadtxt(fn, delimiter=',', ndmin=2)
    # .copy() turns the slices into independent arrays instead of views of `todo`.
    datos = todo[:, :-1].copy()
    resul = todo[:, -1].copy()
    return datos, resul

In [3]:
def normalizar(datos):
    """Standardise every column to zero mean and unit standard deviation.

    The population standard deviation (divide by the number of rows) is used.
    A column whose values are all equal has a standard deviation of 0; it is
    mapped to all zeros instead of being divided by zero.

    Parameters
    ----------
    datos : numpy.ndarray, shape (rows, columns)
        Matrix to standardise. It is not modified.

    Returns
    -------
    numpy.ndarray, shape (rows, columns)
        The standardised copy of ``datos``.

    Side effects
    ------------
    Prints the shapes involved and the per-column mean / std of the result
    as a sanity check.
    """
    media = np.mean(datos, axis=0)
    std   = np.std(datos, axis=0)

    # Constant columns: (datos - media) is already 0 there, so dividing by 1
    # yields 0 instead of the NaN that 0/0 would produce.
    std[std == 0] = 1.0

    datos_normalizados = (datos - media)/std

    print("Datos normalizados")
    print(datos.shape, media.shape, std.shape)
    print(datos_normalizados.shape)
    print("media, std: ", np.mean(datos_normalizados, axis = 0), np.std(datos_normalizados, axis = 0))

    return datos_normalizados

In [4]:
def predecir(w, b, datos):
    """Predict the class of every row of ``datos`` with a logistic model.

    A row is assigned the positive class when sigmoid(datos @ w + b) > 0.5.
    Mathematically that is the same condition as ``datos @ w + b > 0``, so
    the logit is thresholded directly. This skips the exponential, which
    overflows (RuntimeWarning) for large negative logits, and classifies
    purely by the sign of the logit even when it is too small for the
    sigmoid to be distinguishable from 0.5 in floating point.

    Parameters
    ----------
    w : numpy.ndarray
        Weight vector, multiplied as ``np.matmul(datos, w)``.
    b : float
        Bias term added to every logit.
    datos : numpy.ndarray
        Feature matrix, one sample per row.

    Returns
    -------
    numpy.ndarray of bool
        True where the positive class is predicted.
    """
    logits = np.matmul(datos, w) + b
    return logits > 0

In [5]:
def sigmoide(w, b, datos):
    """Logistic (sigmoid) activation of the linear model ``datos @ w + b``.

    Parameters
    ----------
    w : numpy.ndarray
        Weight vector, multiplied as ``np.matmul(datos, w)``.
    b : float
        Bias term added to every logit.
    datos : numpy.ndarray
        Feature matrix, one sample per row.

    Returns
    -------
    numpy.ndarray
        ``1 / (1 + exp(-z))`` for every logit ``z``, each value in [0, 1].
    """
    z = np.matmul(datos, w) + b
    # 1/(1+exp(-z)) == exp(-log(1+exp(-z))) == exp(-logaddexp(0, -z)).
    # logaddexp evaluates log(1+exp(-z)) without ever forming exp(-z), so a
    # large negative logit no longer overflows as the direct formula does.
    return np.exp(-np.logaddexp(0.0, -z))

In [6]:
def funcion_coste(res, pred_res, w, filas, lambda_reg):
    """L2-regularised binary cross-entropy.

    Parameters
    ----------
    res : numpy.ndarray
        True labels (the formula treats them as 0/1 weights).
    pred_res : numpy.ndarray
        Predicted probabilities, same shape as ``res``.
    w : numpy.ndarray
        Weights whose squared sum is penalised.
    filas : int
        Number of samples used to average both terms.
    lambda_reg : float
        Regularisation strength.

    Returns
    -------
    float
        ``-(1/filas) * sum(res*log(p) + (1-res)*log(1-p))
        + lambda_reg/(2*filas) * sum(w**2)``, with a small constant added
        inside both logarithms.
    """
    eps = 1e-5  # keeps both logarithms finite when a prediction is exactly 0 or 1

    log_verosimilitud = np.sum(
        res * np.log(pred_res + eps) + (1 - res) * np.log(1 - pred_res + eps)
    )
    suma_cuadrados = np.sum(np.power(w, 2))

    entropia_cruzada = (-1 / filas) * log_verosimilitud
    return entropia_cruzada + lambda_reg / (2 * filas) * suma_cuadrados

In [7]:
def precision(w, b, datos, res):
    """Fraction of rows whose predicted class equals the true label.

    Parameters
    ----------
    w : numpy.ndarray
        Weight vector forwarded to ``predecir``.
    b : float
        Bias term forwarded to ``predecir``.
    datos : numpy.ndarray
        Feature matrix, one sample per row.
    res : numpy.ndarray
        True labels, one per row of ``datos``.

    Returns
    -------
    float
        Number of matching predictions divided by the number of rows.
    """
    f        = np.shape(datos)[0]
    pred_res = predecir(w, b, datos)
    # One vectorised comparison replaces the per-row Python loop. Both sides
    # are flattened so labels and predictions are paired element by element
    # (as the loop did) rather than broadcast against each other.
    correcto = np.count_nonzero(np.ravel(res) == np.ravel(pred_res))
    return correcto / float(f)

In [8]:
def entrenar(datos, res, iteraciones, aprendizaje, lambda_reg, semilla=6):
    """Fit a logistic regression with batch gradient descent and L2 penalty.

    Parameters
    ----------
    datos : numpy.ndarray, shape (rows, columns)
        Feature matrix, one sample per row.
    res : numpy.ndarray, shape (rows,)
        True labels.
    iteraciones : int
        Number of gradient-descent steps.
    aprendizaje : float
        Learning rate applied to both the weight and the bias gradient.
    lambda_reg : float
        L2 regularisation strength (applied to the weights, not the bias).
    semilla : int, optional
        Seed passed to ``np.random.seed`` before drawing the initial weights.
        Defaults to 6, the value that used to be hard-coded.

    Returns
    -------
    w : numpy.ndarray, shape (columns,)
        Learned weights.
    b : float
        Learned bias.

    Side effects
    ------------
    Reseeds NumPy's global random generator and prints the cost once per
    iteration.
    """
    f, c = np.shape(datos)
    np.random.seed(semilla)
    w = np.random.random([c, ])  # one random initial weight per column
    b = 0
    for i in range(iteraciones):
        pred_res = sigmoide(w, b, datos)
        error    = pred_res - res

        # Evaluate the cost with the same weights that produced pred_res.
        # Computing it after the update would pair old predictions with new
        # weights in the regularisation term.
        coste = funcion_coste(res, pred_res, w, f, lambda_reg)

        # datos.T @ error yields sum(error * datos[:, j]) for every column j
        # at once, replacing the explicit Python loop over columns.
        dw = np.matmul(datos.T, error)/f + (lambda_reg/f)*w
        db = np.sum(error)/f

        w  = w - aprendizaje*dw
        b  = b - aprendizaje*db
        print(str(i) + ". cost function value: " + str(coste))

    return w, b

In [9]:
# Read the whole dataset (feature columns plus the label column) and report
# how long the read took.
RUTA_DATOS = "../../../datos/botnet_tot_syn_l.csv"

t1 = time.time()
datos, resul = readFile(RUTA_DATOS)
t2 = time.time()

transcurrido = t2 - t1
print("Tiempo transcurrido: {} segundos.".format(transcurrido))

Tiempo transcurrido: 12.174845457077026 segundos.


In [10]:
# Shapes before standardisation.
print(datos.shape, resul.shape)

# Standardise the feature columns and time the call.
# Note: this rebinds `datos` to the normalised matrix.
t1 = time.time()
datos = normalizar(datos)
t2 = time.time()

# Shapes after standardisation, followed by the elapsed time.
separador = " = = = = = = = = = "
print(separador)
print(datos.shape, resul.shape)
transcurrido = t2 - t1
print("Tiempo transcurrido: {} segundos.".format(transcurrido))

(1000000, 11) (1000000,)
Datos normalizados
(1000000, 11) (11,) (11,)
(1000000, 11)
media, std:  [-6.16018147e-12  5.56872640e-13 -2.96104913e-12  1.11630100e-12
  8.05058467e-14 -2.31109477e-12 -5.81393492e-13 -3.18123951e-12
 -3.46714788e-13 -2.47027160e-12  4.12380815e-12] [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 = = = = = = = = = 
(1000000, 11) (1000000,)
Tiempo transcurrido: 0.677001953125 segundos.


In [11]:
# Hyper-parameters of the gradient-descent run.
ITERACIONES = 40
APRENDIZAJE = 0.5
LAMBDA_REG  = 0.000002

# Train the model and report the training time.
t1 = time.time()
w, b = entrenar(datos, resul, ITERACIONES, APRENDIZAJE, LAMBDA_REG)
t2 = time.time()
print("Tiempo transcurrido: {} segundos.".format(t2 - t1))

# Measure the accuracy on the same data used for training, and time it.
t1 = time.time()
prec = precision(w, b, datos, resul)
t2 = time.time()
print("precisión:", prec)
print("Tiempo transcurrido: {} segundos.".format(t2 - t1))

0. cost function value: 1.0806564762364792
1. cost function value: 0.8612484482977357
2. cost function value: 0.7013177105814158
3. cost function value: 0.5872546677935642
4. cost function value: 0.5060611754671706
5. cost function value: 0.4475228927661015
6. cost function value: 0.4044264812458692
7. cost function value: 0.3719276432882331
8. cost function value: 0.3468220444232312
9. cost function value: 0.326981371478495
10. cost function value: 0.3109724803782283
11. cost function value: 0.29781280732476084
12. cost function value: 0.286815404880832
13. cost function value: 0.2774904330671418
14. cost function value: 0.26948183465695363
15. cost function value: 0.2625260217926492
16. cost function value: 0.25642447832554477
17. cost function value: 0.25102528108506583
18. cost function value: 0.2462104250233885
19. cost function value: 0.24188698526765534
20. cost function value: 0.23798085627929244
21. cost function value: 0.23443224917561187
22. cost function value: 0.2311924068