In [2]:
import numpy as np
import multiprocessing as mp

In [39]:
###########################################
############ Questões 1, 2 e 3 ############
###########################################

"""
Gera uma reta aleatória
"""
def generate_line():
    
    # Dimensões do poblema
    d = 2
    
    # Gera 2 pontos aleatórios entre 0 e 1 e os converte para
    # o intervalo entre -1 e 1
    p1 = np.random.rand(d) * 2 - 1
    p2 = np.random.rand(d) * 2 - 1
    
    # Calcula a reta na forma `y=ax + b` entre os pontos
    a = (p2[1] - p1[1]) / (p2[0] - p1[0])
    b = p1[1] - a*p1[0]

    return np.array([b, a, -1])

def generate_activation_function():
    
    # Dimensões do poblema
    d = 2
    
    # Gera 2 pontos aleatórios entre 0 e 1 e os converte para
    # o intervalo entre -1 e 1
    p1 = np.random.rand(d)
    p2 = np.random.rand(d)
    
    # Calcula a reta na forma `y=ax + b` entre os pontos
    a = (p2[1] - p1[1]) / (p2[0] - p1[0])
    b = p1[1] - a*p1[0]

    return lambda x: np.clip(a*x + b, 0, 1)


"""
Gera os dados baseados em uma reta
"""
def generate_data(n):
    
    # Gera 'n' pontos (x, y) aleatoriamente entre 0 e 1
    # e os converte para o intervalo -1 e 1
    points = (np.random.rand(n, 2) * 2) - 1
    
    return points

"""
Calcula os valores de saída da função ideal
"""
def calc_y(line, data):
    
    y = line[0] + np.dot(data, line[1:])
    
    # Transforma os valores em -1 ou 1
    # Valores em cima da reta são considerados -1
    y = np.where(y > 0, 1, -1)
    
    return y


"""

"""
def calc_prob_y(line, data):
    
    y = line[0] + np.dot(data, line[1:])
    
    # Transforma os valores em -1 ou 1
    # Valores em cima da reta são considerados -1
    # y = np.where(y > 0, 1, -1)
    
    return y

"""

"""
def calc_logistic_y(line, data):
    
    y = line[0] + np.dot(data, line[1:])
    
    # Transforma os valores em -1 ou 1
    # Valores em cima da reta são considerados -1
    # y = np.where(y > 0, 1, -1)
    
    return y

In [35]:
"""
Calcula a regressão linear
"""
def logistic_regression(x, y, l_rate=0.01, term_crit=0.01, max_epochs=5000):
    
    it = 0
    diff = float('inf')
    prev_w = np.zeros(3)
    w = prev_w
    
    indices = np.arange(y.shape[0])
    
    m_x = np.concatenate((np.ones((x.shape[0], 1)), x), axis=1)
    
    while diff > term_crit and it < max_epochs:
        
#         indices = np.arange(y.shape[0])
        np.random.shuffle(indices)
        
        for n in indices:
            
            prev_w = w
            
            yn = y[n]
            xn = m_x[n]
            
            grad = -xn * (yn / (1 + np.exp( yn * np.dot(w, xn) )))
            
            w = prev_w - l_rate * grad
            
            temp = w - prev_w
            
            it += 1
            diff = abs( np.sqrt(temp.dot(temp)) )
        
    return [w, it]

In [38]:
"""
Calcula o E_out para uma função linear
"""
def calc_log_e_out(line, w, act_f):
    
    # Gera mil pontos para serem avaliados
    ev_data = generate_data(1000)
    
    # Saída ideal
    ev_f_y = calc_y(line, ev_data)
    
    # Saída gerada pela g(x)
#     ev_g_y =calc_y(w, ev_data)
    ev_g_y = calc_logistic_y(w, ev_data)
    
#     print(ev_g_y)
    
    e_out = ev_f_y * np.log(1 / ev_g_y) + (1 - ev_f_y) * np.log(1 / (1 - ev_g_y))
    
    # Conta todos os pontos em que as saídas não foram iguais
#     misclassified = np.count_nonzero(ev_f_y - ev_g_y)

    # Calcula a porcentagem dos pontos classificados erroneamente
    return np.sum(e_out)

In [40]:
###########################################
############### Experimentos ##############
###########################################

def experiment_1(N):
    
    np.random.seed()
    
    line = generate_line()
    act_f = generate_activation_function()

    x = generate_data(N)
    y = calc_y(line, x)
    
    w_log = logistic_regression(x, y)[0]
    
    e_out = calc_log_e_out(line, w_log, act_f)
    
    return e_out


def experiment_2(N):
    
    np.random.seed()
    
    line = generate_line()

    x = generate_data(N)
    y = calc_y(line, x)
    
    w_log = logistic_regression(x, y)[1]
    
    return w_log
    
#     y_g = calc_y(w_lin, x)

#     e_in = np.count_nonzero( y - y_g ) / y.shape[0]
#     e_out = calc_linear_e_out(line, w_lin)
    
#     return [e_in, e_out, it, it2]



In [7]:
"""
Executa um certo número de experimentos paralelamente
Caso o número de processos não seja espeficidado,
o multiprocessing utiliza o valor padrão,
que costuma ser o número de processadores
"""
def run_experiment(N, num_exp, exp_id, processes=None):
    pool = mp.Pool(processes)
    
    function = None
    
    if exp_id == 1:
        function = experiment_1
    elif exp_id == 2:
        function = experiment_2
    else:
        print('Invalid experiment!')
        return None
    
    # Executa os experimentos 'num_exp' vezes, passando como
    # parâmetro para cada um, o número de dados N a serem gerados
    results = np.array(pool.map(function, [N] * num_exp))
    
    pool.close()
    
    # Calcula a média dos resultados por coluna
    return np.mean(results, axis=0)

In [36]:
run_experiment(100, 1, 1)

[-0.005       0.00213255 -0.00071348]
[-0.00998359  0.00426604 -0.00525812]
[ -1.49556557e-02   6.49351270e-05  -9.79246309e-03]
[-0.00993732  0.00295111 -0.01367814]
[-0.01493391  0.00627254 -0.00982975]
[-0.0198936   0.00745226 -0.00967699]
[-0.02482286  0.01142632 -0.01089818]
[-0.01978665  0.01122666 -0.01588545]
[-0.01475717  0.01455313 -0.01606611]
[-0.0097358   0.0151158  -0.01749679]
[-0.01465053  0.01997073 -0.02014972]
[-0.01959996  0.02000759 -0.02148261]
[-0.02454825  0.01739853 -0.02416317]
[-0.01951557  0.02038681 -0.02440185]
[-0.02439776  0.02205187 -0.02853598]
[-0.01937114  0.02046832 -0.03218157]
[-0.02422766  0.02257499 -0.03658159]
[-0.02918461  0.01880871 -0.03795621]
[-0.02416154  0.02151559 -0.03925544]
[-0.02903835  0.02113049 -0.04258751]
[-0.03388105  0.02565973 -0.0441954 ]
[-0.02885714  0.03054651 -0.0441225 ]
[-0.03363292  0.0332594  -0.0488354 ]
[-0.0286029   0.03506583 -0.04983274]
[-0.03336887  0.03725246 -0.05451811]
[-0.02840461  0.04068452 -0.0565130



nan

In [9]:
run_experiment(100, 100, 2)

100.0