In [63]:
%matplotlib notebook

In [64]:
import pyspark

sc = pyspark.sql.SparkSession.Builder().getOrCreate()

## Levanto los archivos

In [65]:
import json
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold, GroupKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid
from IPython.display import display, HTML
import math



## Utils

In [66]:
def distance(p1, p2):
    return math.sqrt((p1[0]-p2[0])**2+(p1[1]-p2[1])**2)


global x_0
global y_0

x_0 = 462385.503783397
y_0 = 6109042.35153865

def normalize_positions(row):
    row['x'] = row['x']-x_0
    row['y'] = row['y']-y_0
    return row



In [67]:
points_recep = sc.read.json('datos/train-test-by-emission.jsonlines/').rdd

In [68]:
def group_emisions(data):
    data_dict = data.asDict()
    recep_0 = data_dict.pop('recep_0')
    recep_1 = data_dict.pop('recep_1')
    recep_2 = data_dict.pop('recep_2')
    recep_3 = data_dict.pop('recep_3')
    
    data_dict['emissions'] = []
    for i in range(24):
        data_dict['emissions'].append([recep_0[i], recep_1[i], recep_2[i], recep_3[i]])
    return data_dict


In [69]:
points_emisions = points_recep.map(group_emisions)


In [70]:
def expand_rows_with_emissions(row):
    emissions = row.pop('emissions')
    print(row)
    rows = []
    for e in emissions:
        new_row = row.copy()
        new_row['recep'] = e
        rows.append(new_row)
    return rows
        
all_emissions = points_emisions.flatMap(expand_rows_with_emissions)

### Genero los atributos y etiquetas que me interesan

In [71]:
def generate_attrs(row):
    data = {
        'antenna_0': row['recep'][0],
        'antenna_1': row['recep'][1],
        'antenna_2': row['recep'][2],
        'antenna_3': row['recep'][3],
    }
    return {'data': data, 'x': row['x'], 'y': row['y'], 'point': row['Punto']}
    
all_emissions = all_emissions.map(generate_attrs)

### Saco las emisiones sin ninguna recepcion

In [72]:
all_emissions = all_emissions.filter(lambda x: sum(x['data'].values())>0)

### Normalizo coordenadas

In [73]:
all_emissions = all_emissions.map(lambda x: normalize_positions(x))

## Redes neuronales para regresion

In [74]:
def get_regressor_mae(predictions, real):
    mae = 0
    for i in range(len(predictions)):
        mae += distance(predictions[i], real[i])
    mae = mae/len(predictions)
    return mae


In [75]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable

In [76]:
regre_target = pd.DataFrame(all_emissions.map(lambda x: [x['x'], x['y']]).collect())
regre_data = pd.DataFrame(all_emissions.map(lambda x: x['data']).collect())

regre_data_np = np.array(regre_data.astype(float))
regre_target_np = np.array(regre_target.astype(float))

In [77]:
class LinearRegressionModel(nn.Module):

    def __init__(self, input_dim, output_dim):
        super(LinearRegressionModel, self).__init__() 
        self.linear = nn.Linear(input_dim, output_dim, bias=True)
        
    def forward(self, x):
        # Here the forward pass is simply a linear function
        out = self.linear(x)
        return out



In [78]:
class BestModel(nn.Module):

    def __init__(self, input_dim, hidden_list, output_dim):

        super(BestModel, self).__init__() 
        # Calling Super Class's constructor
        self.linear1 = nn.Linear(input_dim, hidden_dim[0], bias=True)
        self.act = nn.Sigmoid()
        self.linear2 = nn.Linear(hidden_dim[-1], output_dim, bias=True)
        # nn.linear is defined in nn.Module

    def forward(self, x):
        # Here the forward pass is simply a linear function
        out = self.linear1(x)
        out = self.act(out)
        out = self.linear2(out)
        return out


In [102]:
MODEL_PATH = 'datos/tmp/nn-model-2h'
def k_cross_validation(model_builder, data, target, k=5, init_from_file = False, save_in_file = False):   
    kf = KFold(n_splits=k, shuffle=True)
    mae_list = []
    mae_list_train = []
    k_index = 0
        
    for train_index, test_index in kf.split(data):
        params = model_builder()
        model = params['model']
        if init_from_file:
            model.load_state_dict(torch.load(MODEL_PATH))
        l_rate = params['l_rate']
        optimiser = params['optimiser']
        criterion = params['criterion']
        epochs = params['epochs']

        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = target[train_index], target[test_index]
        
        for epoch in range(epochs):

            epoch +=1
            #increase the number of epochs by 1 every time
            inputs = Variable(torch.from_numpy(X_train))
            labels = Variable(torch.from_numpy(y_train))

            inputs = torch.tensor(inputs, dtype=torch.float)
            labels = torch.tensor(labels, dtype=torch.float)

            #clear grads as discussed in prev post
            optimiser.zero_grad()
            #forward to get predicted values
            outputs = model.forward(inputs)
            loss = criterion(outputs, labels)
            loss.backward()# back props
            optimiser.step()# update the parameters
            if k_index == 0 and epoch < 1000: print('epoch {}, loss {}'.format(epoch,loss.item()))
            if k_index == 0 and epoch > 1000 and random.randint(0, 100) == 1: print('epoch {}, loss {}'.format(epoch,loss.item()))

        print(loss.data)
        
        test = Variable(torch.from_numpy(X_test))
        test = test.clone().detach().float()
        predicted = model.forward(test).data.numpy()
        mae = get_regressor_mae(predictions=predicted, real=y_test)
        print(mae)
        
        train = Variable(torch.from_numpy(X_train))
        train = train.clone().detach().float()
        predicted_train = model.forward(train).data.numpy()
        mae_train = get_regressor_mae(predictions=predicted_train, real=y_train)
        print(mae_train)
        
        mae_list.append(mae)
        mae_list_train.append(mae_train)
        k_index += 1
        if save_in_file: torch.save(model.state_dict(), MODEL_PATH)
        

    return mae_list, mae_list_train


In [112]:
def build_nn_params():
    #model = LinearRegressionModel(4,2)
    #model = BestModel(4, 4, 2)
    model = BestModel()
    criterion = nn.MSELoss()
    l_rate = 0.00000001
    optimiser = torch.optim.SGD(model.parameters(), lr = l_rate) 
    epochs = 400000
    
    return {
        'model': model,
        'l_rate': l_rate,
        'optimiser': optimiser,
        'criterion': criterion,
        'epochs': epochs
    }

In [113]:
class BestModel(nn.Module):

    def __init__(self):

        super(BestModel, self).__init__() 
        # Calling Super Class's constructor
        self.linear1 = nn.Linear(4, 8, bias=True)
        self.linear2 = nn.Linear(8, 4, bias=True)
        self.linear3 = nn.Linear(4, 2, bias=True)
        self.act = nn.ReLU()

    def forward(self, x):
        x = self.act(self.linear1(x))
        x = self.act(self.linear2(x))
        x = self.linear3(x)             
        return x


In [114]:
k_cross_validation(build_nn_params, regre_data_np, regre_target_np, k=5, init_from_file=True, save_in_file=True)



epoch 1, loss 84708.84375
epoch 2, loss 82908.8359375
epoch 3, loss 81677.6328125
epoch 4, loss 80848.5546875
epoch 5, loss 80288.5546875
epoch 6, loss 79909.8046875
epoch 7, loss 79657.203125
epoch 8, loss 79486.7734375
epoch 9, loss 79374.6015625
epoch 10, loss 79299.7578125
epoch 11, loss 79248.640625
epoch 12, loss 79213.359375
epoch 13, loss 79187.921875
epoch 14, loss 79168.8515625
epoch 15, loss 79154.3359375
epoch 16, loss 79142.328125
epoch 17, loss 79131.890625
epoch 18, loss 79122.953125
epoch 19, loss 79114.9140625
epoch 20, loss 79107.6953125
epoch 21, loss 79100.8125
epoch 22, loss 79094.3515625
epoch 23, loss 79087.9609375
epoch 24, loss 79081.78125
epoch 25, loss 79075.90625
epoch 26, loss 79070.2890625
epoch 27, loss 79064.8828125
epoch 28, loss 79059.71875
epoch 29, loss 79054.65625
epoch 30, loss 79049.78125
epoch 31, loss 79045.1015625
epoch 32, loss 79040.5
epoch 33, loss 79036.078125
epoch 34, loss 79031.7734375
epoch 35, loss 79027.6171875
epoch 36, loss 79023.55

epoch 386, loss 78721.265625
epoch 387, loss 78720.9140625
epoch 388, loss 78720.5546875
epoch 389, loss 78720.1953125
epoch 390, loss 78719.84375
epoch 391, loss 78719.4921875
epoch 392, loss 78719.140625
epoch 393, loss 78718.796875
epoch 394, loss 78718.4453125
epoch 395, loss 78718.09375
epoch 396, loss 78717.75
epoch 397, loss 78717.3984375
epoch 398, loss 78717.0546875
epoch 399, loss 78716.71875
epoch 400, loss 78716.3828125
epoch 401, loss 78716.046875
epoch 402, loss 78715.703125
epoch 403, loss 78715.3046875
epoch 404, loss 78714.90625
epoch 405, loss 78714.5234375
epoch 406, loss 78714.1484375
epoch 407, loss 78713.7734375
epoch 408, loss 78713.421875
epoch 409, loss 78713.0625
epoch 410, loss 78712.7109375
epoch 411, loss 78712.3671875
epoch 412, loss 78712.015625
epoch 413, loss 78711.671875
epoch 414, loss 78711.328125
epoch 415, loss 78710.984375
epoch 416, loss 78710.640625
epoch 417, loss 78710.296875
epoch 418, loss 78709.9453125
epoch 419, loss 78709.6015625
epoch 42

epoch 674, loss 78649.953125
epoch 675, loss 78649.7890625
epoch 676, loss 78649.625
epoch 677, loss 78649.4609375
epoch 678, loss 78649.296875
epoch 679, loss 78649.1328125
epoch 680, loss 78648.96875
epoch 681, loss 78648.8046875
epoch 682, loss 78648.6484375
epoch 683, loss 78648.484375
epoch 684, loss 78648.328125
epoch 685, loss 78648.1640625
epoch 686, loss 78648.0
epoch 687, loss 78647.84375
epoch 688, loss 78647.6875
epoch 689, loss 78647.5234375
epoch 690, loss 78647.3671875
epoch 691, loss 78647.2109375
epoch 692, loss 78647.046875
epoch 693, loss 78646.890625
epoch 694, loss 78646.734375
epoch 695, loss 78646.578125
epoch 696, loss 78646.421875
epoch 697, loss 78646.265625
epoch 698, loss 78646.109375
epoch 699, loss 78645.953125
epoch 700, loss 78645.796875
epoch 701, loss 78645.640625
epoch 702, loss 78645.484375
epoch 703, loss 78645.328125
epoch 704, loss 78645.171875
epoch 705, loss 78645.015625
epoch 706, loss 78644.859375
epoch 707, loss 78644.703125
epoch 708, loss 7

epoch 1140, loss 78596.8828125
epoch 1165, loss 78595.265625
epoch 1206, loss 78592.6875
epoch 1370, loss 78583.25
epoch 1415, loss 78580.9296875
epoch 1535, loss 78574.609375
epoch 1636, loss 78569.6484375
epoch 1672, loss 78567.953125
epoch 1716, loss 78565.875
epoch 1740, loss 78564.78125
epoch 1916, loss 78557.1328125
epoch 2126, loss 78549.4375
epoch 2336, loss 78542.4296875
epoch 2416, loss 78539.8515625
epoch 2728, loss 78530.7734375
epoch 2866, loss 78527.2421875
epoch 2872, loss 78527.078125
epoch 3145, loss 78521.3203125
epoch 3164, loss 78520.9765625
epoch 3300, loss 78518.625
epoch 3540, loss 78514.8203125
epoch 3636, loss 78513.375
epoch 3660, loss 78513.0234375
epoch 3919, loss 78509.328125
epoch 3936, loss 78509.09375
epoch 3982, loss 78508.5078125
epoch 4052, loss 78507.6328125
epoch 4124, loss 78506.75
epoch 4142, loss 78506.53125
epoch 4149, loss 78506.4453125
epoch 4297, loss 78504.7578125
epoch 4365, loss 78504.0234375
epoch 4489, loss 78502.7109375
epoch 4566, loss

epoch 29687, loss 78361.40625
epoch 30038, loss 78360.3125
epoch 30043, loss 78360.296875
epoch 30110, loss 78360.09375
epoch 30137, loss 78360.0078125
epoch 30344, loss 78359.3828125
epoch 30366, loss 78359.3125
epoch 30399, loss 78359.21875
epoch 30486, loss 78358.9453125
epoch 30725, loss 78358.1875
epoch 30815, loss 78357.9140625
epoch 31012, loss 78357.3203125
epoch 31477, loss 78355.953125
epoch 31556, loss 78355.7265625
epoch 31586, loss 78355.6484375
epoch 31665, loss 78355.421875
epoch 31893, loss 78354.7734375
epoch 31979, loss 78354.53125
epoch 31984, loss 78354.5234375
epoch 32298, loss 78353.609375
epoch 32372, loss 78353.375
epoch 32421, loss 78353.2109375
epoch 32453, loss 78353.1015625
epoch 32606, loss 78352.6015625
epoch 32744, loss 78352.15625
epoch 32789, loss 78352.0078125
epoch 32904, loss 78351.640625
epoch 32940, loss 78351.5234375
epoch 32977, loss 78351.4140625
epoch 33236, loss 78350.609375
epoch 33370, loss 78350.203125
epoch 33501, loss 78349.8046875
epoch 

epoch 57551, loss 78146.7578125
epoch 57573, loss 78146.6484375
epoch 57675, loss 78146.125
epoch 58040, loss 78144.2734375
epoch 58220, loss 78143.3515625
epoch 58266, loss 78143.09375
epoch 58359, loss 78142.5859375
epoch 58362, loss 78142.5703125
epoch 58429, loss 78142.2109375
epoch 58453, loss 78142.078125
epoch 58458, loss 78142.0546875
epoch 58460, loss 78142.0390625
epoch 58482, loss 78141.9296875
epoch 58509, loss 78141.78125
epoch 58600, loss 78141.3046875
epoch 58669, loss 78140.9453125
epoch 58721, loss 78140.6796875
epoch 58828, loss 78140.140625
epoch 58880, loss 78139.875
epoch 59103, loss 78138.7109375
epoch 59172, loss 78138.359375
epoch 59333, loss 78137.5703125
epoch 59592, loss 78136.2109375
epoch 59603, loss 78136.1484375
epoch 59630, loss 78136.0078125
epoch 59840, loss 78134.921875
epoch 59990, loss 78134.171875
epoch 60030, loss 78133.9453125
epoch 60052, loss 78133.828125
epoch 60130, loss 78133.390625
epoch 60156, loss 78133.25
epoch 60184, loss 78133.09375
ep

epoch 83188, loss 78022.921875
epoch 83861, loss 78019.75
epoch 83911, loss 78019.59375
epoch 84109, loss 78019.0078125
epoch 84122, loss 78018.96875
epoch 84150, loss 78018.890625
epoch 84243, loss 78018.625
epoch 84281, loss 78018.5234375
epoch 84396, loss 78018.1953125
epoch 84503, loss 78017.90625
epoch 84582, loss 78017.6875
epoch 84704, loss 78017.3515625
epoch 84787, loss 78017.125
epoch 84807, loss 78017.0703125
epoch 84923, loss 78016.7578125
epoch 84987, loss 78016.5859375
epoch 85137, loss 78016.1875
epoch 85139, loss 78016.1875
epoch 85168, loss 78016.109375
epoch 85354, loss 78015.625
epoch 85399, loss 78015.5078125
epoch 85459, loss 78015.359375
epoch 85488, loss 78015.28125
epoch 85768, loss 78014.578125
epoch 85882, loss 78014.3046875
epoch 86072, loss 78013.875
epoch 86211, loss 78013.5546875
epoch 86217, loss 78013.5390625
epoch 86478, loss 78012.9609375
epoch 86542, loss 78012.8125
epoch 86592, loss 78012.7109375
epoch 86677, loss 78012.515625
epoch 86679, loss 78012

epoch 111075, loss 77967.15625
epoch 111088, loss 77967.140625
epoch 111089, loss 77967.1328125
epoch 111185, loss 77967.0
epoch 111395, loss 77966.703125
epoch 111434, loss 77966.65625
epoch 111502, loss 77966.5546875
epoch 111632, loss 77966.375
epoch 111707, loss 77966.2734375
epoch 111790, loss 77966.15625
epoch 111884, loss 77966.0234375
epoch 111957, loss 77965.921875
epoch 112060, loss 77965.78125
epoch 112073, loss 77965.765625
epoch 112152, loss 77965.65625
epoch 112219, loss 77965.5625
epoch 112676, loss 77964.9375
epoch 112701, loss 77964.90625
epoch 112834, loss 77964.7265625
epoch 113033, loss 77964.453125
epoch 113054, loss 77964.421875
epoch 113132, loss 77964.3203125
epoch 113181, loss 77964.25
epoch 113330, loss 77964.046875
epoch 113373, loss 77963.9921875
epoch 113494, loss 77963.828125
epoch 113668, loss 77963.5859375
epoch 113677, loss 77963.578125
epoch 113805, loss 77963.40625
epoch 114069, loss 77963.046875
epoch 114212, loss 77962.8515625
epoch 114450, loss 779

epoch 137378, loss 77934.921875
epoch 137391, loss 77934.90625
epoch 137460, loss 77934.8359375
epoch 137595, loss 77934.6953125
epoch 137685, loss 77934.6015625
epoch 137915, loss 77934.3671875
epoch 138020, loss 77934.2578125
epoch 138027, loss 77934.25
epoch 138060, loss 77934.21875
epoch 138167, loss 77934.109375
epoch 138410, loss 77933.859375
epoch 138415, loss 77933.859375
epoch 138529, loss 77933.7421875
epoch 138660, loss 77933.609375
epoch 138767, loss 77933.5
epoch 138879, loss 77933.3828125
epoch 139084, loss 77933.171875
epoch 139105, loss 77933.1484375
epoch 139154, loss 77933.1015625
epoch 139168, loss 77933.09375
epoch 139264, loss 77932.9921875
epoch 139429, loss 77932.8203125
epoch 139672, loss 77932.578125
epoch 139857, loss 77932.390625
epoch 139860, loss 77932.3828125
epoch 140030, loss 77932.2109375
epoch 140087, loss 77932.15625
epoch 140102, loss 77932.140625
epoch 140104, loss 77932.1328125
epoch 140128, loss 77932.109375
epoch 140210, loss 77932.03125
epoch 14

epoch 162785, loss 77908.890625
epoch 162795, loss 77908.875
epoch 162813, loss 77908.859375
epoch 162899, loss 77908.7734375
epoch 162965, loss 77908.7109375
epoch 163001, loss 77908.671875
epoch 163076, loss 77908.6015625
epoch 163340, loss 77908.3359375
epoch 163357, loss 77908.3203125
epoch 163365, loss 77908.3125
epoch 163933, loss 77907.75
epoch 163939, loss 77907.7421875
epoch 163977, loss 77907.7109375
epoch 164187, loss 77907.5
epoch 164188, loss 77907.5
epoch 164191, loss 77907.5
epoch 164279, loss 77907.4140625
epoch 164326, loss 77907.3671875
epoch 164619, loss 77907.078125
epoch 164637, loss 77907.0546875
epoch 164984, loss 77906.71875
epoch 164987, loss 77906.7109375
epoch 165077, loss 77906.6328125
epoch 165098, loss 77906.609375
epoch 165376, loss 77906.3359375
epoch 165436, loss 77906.2734375
epoch 165497, loss 77906.21875
epoch 165527, loss 77906.1875
epoch 165581, loss 77906.1328125
epoch 165843, loss 77905.875
epoch 166081, loss 77905.6484375
epoch 166112, loss 7790

epoch 187427, loss 77882.2265625
epoch 187677, loss 77881.96875
epoch 187977, loss 77881.6640625
epoch 188004, loss 77881.6328125
epoch 188033, loss 77881.609375
epoch 188119, loss 77881.5234375
epoch 188188, loss 77881.453125
epoch 188318, loss 77881.2890625
epoch 188437, loss 77881.1171875
epoch 188455, loss 77881.09375
epoch 188767, loss 77880.7265625
epoch 188792, loss 77880.6953125
epoch 188963, loss 77880.5078125
epoch 189089, loss 77880.3671875
epoch 189112, loss 77880.3359375
epoch 189137, loss 77880.3125
epoch 189364, loss 77880.0625
epoch 189455, loss 77879.96875
epoch 189581, loss 77879.828125
epoch 189711, loss 77879.6875
epoch 189713, loss 77879.6953125
epoch 189745, loss 77879.65625
epoch 189794, loss 77879.609375
epoch 189845, loss 77879.5546875
epoch 189901, loss 77879.5
epoch 189958, loss 77879.4453125
epoch 189995, loss 77879.3984375
epoch 190348, loss 77879.046875
epoch 190365, loss 77879.03125
epoch 190481, loss 77878.9140625
epoch 190606, loss 77878.7890625
epoch 1

epoch 216244, loss 77855.6484375
epoch 216599, loss 77855.3671875
epoch 216685, loss 77855.3046875
epoch 216888, loss 77855.1328125
epoch 216973, loss 77855.0625
epoch 217036, loss 77855.0078125
epoch 217073, loss 77854.9765625
epoch 217189, loss 77854.8828125
epoch 217417, loss 77854.6953125
epoch 217502, loss 77854.625
epoch 217531, loss 77854.6015625
epoch 217586, loss 77854.5546875
epoch 217631, loss 77854.5234375
epoch 217832, loss 77854.359375
epoch 217841, loss 77854.3515625
epoch 217994, loss 77854.2265625
epoch 218061, loss 77854.1796875
epoch 218183, loss 77854.078125
epoch 218416, loss 77853.890625
epoch 218549, loss 77853.7890625
epoch 218800, loss 77853.5859375
epoch 218831, loss 77853.5625
epoch 218905, loss 77853.5078125
epoch 219010, loss 77853.421875
epoch 219049, loss 77853.3984375
epoch 219052, loss 77853.390625
epoch 219056, loss 77853.390625
epoch 219148, loss 77853.3125
epoch 219220, loss 77853.2578125
epoch 219255, loss 77853.234375
epoch 219259, loss 77853.22656

epoch 242440, loss 77835.8828125
epoch 242490, loss 77835.84375
epoch 242505, loss 77835.8359375
epoch 242510, loss 77835.828125
epoch 242515, loss 77835.8203125
epoch 242652, loss 77835.71875
epoch 242758, loss 77835.6328125
epoch 242766, loss 77835.6328125
epoch 242975, loss 77835.4609375
epoch 243224, loss 77835.2578125
epoch 243356, loss 77835.1484375
epoch 243430, loss 77835.0859375
epoch 243601, loss 77834.953125
epoch 243659, loss 77834.90625
epoch 244117, loss 77834.546875
epoch 244146, loss 77834.53125
epoch 244152, loss 77834.5234375
epoch 244405, loss 77834.328125
epoch 244442, loss 77834.296875
epoch 244598, loss 77834.1796875
epoch 244605, loss 77834.1796875
epoch 244913, loss 77833.9375
epoch 245066, loss 77833.8203125
epoch 245171, loss 77833.7421875
epoch 245449, loss 77833.5390625
epoch 245470, loss 77833.5234375
epoch 245490, loss 77833.5078125
epoch 245560, loss 77833.453125
epoch 245626, loss 77833.40625
epoch 245649, loss 77833.390625
epoch 245781, loss 77833.29687

epoch 269636, loss 77815.140625
epoch 269709, loss 77815.09375
epoch 269972, loss 77814.9140625
epoch 270003, loss 77814.890625
epoch 270012, loss 77814.8828125
epoch 270199, loss 77814.7578125
epoch 270237, loss 77814.734375
epoch 270268, loss 77814.7109375
epoch 270325, loss 77814.671875
epoch 270437, loss 77814.6015625
epoch 270492, loss 77814.5625
epoch 270641, loss 77814.453125
epoch 270652, loss 77814.4453125
epoch 270667, loss 77814.4375
epoch 270812, loss 77814.3359375
epoch 270935, loss 77814.2578125
epoch 271073, loss 77814.1640625
epoch 271144, loss 77814.1171875
epoch 271423, loss 77813.9375
epoch 271504, loss 77813.8828125
epoch 271543, loss 77813.859375
epoch 271579, loss 77813.8359375
epoch 271585, loss 77813.828125
epoch 271777, loss 77813.703125
epoch 271891, loss 77813.625
epoch 271930, loss 77813.6015625
epoch 272087, loss 77813.5
epoch 272091, loss 77813.5
epoch 272178, loss 77813.4453125
epoch 272318, loss 77813.3515625
epoch 272399, loss 77813.3046875
epoch 272967

epoch 294470, loss 77798.9296875
epoch 294491, loss 77798.9140625
epoch 294540, loss 77798.8828125
epoch 294784, loss 77798.7421875
epoch 294853, loss 77798.6953125
epoch 294968, loss 77798.625
epoch 295009, loss 77798.6015625
epoch 295078, loss 77798.5625
epoch 295115, loss 77798.5390625
epoch 295212, loss 77798.484375
epoch 295259, loss 77798.453125
epoch 295462, loss 77798.328125
epoch 295654, loss 77798.21875
epoch 295668, loss 77798.2109375
epoch 295756, loss 77798.1484375
epoch 295939, loss 77798.046875
epoch 296096, loss 77797.9453125
epoch 296165, loss 77797.90625
epoch 296331, loss 77797.8046875
epoch 296403, loss 77797.765625
epoch 296479, loss 77797.71875
epoch 296546, loss 77797.6796875
epoch 296781, loss 77797.5390625
epoch 297077, loss 77797.359375
epoch 297089, loss 77797.3515625
epoch 297144, loss 77797.3203125
epoch 297224, loss 77797.2734375
epoch 297252, loss 77797.2578125
epoch 297337, loss 77797.203125
epoch 297419, loss 77797.15625
epoch 297682, loss 77797.0078125

epoch 320229, loss 77784.5
epoch 320558, loss 77784.3203125
epoch 320577, loss 77784.3125
epoch 320708, loss 77784.2421875
epoch 320745, loss 77784.21875
epoch 320805, loss 77784.1875
epoch 320905, loss 77784.1328125
epoch 320906, loss 77784.1328125
epoch 320920, loss 77784.125
epoch 320960, loss 77784.1015625
epoch 321057, loss 77784.0546875
epoch 321059, loss 77784.046875
epoch 321224, loss 77783.9609375
epoch 321425, loss 77783.8515625
epoch 321792, loss 77783.65625
epoch 321832, loss 77783.6328125
epoch 321978, loss 77783.5546875
epoch 322068, loss 77783.5078125
epoch 322093, loss 77783.4921875
epoch 322431, loss 77783.3125
epoch 322499, loss 77783.2734375
epoch 322522, loss 77783.265625
epoch 322680, loss 77783.1796875
epoch 322769, loss 77783.1328125
epoch 323066, loss 77782.9765625
epoch 323080, loss 77782.96875
epoch 323153, loss 77782.9296875
epoch 323174, loss 77782.921875
epoch 323213, loss 77782.8984375
epoch 323226, loss 77782.890625
epoch 323262, loss 77782.875
epoch 3233

epoch 345225, loss 77771.8125
epoch 345267, loss 77771.796875
epoch 345401, loss 77771.734375
epoch 345443, loss 77771.7109375
epoch 345595, loss 77771.640625
epoch 345676, loss 77771.6015625
epoch 345734, loss 77771.578125
epoch 345810, loss 77771.5390625
epoch 345824, loss 77771.53125
epoch 345983, loss 77771.453125
epoch 346172, loss 77771.3671875
epoch 346184, loss 77771.359375
epoch 346422, loss 77771.2421875
epoch 346487, loss 77771.2109375
epoch 346504, loss 77771.2109375
epoch 346522, loss 77771.1953125
epoch 346536, loss 77771.1875
epoch 346764, loss 77771.078125
epoch 346787, loss 77771.0703125
epoch 347030, loss 77770.953125
epoch 347278, loss 77770.8359375
epoch 347284, loss 77770.8359375
epoch 347374, loss 77770.7890625
epoch 347560, loss 77770.703125
epoch 347629, loss 77770.671875
epoch 347643, loss 77770.6640625
epoch 347655, loss 77770.65625
epoch 347677, loss 77770.6484375
epoch 347687, loss 77770.640625
epoch 347728, loss 77770.625
epoch 347840, loss 77770.5703125
ep

epoch 371049, loss 77759.8515625
epoch 371062, loss 77759.84375
epoch 371169, loss 77759.796875
epoch 371259, loss 77759.7578125
epoch 371321, loss 77759.7265625
epoch 371405, loss 77759.6875
epoch 371519, loss 77759.640625
epoch 371660, loss 77759.578125
epoch 371725, loss 77759.546875
epoch 371744, loss 77759.5390625
epoch 371971, loss 77759.4375
epoch 371996, loss 77759.421875
epoch 372023, loss 77759.4140625
epoch 372250, loss 77759.3125
epoch 372289, loss 77759.2890625
epoch 372325, loss 77759.28125
epoch 372380, loss 77759.25
epoch 372403, loss 77759.2421875
epoch 372680, loss 77759.1171875
epoch 372738, loss 77759.0859375
epoch 372957, loss 77758.9921875
epoch 373008, loss 77758.9765625
epoch 373098, loss 77758.9296875
epoch 373356, loss 77758.8125
epoch 373373, loss 77758.8046875
epoch 373524, loss 77758.7421875
epoch 373594, loss 77758.7109375
epoch 373612, loss 77758.703125
epoch 373648, loss 77758.6875
epoch 373819, loss 77758.609375
epoch 374183, loss 77758.4453125
epoch 37

epoch 396066, loss 77748.2734375
epoch 396353, loss 77748.1484375
epoch 396359, loss 77748.140625
epoch 396381, loss 77748.1328125
epoch 396484, loss 77748.0859375
epoch 396699, loss 77747.9921875
epoch 396895, loss 77747.90625
epoch 397077, loss 77747.8203125
epoch 397221, loss 77747.765625
epoch 397361, loss 77747.6953125
epoch 397382, loss 77747.6875
epoch 397533, loss 77747.625
epoch 397588, loss 77747.6015625
epoch 397741, loss 77747.53125
epoch 397756, loss 77747.5234375
epoch 397767, loss 77747.515625
epoch 397841, loss 77747.484375
epoch 397853, loss 77747.4765625
epoch 397931, loss 77747.4453125
epoch 398075, loss 77747.3828125
epoch 398314, loss 77747.2734375
epoch 398394, loss 77747.2421875
epoch 398449, loss 77747.21875
epoch 398694, loss 77747.109375
epoch 398752, loss 77747.0859375
epoch 399051, loss 77746.953125
epoch 399093, loss 77746.9296875
epoch 399132, loss 77746.9140625
epoch 399144, loss 77746.90625
epoch 399233, loss 77746.8671875
epoch 399372, loss 77746.8125
e

KeyboardInterrupt: 

## Redes neuronales para clasificacion

In [113]:
global dict_coordenadas
dict_coordenadas = points_recep.map(lambda x: (x['Punto'],(x['x'], x['y']))).collectAsMap()

def get_classifier_mae(predictions, real):
    sum_error = 0
    count = 0
    for i in range(len(predictions)):
        if predictions[i] not in dict_coordenadas:
            print('predicted point dont exist {}'.format(predictions[i]))
            continue
        pred_position = dict_coordenadas[predictions[i]]
        real_position = dict_coordenadas[real[i]]
        sum_error += distance(pred_position, real_position)
        count += 1
    return sum_error/count

In [111]:
class LinearClassificationModel(nn.Module):

    def __init__(self, input_dim, output_dim):
        super(LinearClassificationModel, self).__init__() 
        self.linear = nn.Linear(input_dim, output_dim, bias=True)
        self.logprob = nn.LogSoftmax(dim=1)                 # -Log(Softmax probability).


    def forward(self, x):
        x = self.linear(x)
        x = self.logprob(x)
        return x

In [95]:
classi_target = pd.DataFrame(all_emissions.map(lambda x: x['point']).collect())
classi_data = pd.DataFrame(all_emissions.map(lambda x: x['data']).collect())


classi_data_np = np.array(classi_data.astype(float))
classi_target_np = np.array(classi_target.astype(int))


X_train, X_test, y_train, y_test = train_test_split(classi_data_np, classi_target_np, test_size=15)


In [63]:
input_dim = 4
output_dim = 543

model = LinearClassificationModel(input_dim,output_dim)


#criterion = nn.CrossEntropyLoss()
criterion = nn.NLLLoss()


l_rate = 0.01
optimiser = torch.optim.SGD(model.parameters(), lr = l_rate) #Stochastic Gradient Descent
#optimizer = torch.optim.Adam(model.parameters(),lr=l_rate,weight_decay=1e-4)


epochs = 50

In [64]:
for epoch in range(epochs):

    epoch +=1
    #increase the number of epochs by 1 every time
    
    inputs = Variable(torch.Tensor(X_train), requires_grad=False)
    labels = Variable(torch.Tensor(y_train).long(), requires_grad=False)
    

    #clear grads as discussed in prev post
    optimiser.zero_grad()
    #forward to get predicted values
    outputs = model.forward(inputs)
    loss = criterion(outputs, labels.view(-1))
    loss.backward()# back props
    optimiser.step()# update the parameters
    print('epoch {}, loss {}'.format(epoch,loss.item()))

epoch 1, loss 51.711814880371094
epoch 2, loss 49.58241653442383
epoch 3, loss 47.9950065612793
epoch 4, loss 46.7168083190918
epoch 5, loss 45.635292053222656
epoch 6, loss 44.693607330322266
epoch 7, loss 43.810516357421875
epoch 8, loss 42.99284744262695
epoch 9, loss 42.21604919433594
epoch 10, loss 41.474178314208984
epoch 11, loss 40.75896453857422
epoch 12, loss 40.06791687011719
epoch 13, loss 39.398746490478516
epoch 14, loss 38.74722671508789
epoch 15, loss 38.11387634277344
epoch 16, loss 37.497135162353516
epoch 17, loss 36.888389587402344
epoch 18, loss 36.28870391845703
epoch 19, loss 35.69886779785156
epoch 20, loss 35.12299346923828
epoch 21, loss 34.560829162597656
epoch 22, loss 34.01763916015625
epoch 23, loss 33.500389099121094
epoch 24, loss 33.00920486450195
epoch 25, loss 32.526268005371094
epoch 26, loss 32.04756164550781
epoch 27, loss 31.575353622436523
epoch 28, loss 31.111608505249023
epoch 29, loss 30.656635284423828
epoch 30, loss 30.210376739501953
epoch 

In [65]:
test = Variable(torch.Tensor(X_test), requires_grad=False)
predicted_proba = model.forward(test)
predicted_proba = predicted_proba.exp().detach().data.numpy()

In [66]:
predicted = []
for probas in predicted_proba:
    point = np.argmax(probas)
    predicted.append(point)

get_classifier_mae(predictions=predicted, real=y_test.ravel())

408.1757731653908

In [106]:
classi_target_np[20]

array([149])

In [107]:
np.where(np.unique(classi_target_np, axis=0) == classi_target_np[20])

(array([32]), array([0]))

In [121]:
def k_cross_validation_classi(model_builder, data, target, r_target_np, k=5):   
    kf = KFold(n_splits=k, shuffle=True)
    mae_list = []
    mae_list_train = []
    k_index = 0
    
    unique_target = np.unique(target, axis=0)
    groups = []
    for h in range(len(target)):
        i,j = np.where(unique_target == target[h])
        groups.append(i[0])
    
    for train_index, test_index in kf.split(data, groups=groups):
        regressor = KNeighborsRegressor()
        params = model_builder()
        model = params['model']
        l_rate = params['l_rate']
        optimiser = params['optimiser']
        criterion = params['criterion']
        epochs = params['epochs']

        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = target[train_index], target[test_index]
        
        for epoch in range(epochs):

            epoch +=1
            #increase the number of epochs by 1 every time

            inputs = Variable(torch.Tensor(X_train), requires_grad=False)
            labels = Variable(torch.Tensor(y_train).long(), requires_grad=False)


            #clear grads as discussed in prev post
            optimiser.zero_grad()
            #forward to get predicted values
            outputs = model.forward(inputs)
            loss = criterion(outputs, labels.view(-1))
            loss.backward()# back props
            optimiser.step()# update the parameters
            if k_index == 0 and epoch < 1000: print('epoch {}, loss {}'.format(epoch,loss.item()))
            if k_index == 0 and epoch > 1000 and random.randint(0, 100) == 1: print('epoch {}, loss {}'.format(epoch,loss.item()))

        print(loss.data)
        
        test = Variable(torch.Tensor(X_test), requires_grad=False)
        predicted_proba = model.forward(test)
        predicted_proba = predicted_proba.exp().detach().data.numpy()

        label_list = model.forward(Variable(torch.Tensor(X_train), requires_grad=False)).exp().detach().data.numpy()
        regressor.fit(label_list, r_target_np[train_index])

        c_predictions = model.forward(Variable(torch.Tensor(X_test), requires_grad=False)).exp().detach().data.numpy()
        r_predictions = regressor.predict(c_predictions)
        mae = get_regressor_mae(r_predictions, r_target_np[test_index])
        mae_list.append(mae)
        '''
        predicted = []
        for probas in predicted_proba:
            point = np.argmax(probas)
            predicted.append(point)
        KNeighborsRegressor()

        mae = get_classifier_mae(predictions=predicted, real=y_test.ravel())
        print(mae)
        
        train = Variable(torch.Tensor(X_train), requires_grad=False)
        predicted_proba_train = model.forward(train)
        predicted_proba_train = predicted_proba_train.exp().detach().data.numpy()
        
        predicted_train = []
        for probas in predicted_proba_train:
            point = np.argmax(probas)
            predicted_train.append(point)

        mae_train = get_classifier_mae(predictions=predicted_train, real=y_train.ravel())
        
        mae_list.append(mae)
        mae_list_train.append(mae_train)
        '''
        k_index += 1

    return mae_list, mae_list_train


In [122]:
def build_classi_nn_params():
    model = LinearClassificationModel(4, 543)
    #criterion = nn.CrossEntropyLoss()
    criterion = nn.NLLLoss()
    l_rate = 0.01
    optimiser = torch.optim.SGD(model.parameters(), lr = l_rate) #Stochastic Gradient Descent
    #optimizer = torch.optim.Adam(model.parameters(),lr=l_rate,weight_decay=1e-4)
    epochs = 1000
    
    return {
        'model': model,
        'l_rate': l_rate,
        'optimiser': optimiser,
        'criterion': criterion,
        'epochs': epochs
    }

In [123]:
k_cross_validation_classi(build_classi_nn_params, classi_data_np, classi_target_np, regre_target_np, k=5)

epoch 1, loss 50.011051177978516
epoch 2, loss 48.17623519897461
epoch 3, loss 46.67885971069336
epoch 4, loss 45.42620849609375
epoch 5, loss 44.35654067993164
epoch 6, loss 43.40275192260742
epoch 7, loss 42.50503158569336
epoch 8, loss 41.6658821105957
epoch 9, loss 40.868919372558594
epoch 10, loss 40.094505310058594
epoch 11, loss 39.34539794921875
epoch 12, loss 38.62009048461914
epoch 13, loss 37.924686431884766
epoch 14, loss 37.271629333496094
epoch 15, loss 36.65378189086914
epoch 16, loss 36.053932189941406
epoch 17, loss 35.47392272949219
epoch 18, loss 34.91233444213867
epoch 19, loss 34.36654281616211
epoch 20, loss 33.83866500854492
epoch 21, loss 33.32358169555664
epoch 22, loss 32.8249626159668
epoch 23, loss 32.33873748779297
epoch 24, loss 31.866304397583008
epoch 25, loss 31.409460067749023
epoch 26, loss 30.965911865234375
epoch 27, loss 30.534198760986328
epoch 28, loss 30.112667083740234
epoch 29, loss 29.69964027404785
epoch 30, loss 29.2938289642334
epoch 31, l

epoch 249, loss 6.267593860626221
epoch 250, loss 6.247483253479004
epoch 251, loss 6.2358012199401855
epoch 252, loss 6.216021537780762
epoch 253, loss 6.204683780670166
epoch 254, loss 6.185272693634033
epoch 255, loss 6.17429780960083
epoch 256, loss 6.155206680297852
epoch 257, loss 6.144572734832764
epoch 258, loss 6.125818729400635
epoch 259, loss 6.1155290603637695
epoch 260, loss 6.097100257873535
epoch 261, loss 6.087138652801514
epoch 262, loss 6.068971633911133
epoch 263, loss 6.059328556060791
epoch 264, loss 6.041471481323242
epoch 265, loss 6.032128810882568
epoch 266, loss 6.014522552490234
epoch 267, loss 6.005456924438477
epoch 268, loss 5.988126754760742
epoch 269, loss 5.9793195724487305
epoch 270, loss 5.962242126464844
epoch 271, loss 5.953705310821533
epoch 272, loss 5.9368577003479
epoch 273, loss 5.928596019744873
epoch 274, loss 5.9119696617126465
epoch 275, loss 5.903951644897461
epoch 276, loss 5.887580871582031
epoch 277, loss 5.879817485809326
epoch 278, lo

epoch 494, loss 4.659915924072266
epoch 495, loss 4.662095069885254
epoch 496, loss 4.654796600341797
epoch 497, loss 4.656999111175537
epoch 498, loss 4.649738311767578
epoch 499, loss 4.651963233947754
epoch 500, loss 4.644737243652344
epoch 501, loss 4.646982669830322
epoch 502, loss 4.639764308929443
epoch 503, loss 4.642056941986084
epoch 504, loss 4.634864807128906
epoch 505, loss 4.637181282043457
epoch 506, loss 4.6300129890441895
epoch 507, loss 4.632349491119385
epoch 508, loss 4.625199794769287
epoch 509, loss 4.627545356750488
epoch 510, loss 4.620429039001465
epoch 511, loss 4.6228179931640625
epoch 512, loss 4.615715026855469
epoch 513, loss 4.618127346038818
epoch 514, loss 4.6110520362854
epoch 515, loss 4.613472938537598
epoch 516, loss 4.606425762176514
epoch 517, loss 4.608871936798096
epoch 518, loss 4.601847171783447
epoch 519, loss 4.604316234588623
epoch 520, loss 4.5973052978515625
epoch 521, loss 4.599796295166016
epoch 522, loss 4.592812538146973
epoch 523, lo

epoch 742, loss 4.280998229980469
epoch 743, loss 4.284662246704102
epoch 744, loss 4.2792534828186035
epoch 745, loss 4.282922267913818
epoch 746, loss 4.277503490447998
epoch 747, loss 4.281179904937744
epoch 748, loss 4.275784492492676
epoch 749, loss 4.279461860656738
epoch 750, loss 4.27407169342041
epoch 751, loss 4.277751445770264
epoch 752, loss 4.272369384765625
epoch 753, loss 4.276050567626953
epoch 754, loss 4.270674705505371
epoch 755, loss 4.274360179901123
epoch 756, loss 4.268986225128174
epoch 757, loss 4.272665500640869
epoch 758, loss 4.267309188842773
epoch 759, loss 4.270998001098633
epoch 760, loss 4.265636444091797
epoch 761, loss 4.26932430267334
epoch 762, loss 4.263970375061035
epoch 763, loss 4.267668724060059
epoch 764, loss 4.26231575012207
epoch 765, loss 4.266016006469727
epoch 766, loss 4.260679244995117
epoch 767, loss 4.264381408691406
epoch 768, loss 4.259045600891113
epoch 769, loss 4.262750148773193
epoch 770, loss 4.257416725158691
epoch 771, loss 

epoch 990, loss 4.122697353363037
epoch 991, loss 4.126673221588135
epoch 992, loss 4.121790409088135
epoch 993, loss 4.125770568847656
epoch 994, loss 4.120885372161865
epoch 995, loss 4.124874591827393
epoch 996, loss 4.119991779327393
epoch 997, loss 4.123978137969971
epoch 998, loss 4.119103908538818
epoch 999, loss 4.123086929321289
tensor(4.1182)
tensor(4.0965)
tensor(4.0728)
tensor(4.1486)
tensor(4.0983)


([332.35959741192113,
  325.16471297143613,
  337.8264735781719,
  333.8000199319426,
  331.3110706098439],
 [])