In [1]:
%matplotlib notebook

In [2]:
import pyspark

sc = pyspark.sql.SparkSession.Builder().getOrCreate()

## Levanto los archivos

In [3]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold 
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid
from IPython.display import display, HTML
import math


## Utils

In [4]:
def distance(p1, p2):
    return math.sqrt((p1[0]-p2[0])**2+(p1[1]-p2[1])**2)


def k_cross_validation(clf, data, target, get_predictor_mae, k=5):
    if not isinstance(data, np.ndarray):
        data = np.array(data)
    if not isinstance(target, np.ndarray):
        target = np.array(target)
        
    kf = KFold(n_splits=k, shuffle=True)
    mae_list = []
    mae_list_train = []
    for train_index, test_index in kf.split(data):
        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = target[train_index], target[test_index]

        clf.fit(X_train, y_train)
        mae = get_predictor_mae(clf.predict(X_test), y_test)
        mae_train = get_predictor_mae(clf.predict(X_train), y_train)
        mae_list.append(mae)
        mae_list_train.append(mae_train)

    #print('mean absolute error on test set {}'.format(mae_list))
    #print('mean absolute error on train set {}'.format(mae_list_train))
    return mae_list, mae_list_train


def show_best_results(results, estimator_name, n=5):
    best_res = sc.sparkContext.parallelize(results).sortBy(lambda x: x['error']).take(n)
    df = pd.DataFrame(list(map(lambda x: x['params'], best_res)))
    df["mean_score_validation"] = list(map(lambda x: x['error'], best_res))
    df["mean_score_training"] = list(map(lambda x: x['error_train'], best_res))
    display(HTML("<h3> {}: top {} combinaciones  </h3>".format(estimator_name, n)))
    display(df.sort_values(by="mean_score_validation", ascending=True).head(n))
    
def regre_grid_search(predictor, params, regre_data, regre_target):
    results = []
    for params in ParameterGrid(params):
        clf = MultiOutputRegressor(predictor(**params))
        res_test, res_train = k_cross_validation(clf, regre_data, regre_target, get_regressor_mae)
        results.append({'params': params, 'error': np.mean(res_test), 'error_train': np.mean(res_train)})

    show_best_results(results, predictor.__name__)
    
def classi_grid_search(predictor, params, classi_data, classi_target):
    results = []
    for params in ParameterGrid(params):
        clf = predictor(**params)
        res_test, res_train = k_cross_validation(clf, classi_data, np.array(classi_target).ravel(), get_classifier_mae)
        results.append({'params': params, 'error': np.mean(res_test), 'error_train': np.mean(res_train)})

    show_best_results(results, predictor.__name__)
    


## Maps utils

In [5]:
global posicion_antenas
posicion_antenas = [(463512.015195402,6111004.324434620),(464259.981343845,6110331.85100085),(464443.295130103,6111377.26171875),(464629.562194595,6111105.34734669)]

global x_0
global y_0

x_0 = 462385.503783397
x_f = 465958.114906211
y_0 = 6109042.35153865
y_f = 6112715.80637111


def normalize_positions(row):
    row['x'] = row['x']-x_0
    row['y'] = row['y']-y_0
    return row


In [6]:
global points_recep
points_recep = sc.read.json('datos/train-test.jsonlines').rdd

In [7]:
non_empty_points_recp = points_recep.filter(lambda x: len(x['recep_0']+x['recep_1']+x['recep_2']+x['recep_3']) > 0)


### Me quedo con una sola emision por punto

In [8]:
unique_points = non_empty_points_recp.groupBy(lambda x: x['Punto']).map(lambda x: list(x[1])[0])

### Genero los atributos y etiquetas que me interesan

In [9]:
global max_0, max_1, max_2, max_3
global min_0, min_1, min_2, min_3

max_0 = 1 #unique_points.map(lambda x: max(x['recep_0']+[0])).max()
max_1 = 1 #unique_points.map(lambda x: max(x['recep_1']+[0])).max()
max_2 = 1 #unique_points.map(lambda x: max(x['recep_2']+[0])).max()
max_3 = 1 #unique_points.map(lambda x: max(x['recep_3']+[0])).max()

min_0 = 0 #unique_points.map(lambda x: min(x['recep_0']+[0])).min()
min_1 = 0 #unique_points.map(lambda x: min(x['recep_1']+[0])).min()
min_2 = 0 #unique_points.map(lambda x: min(x['recep_2']+[0])).min()
min_3 = 0 #unique_points.map(lambda x: min(x['recep_3']+[0])).min()


def generate_attrs(row):
    #f = lambda l: len(l)
    f = lambda l: float(np.mean(l) if len(l) > 0 else 0)
    data = {
        'recep_0': (f(row['recep_0'])-min_0)/max_0,
        'recep_1': (f(row['recep_1'])-min_1)/max_1,
        'recep_2': (f(row['recep_2'])-min_2)/max_2,
        'recep_3': (f(row['recep_3'])-min_3)/max_3
    }
    return {'data': data, 'x': row['x'], 'y': row['y'], 'point': row['Punto']}
    
points = unique_points.map(generate_attrs)

### Regresion por eje

In [10]:
regre_target = pd.DataFrame(points.map(lambda x: [x['x'], x['y']]).collect())
regre_data = pd.DataFrame(points.map(lambda x: x['data']).collect())



clf = MultiOutputRegressor(KNeighborsRegressor(n_neighbors=5))


def get_regressor_mae(predictions, real):
    mae = 0
    for i in range(len(predictions)):
        mae += distance(predictions[i], real[i])
    mae = mae/len(predictions)
    return mae


res_test, res_train = k_cross_validation(clf, regre_data, regre_target, get_regressor_mae, k=50)
np.mean(res_test)

367.58843784914154

In [11]:
from sklearn.ensemble import GradientBoostingRegressor
clf = MultiOutputRegressor(GradientBoostingRegressor())


res_test, res_train = k_cross_validation(clf, regre_data, regre_target, get_regressor_mae, k=50)
np.mean(res_test)


380.0675125035309

### Clasificacion de puntos

In [12]:
classi_target = pd.DataFrame(points.map(lambda x: x['point']).collect())
classi_data = pd.DataFrame(points.map(lambda x: x['data']).collect())


global dict_coordenadas
dict_coordenadas = points_recep.map(lambda x: (x['Punto'],(x['x'], x['y']))).collectAsMap()



def get_classifier_mae(predictions, real):
    sum_error = 0
    count = 0
    for i in range(len(predictions)):
        pred_position = dict_coordenadas[predictions[i]]
        real_position = dict_coordenadas[real[i]]
        sum_error += distance(pred_position, real_position)
        count += 1
    return sum_error/count
    


    

clf = KNeighborsClassifier(n_neighbors=5, weights='distance')
res_test, res_train = k_cross_validation(clf, classi_data, np.array(classi_target).ravel(), get_classifier_mae, 50)
np.mean(res_test)

451.8461625342927

## Clasificación de regiones + regresion

In [156]:
def get_region(point):
    x = point['x']
    y = point['y']
    return '{}_{}'.format(x%500, y%500)


def evaluate_classi_regre(classifier, regressor, points, get_region):
    cr_target = pd.DataFrame(points.map(get_region).collect())
    cr_data = pd.DataFrame(points.map(lambda x: x['data']).collect())
    r_target = pd.DataFrame(points.map(lambda x: [x['x'], x['y']]).collect())

    data = np.array(cr_data)
    target = np.array(cr_target).ravel()
    r_target_np = np.array(r_target)


    kf = KFold(n_splits=5, shuffle=True)
    mae_list = []
    for train_index, test_index in kf.split(data):
        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = target[train_index], target[test_index]

        classifier.fit(X_train, y_train)

        label_list = classifier.predict_proba(X_train)

        regressor.fit(label_list, r_target_np[train_index])

        c_predictions = classifier.predict_proba(X_test)
        r_predictions = regressor.predict(c_predictions)
        mae = get_regressor_mae(r_predictions, r_target_np[test_index])
        mae_list.append(mae)

    print(np.mean(mae_list))
    #print(np.mean(mae_list_train))

evaluate_classi_regre(KNeighborsClassifier(), KNeighborsRegressor(), points, get_region) 

372.595468493086


## Clasificación de regiones + regresion with kd-tree

In [173]:
import kdtree

global tree
tree = kdtree.create(regre_target.values.tolist())

def get_region_kd(point):
    max_height = 4
    coordinates = (point['x'], point['y'])
    i = 0
    region = ''
    subtree = tree
    while i < max_height:
        axis = subtree.axis
        if coordinates[axis] < subtree.data[axis]:
            subtree = subtree.left
            region = region+'0'
        else:
            subtree = subtree.right
            region = region+'1'
        i += 1
    return region


evaluate_classi_regre(KNeighborsClassifier(), KNeighborsRegressor(), points, get_region_kd) 
    
        
    

389.9241191274627


### Regression grid search

In [167]:
knn_params = {
    "n_neighbors" : np.arange(1, 15),
    "weights" : ["uniform", "distance"],
    "p" : [1, 2]
}

regre_grid_search(KNeighborsRegressor, knn_params)
 


Unnamed: 0,n_neighbors,p,weights,mean_score_validation,mean_score_training
0,10,1,distance,357.592072,13.59521
1,12,1,distance,361.668447,11.849891
2,13,1,uniform,362.354639,336.379496
3,14,2,distance,363.283585,11.307475
4,11,1,uniform,363.326578,332.634592


In [144]:
gboost_params = {
    #"loss" : ["ls", "lad", "huber", "quantile"] ,
    "learning_rate" : np.arange(0.01, 1, 0.1),
    "n_estimators" : np.arange(50, 1000, 200),
    "max_depth": np.arange(2,6),
}

regre_grid_search(GradientBoostingRegressor, gboost_params)


{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 50}
{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 250}
{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 450}
{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 650}
{'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 850}
{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50}
{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 250}
{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 450}
{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 650}
{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 850}
{'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 50}
{'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 250}
{'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 450}
{'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 650}
{'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 850}
{'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 50}
{'learning_rate': 0.01, 'max

{'learning_rate': 0.6100000000000001, 'max_depth': 2, 'n_estimators': 50}
{'learning_rate': 0.6100000000000001, 'max_depth': 2, 'n_estimators': 250}
{'learning_rate': 0.6100000000000001, 'max_depth': 2, 'n_estimators': 450}
{'learning_rate': 0.6100000000000001, 'max_depth': 2, 'n_estimators': 650}
{'learning_rate': 0.6100000000000001, 'max_depth': 2, 'n_estimators': 850}
{'learning_rate': 0.6100000000000001, 'max_depth': 3, 'n_estimators': 50}
{'learning_rate': 0.6100000000000001, 'max_depth': 3, 'n_estimators': 250}
{'learning_rate': 0.6100000000000001, 'max_depth': 3, 'n_estimators': 450}
{'learning_rate': 0.6100000000000001, 'max_depth': 3, 'n_estimators': 650}
{'learning_rate': 0.6100000000000001, 'max_depth': 3, 'n_estimators': 850}
{'learning_rate': 0.6100000000000001, 'max_depth': 4, 'n_estimators': 50}
{'learning_rate': 0.6100000000000001, 'max_depth': 4, 'n_estimators': 250}
{'learning_rate': 0.6100000000000001, 'max_depth': 4, 'n_estimators': 450}
{'learning_rate': 0.61000000

Unnamed: 0,learning_rate,max_depth,n_estimators,mean_score_validation,mean_score_training
0,0.11,4,50,356.7389,185.831377
1,0.01,3,450,364.423069,258.543994
2,0.01,4,250,366.596825,254.823671
3,0.11,2,50,367.045352,306.023335
4,0.11,5,50,370.22635,135.535366


In [168]:

from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

regre_grid_search(SVR, {})
regre_grid_search(DecisionTreeRegressor, {})




Unnamed: 0,mean_score_validation,mean_score_training
0,696.940495,694.17369


Unnamed: 0,mean_score_validation,mean_score_training
0,431.603805,11.688387


### Classification grid search

In [12]:
knn_params = {
    "n_neighbors" : np.arange(1, 15),
    "weights" : ["uniform", "distance"],
    "p" : [1, 2]
}


classi_grid_search(KNeighborsClassifier, knn_params, classi_data, classi_target)

Unnamed: 0,n_neighbors,p,weights,mean_score_validation,mean_score_training
0,5,2,distance,426.630368,12.348431
1,6,1,distance,426.83819,12.340234
2,2,1,distance,430.085256,12.733256
3,3,1,distance,431.883609,12.550093
4,11,2,distance,432.897245,12.547031


### Classi + regre grid search

In [81]:
knn_params = {
    "n_neighbors" : np.arange(1, 15),
    "weights" : ["uniform", "distance"],
    "p" : [1, 2]
}

for params_regre in ParameterGrid(knn_params):
    for params_classi in ParameterGrid(knn_params):
        #evaluate_classi_regre(KNeighborsClassifier(), KNeighborsRegressor(), points) 
        evaluate_classi_regre(KNeighborsClassifier(**params_classi), KNeighborsRegressor(**params_regre), points)
        
## El optimo parece estar muy cerca de los parametros default

456.73280404435144
448.5962055837657
459.0551872190439
442.2008782230467
483.929514897807
439.9729728928336
460.7683238630254
433.248202073848
453.3227382463912
440.19898549031876
454.5267073744852
467.95736224325884
440.2745153879129
421.09055630488876
454.305421676067
454.1545341172497
440.69679196165526
454.19272038794315
428.42305980375113
443.6414321931632
458.6479212172374
437.2850251665068
484.3831974692025
441.7503985913054
435.7092910061254
439.2304840169439
432.61322107991265
427.1153486248942
431.7086575339305
454.4444747769814
458.6702941542909
445.60320818891626
471.1054344204734
464.2979841973436
461.6227905549713
433.4804170471501
469.9994999270463
451.04572112500836
468.99499070853517
446.6694053354651
454.56112106325963
455.51247003309845
458.88219362058237
438.0367656134782
451.79553764620067
461.08119237877236
470.2492627171573
453.9064838168016
461.25167910045576
486.66365798237285
466.7480796142994
463.7107185437384
458.6559653139254
467.2573017788642
527.912826423

619.2285398314579
647.110893473919
422.33549544769915
520.627496827067
423.5481572988386
478.77486343480143
393.2490008782576
367.88574815024106
385.3508734711824
395.1218450932213
375.1860799819503
384.8381943076866
401.42897375323093
388.103276779222
398.04533747063317
380.06654680501316
411.8225501055484
387.1266606687244
387.8708935911749
385.7027614826371
386.1484003039072
378.32875099921256
398.4193439656498
376.81465542081986
398.3480548720488
386.9664957056084
407.28580956493187
372.30603917477436
402.92482655564015
388.5991418224103
407.3402410227847
386.2717947237814
394.4766607707771
388.3198439988012
393.4630404870876
377.3284081855446
391.5712208203445
394.5918418723301
389.1719665688601
369.01106102894187
400.5455981080928
406.05212388355693
387.6328017562792
386.37439978743214
390.63591769124395
407.0683018024054
409.68824251027354
385.21625252897957
396.6735582714943
399.9292729758223
400.593916867463
387.3670380046825
407.93736569406735
396.2131985683625
445.2902560426

503.1995818053184
558.7050225562638
474.54843822996827
556.6739966073352
406.4876368612336
488.3581922096914
395.30503535926584
493.5160336969161
377.82573215756213
405.0655849715533
379.5823448416005
403.2699673796028
363.45968486186905
371.58267746518186
390.7289770533838
361.7520035985474
373.0665242402825
370.42269471193634
376.48077617630713
386.6060280002291
374.81904941500204
377.58548517400675
366.61913057259005
369.8139311012103
399.57988981655024
371.28939218634764
389.89582227503877
357.7692294154537
382.8482659083134
378.4569603324104
385.2904302954659
383.0280545330728
397.0797394750738
376.16308248991436
400.1400257514143
374.5252543694727
384.4588465306646
386.105106847144
383.441082376164
381.4454506740393
375.89273754510856
375.01069584664526
388.8783164494936
376.2625736775234
370.28177071280317
380.3452568041201
392.5320869018809
374.0110871786927
379.4423444762635
368.2366605969734
384.1348970898324
391.2794177214585
445.81559922684835
421.4807365465014
456.64557385

541.6443315186046
587.2458596165061
430.1178434654348
540.572148201
445.0086370788882
536.15829235213
372.86610402348913
490.56329644921027
393.5190996413827
474.2640076849598
371.6874400008885
429.9881421751187
372.6639932787249
439.6050060841445
378.91743662106137
405.06126451573925
368.6103453666056
398.83432709079887
371.81314865882297
377.1774317990852
388.25752460780006
380.1222610088627
374.8439033631607
383.62635665977166
376.3615633759936
367.58020545660463
377.5008014602584
373.2178135516966
376.3250948628623
377.4084485519425
383.8608316633562
367.1607531723894
375.05434876910095
370.97225528287385
374.74134841501865
371.46711281748213
382.9434220563072
374.05481700124506
380.8368913291146
374.4547339536219
379.24031941794055
371.32357455583065
371.15898326473473
380.7575498639122
374.41280404717327
369.82694864668224
385.3134130689076
388.1199666416641
383.28607738826724
380.85965676144895
451.33949186490474
437.2478610993332
474.19548102604415
450.26381888763916
420.087542

472.7196796485674
574.5515724424107
490.82125979516513
573.9000454000303
400.8003620800489
528.2682354174766
407.84579551651046
537.9585332153218
365.4643963856725
476.1939623142877
365.3678882191546
482.26453013644397
359.4790259647565
440.1786303516077
372.8708284419378
436.3330885292677
373.09150008539325
413.9398497121647
363.8654990128228
427.37725715781073
372.1334982816629
385.02928123843697
370.4481033804822
394.9021871132309
367.7587109894989
374.1545341093326
373.11341675315805
372.2420324227818
381.23023186148396
377.3234109036893
374.8897393445869
375.53648532756546
373.23154576265387
370.1858987463289
371.57530297107996
381.12609528523046
375.4855548763527
366.20267269377524
377.79476185581836
379.93734514548186
371.74629804362866
367.6901734769578
391.19454077769615
379.23003153735783
374.4583826279976
373.89675550331
376.4193236343481
371.9506439615517
467.0637483208508
449.31901842340795
463.79925651271543
450.6795951291686
430.3417755459479
553.044331908646
443.1892847

KeyboardInterrupt: 

### Error analysis

In [None]:
points_pd = pd.DataFrame(unique_points.collect())

## Regression
clf = KNeighborsRegressor(n_neighbors=5)
X = points_data
y = points_target_x
info_adder = lambda X: X.set_index('real').join(points_pd.set_index(11))

def error_analysis(clf, X, y, info_adder):
    X_train, X_test, y_train, y_test = train_test_split(points_data, y, test_size=0.20, random_state=23)
    clf.fit(X_train, y_train)

    predictions = clf.predict(X_test)
    X_test['real'] = y_test
    X_test['prediction'] = predictions
    X_test = info_adder(X_test)
    display(X_test)


In [None]:
error_analysis(clf, X, y, info_adder)

### Mapa mostrando el error para cada punto

In [85]:
def show_prediction_map_errors(all_points_positions, all_points_error):
    x_pos = list(map(lambda x: x[0]-x_0, all_points_positions))
    y_pos = list(map(lambda x: x[1]-y_0, all_points_positions))
    points_pos_errors = pd.DataFrame({'x': x_pos,'y': y_pos,'error': all_points_error})
    ax = points_pos_errors.plot.scatter(x='x', y='y',c='error',cmap='Oranges')
    
    for antenna in range(4):
        pos_antena = posicion_antenas[antenna]
        plt.plot(pos_antena[0]-x_0, pos_antena[1]-y_0, "db")

    plt.ylim(0,y_f-y_0)
    plt.xlim(0,x_f-x_0)



In [97]:
clf = KNeighborsRegressor(n_neighbors=5)
data = np.array(regre_data)
target = np.array(regre_target)

kf = KFold(n_splits=5, shuffle=True)
all_points_positions = None
all_points_error = []

for train_index, test_index in kf.split(data):
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = target[train_index], target[test_index]

    clf.fit(X_train, y_train)

    predictions = clf.predict(X_test)
    
    all_points_positions = np.concatenate([all_points_positions, y_test]) if (all_points_positions is not None) else y_test
    all_points_error = all_points_error + [distance(predictions[i], y_test[i]) for i in range(len(predictions))]
    
show_prediction_map_errors(all_points_positions, all_points_error)


<IPython.core.display.Javascript object>

In [96]:
clf = KNeighborsClassifier(n_neighbors=5)
data = np.array(classi_data)
target = np.array(classi_target).ravel()

kf = KFold(n_splits=5, shuffle=True)

all_points_positions = []
all_points_error = []

point_to_pos = dict(points_recep.map(lambda x: (x['Punto'], (x['x'], x['y']))).collect())
for train_index, test_index in kf.split(data):
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = target[train_index], target[test_index]

    clf.fit(X_train, y_train)

    predictions = clf.predict(X_test)
    
    y_test = list(map(lambda x: point_to_pos[x], y_test.tolist()))
    predictions = list(map(lambda x: point_to_pos[x], predictions.tolist()))
    
    all_points_positions = all_points_positions + y_test
    all_points_error = all_points_error + [distance(predictions[i], y_test[i]) for i in range(len(predictions))]

show_prediction_map_errors(all_points_positions, all_points_error)

<IPython.core.display.Javascript object>

### Mapa mostrando ejemplos de predicciones

In [48]:
def show_prediction_map_examples(predictions, real_points):
    all_points_pos = pd.DataFrame(points_recep.map(lambda p: {'x': p['x'], 'y': p['y']}).map(normalize_positions).collect())
    ax = all_points_pos.plot.scatter(x='x', y='y',c='#96EAE1')
    for antenna in range(4):
        pos_antena = posicion_antenas[antenna]
        plt.plot(pos_antena[0]-x_0, pos_antena[1]-y_0, "dy")

    plt.ylim(0,y_f-y_0)
    plt.xlim(0,x_f-x_0)

    
    for i in range(len(predictions)):
        
        predicted_point = predictions[i]
        real_point = real_points.iloc[i]

        plt.scatter(predicted_point[0]-x_0, predicted_point[1]-y_0, c='r', s=15)
        plt.scatter(real_point[0]-x_0, real_point[1]-y_0, c='b', s=15)

        ax.arrow(real_point[0]-x_0, real_point[1]-y_0, predicted_point[0]-real_point[0], predicted_point[1]-real_point[1], head_width=30, head_length=30, fc='k', ec='k', length_includes_head=True)
        
        


In [100]:
clf = KNeighborsRegressor(n_neighbors=5)
X_train, X_test, y_train, y_test = train_test_split(regre_data, regre_target, test_size=0.20)
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)

show_prediction_map_examples(predictions, y_test)

<IPython.core.display.Javascript object>

In [50]:
clf = KNeighborsClassifier(n_neighbors=5, weights='distance')
X_train, X_test, y_train, y_test = train_test_split(classi_data, np.array(classi_target).ravel(), test_size=0.20)
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)

point_to_pos = dict(points_recep.map(lambda x: (x['Punto'], (x['x'], x['y']))).collect())
predictions_with_coordinates = list(map(lambda p: point_to_pos[p], predictions))
y_to_coordinates = list(map(lambda p: point_to_pos[p], y_test))
show_prediction_map_examples(predictions_with_coordinates, pd.DataFrame(y_to_coordinates))

<IPython.core.display.Javascript object>