# Single Output Regression

In [7]:
import os
import sys
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from pathlib import Path

# Reproducibility seed and the AutoML time budget (seconds, passed as
# max_runtime_secs for each target).
SEED = 42
MAX_TIME = 120

# Data locations: the project root is taken to be the parent of the current
# working directory, with the raw CSV files under data/raw.
HOME = Path.cwd().parent
RAW_PATH = HOME / 'data/raw'
print(RAW_PATH)

/home/rco/DS/kdd22/data/raw


In [8]:
# Load the public table (one row per image file) that every pixel-distance
# file is later joined onto.
public_csv = RAW_PATH / 'public.csv'
pub = pd.read_csv(public_csv)
print(pub.shape)
pub.head()

(146262, 6)


Unnamed: 0,Filename,Altitude,Delta,North,East,kfold
0,00003e3b9e5336685200ae85d21b4f5e.jpg,178.829834,-0.065231,-0.386045,0.929772,0
1,0001261e2060303a06ba6c64d676d639.jpg,207.921478,-0.080688,0.635584,0.152819,2
2,0002ac0d783338cfeab0b2bdbd872cda.jpg,178.048431,0.021576,-1.228229,-0.499388,3
3,0004289ee1c7b8b08c77e19878106ae3.jpg,201.084625,0.505981,-1.739709,-0.699928,1
4,0004d0b59e19461ff126e3a08a814c33.jpg,187.550201,-0.328156,-0.169798,2.828752,0


In [9]:
def merge_pixels_dist(dataset='train', suffixes=('', '_a', '_a_est', '_h', '_h_est', '_p', '_p_est')):
    """Join one or more ``pixels_dist_<dataset><suffix>.csv`` files onto ``pub``.

    Parameters
    ----------
    dataset : str
        Dataset tag used in the file name (the notebook uses 'train' and 'test').
    suffixes : iterable of str
        File-name suffixes to load, in order. Each suffix is also appended to
        any incoming column whose name already exists in the running result.

    Returns
    -------
    pandas.DataFrame
        A copy of the module-level ``pub`` frame inner-joined on ``Filename``
        with every requested file, so only files present in all inputs remain.

    Notes
    -----
    Reads the module-level ``pub`` and ``RAW_PATH``. Prints the running shape
    after each merge so dropped rows are visible.
    """
    df_final = pub.copy()
    for s in suffixes:
        filename = 'pixels_dist_' + dataset + s + '.csv'
        # The pixel files key their rows by 'Images'; align it with pub's key.
        df_right = pd.read_csv(RAW_PATH / filename).rename(columns={'Images': 'Filename'})
        # Left-hand names are kept as-is; only clashing right-hand columns get the suffix.
        df_final = df_final.merge(df_right, on='Filename', how='inner', suffixes=(None, s))
        print(df_final.shape)

    return df_final

# Training table: pub rows that have an entry in pixels_dist_train_rco.csv.
train = merge_pixels_dist(dataset='train', suffixes=['_rco'])
train.head()

(91231, 26)


Unnamed: 0,Filename,Altitude,Delta,North,East,kfold,pxl_0,pxl_1,pxl_2,pxl_3,...,pxl_10,pxl_11,pxl_12,pxl_13,pxl_14,pxl_15,pxl_16,pxl_17,pxl_18,pxl_19
0,00003e3b9e5336685200ae85d21b4f5e.jpg,178.829834,-0.065231,-0.386045,0.929772,0,-2.905516,-1.293549,-2.908318,-1.244682,...,-2.882545,-1.254211,-3.426113,-1.640629,-2.829666,-1.183493,-2.998511,-1.221453,-2.975761,-1.224934
1,0001261e2060303a06ba6c64d676d639.jpg,207.921478,-0.080688,0.635584,0.152819,2,-0.325096,1.066655,-0.301397,1.057419,...,-0.268454,0.959177,-0.203712,1.094475,-0.268454,0.959177,-0.252202,1.062943,-0.326046,0.971859
2,0002ac0d783338cfeab0b2bdbd872cda.jpg,178.048431,0.021576,-1.228229,-0.499388,3,1.688988,-3.81617,1.60363,-3.971138,...,1.601097,-3.896843,1.576309,-3.810501,1.55455,-3.741711,1.583,-3.806505,1.571258,-3.768505
3,0004289ee1c7b8b08c77e19878106ae3.jpg,201.084625,0.505981,-1.739709,-0.699928,1,1.723282,-5.506313,-1.370886,-6.696845,...,3.585079,-6.579813,1.656113,-5.524193,2.281319,-5.760706,1.84214,-5.587303,2.933319,-6.275887
4,0004d0b59e19461ff126e3a08a814c33.jpg,187.550201,-0.328156,-0.169798,2.828752,0,-7.933868,-0.467377,-7.94595,-0.667904,...,-7.957325,-0.792717,-7.955856,-0.466026,-7.913773,-0.495544,-7.805359,-0.389091,-7.685028,-0.443871


In [10]:
# Test table: pub rows that have an entry in pixels_dist_test_rco.csv, with
# the two target columns removed.
test = merge_pixels_dist(dataset='test', suffixes=['_rco']).drop(columns=['North', 'East'])
test.head()

(55031, 26)


Unnamed: 0,Filename,Altitude,Delta,kfold,pxl_0,pxl_1,pxl_2,pxl_3,pxl_4,pxl_5,...,pxl_10,pxl_11,pxl_12,pxl_13,pxl_14,pxl_15,pxl_16,pxl_17,pxl_18,pxl_19
0,000053b1e684c9e7ea73727b2238ce18.jpg,167.943069,0.010269,0,1.148582,-3.340916,-18.83366,17.370598,0.824883,-3.930728,...,1.148582,-3.340916,0.824883,-3.930728,-8.145741,16.754318,-0.074838,0.269563,-0.299238,-1.830235
1,00029153d12ae1c9abe59c17ff2e0895.jpg,195.853088,0.089218,2,2.752609,2.573463,4.366684,4.157536,4.681839,3.957802,...,4.385246,3.600212,3.636406,2.604145,4.080269,3.412636,4.182144,2.752205,2.429424,1.857857
2,0006246bee639c7a7b11a08e34dd3cc6.jpg,146.943466,-0.018326,1,-0.169895,-12.048111,-0.13204,-12.098686,-0.043144,-11.731617,...,0.00318,-15.928162,0.132889,-11.713745,0.002014,-11.64962,-0.202403,-11.936136,-0.197823,-11.768921
3,00063cb5da1826febf178b669eea3250.jpg,213.184418,-0.108704,1,5.899114,-5.90437,3.899361,-1.867081,-14.714348,-4.842091,...,9.191109,-32.700813,-16.807194,-2.444351,-15.071526,2.416723,-1.628705,-5.106833,-4.408707,-24.759686
4,00063ece2e68a8847f228e8fd922f851.jpg,184.757767,0.0177,0,6.314903,-0.068115,6.296444,-0.425659,6.142395,-0.198502,...,5.979332,-0.635529,5.925034,-0.113266,6.414247,-0.772942,5.471008,-0.318184,6.608952,-0.145115


In [11]:
import h2o
from h2o.automl import H2OAutoML

# Connect to the local H2O instance, requesting 16 threads and a 32G memory cap.
h2o.init(
    nthreads=16,
    max_mem_size='32G',
)

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,14 secs
H2O_cluster_timezone:,America/Sao_Paulo
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.38.0.2
H2O_cluster_version_age:,4 days
H2O_cluster_name:,H2O_from_python_rco_tvwqpb
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,28.42 Gb
H2O_cluster_total_cores:,16
H2O_cluster_allowed_cores:,16


In [12]:
# Upload both pandas tables to the H2O cluster. Note that `train` and `test`
# are rebound from pandas DataFrames to H2OFrames from this cell onwards.
train = h2o.H2OFrame(python_obj=train)
test = h2o.H2OFrame(python_obj=test)

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [13]:
# Predictor columns: everything except the join key, the two regression
# targets, and `kfold` (presumably a CV fold label — confirm).
excluded_columns = ('Filename', 'North', 'East', 'kfold')
features = [column for column in train.columns if column not in excluded_columns]

def train_automl(train, test, target, max_runtime_secs=3600):
    """Run H2O AutoML for a single target and score both frames with the leader.

    Parameters
    ----------
    train : frame used for fitting; it is also scored after training.
    test : frame scored with the leader model.
    target : str
        Name of the response column; also used to name the prediction column
        as ``<target>_prediction``.
    max_runtime_secs : int
        Time budget handed to ``H2OAutoML``.

    Returns
    -------
    tuple
        ``(aml, train_preds, test_preds)`` — the AutoML object and the leader's
        predictions on ``train`` and ``test``.

    Notes
    -----
    Uses the module-level ``SEED`` and ``features``.
    """
    automl_options = dict(
        seed=SEED,
        max_runtime_secs=max_runtime_secs,
        stopping_metric='RMSE',
        sort_metric='RMSE',
        exploitation_ratio=0.1,
    )
    aml = H2OAutoML(**automl_options)
    aml.train(x=features, y=target, training_frame=train)

    def _score(frame):
        # Predict with the leader, then relabel the 'predict' column. The
        # return value of rename() is not used, so this relies on the frame
        # being renamed in place.
        scored = aml.leader.predict(frame)
        scored.rename({'predict': target + '_prediction'})
        return scored

    train_preds = _score(train)
    test_preds = _score(test)
    return aml, train_preds, test_preds
    

In [14]:
# Fit the North model within the MAX_TIME budget, then list the leaderboard.
north_aml, north_train_preds, north_test_preds = train_automl(
    train=train,
    test=test,
    target='North',
    max_runtime_secs=MAX_TIME,
)
north_aml.leaderboard.head(20)

AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%


model_id,rmse,mse,mae,rmsle,mean_residual_deviance
StackedEnsemble_AllModels_3_AutoML_1_20221031_232417,0.400321,0.160257,0.168722,,0.160257
StackedEnsemble_BestOfFamily_4_AutoML_1_20221031_232417,0.402763,0.162218,0.168593,,0.162218
StackedEnsemble_AllModels_2_AutoML_1_20221031_232417,0.404264,0.163429,0.174653,,0.163429
StackedEnsemble_AllModels_1_AutoML_1_20221031_232417,0.404921,0.163961,0.176466,,0.163961
StackedEnsemble_BestOfFamily_3_AutoML_1_20221031_232417,0.405909,0.164762,0.173282,,0.164762
StackedEnsemble_BestOfFamily_2_AutoML_1_20221031_232417,0.406908,0.165574,0.175356,,0.165574
StackedEnsemble_BestOfFamily_1_AutoML_1_20221031_232417,0.41225,0.16995,0.183692,,0.16995
GBM_1_AutoML_1_20221031_232417,0.418011,0.174733,0.19132,,0.174733
DRF_1_AutoML_1_20221031_232417,0.418179,0.174874,0.179806,,0.174874
GBM_4_AutoML_1_20221031_232417,0.421682,0.177816,0.203047,,0.177816


In [15]:
# Fit the East model within the MAX_TIME budget, then list the leaderboard.
east_aml, east_train_preds, east_test_preds = train_automl(
    train=train,
    test=test,
    target='East',
    max_runtime_secs=MAX_TIME,
)
east_aml.leaderboard.head(20)

AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%
stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%


model_id,rmse,mse,mae,rmsle,mean_residual_deviance
StackedEnsemble_AllModels_3_AutoML_2_20221031_232619,0.374925,0.140568,0.167026,,0.140568
StackedEnsemble_BestOfFamily_4_AutoML_2_20221031_232619,0.378111,0.142968,0.167122,,0.142968
StackedEnsemble_AllModels_2_AutoML_2_20221031_232619,0.378338,0.143139,0.171666,,0.143139
StackedEnsemble_AllModels_1_AutoML_2_20221031_232619,0.379754,0.144213,0.174081,,0.144213
StackedEnsemble_BestOfFamily_3_AutoML_2_20221031_232619,0.381207,0.145319,0.171311,,0.145319
StackedEnsemble_BestOfFamily_2_AutoML_2_20221031_232619,0.38334,0.14695,0.174075,,0.14695
StackedEnsemble_BestOfFamily_1_AutoML_2_20221031_232619,0.387759,0.150357,0.181341,,0.150357
GBM_1_AutoML_2_20221031_232619,0.393315,0.154696,0.188675,,0.154696
GBM_4_AutoML_2_20221031_232619,0.394094,0.15531,0.196073,,0.15531
DRF_1_AutoML_2_20221031_232619,0.396147,0.156932,0.178566,,0.156932


In [16]:
# Append the North and East prediction columns to the training frame, then
# save a CSV copy (written with the pandas index as its first column).
train_final = train.cbind(north_train_preds).cbind(east_train_preds)
train_final.as_data_frame().to_csv('train_final.csv')

train_final.head(5)

Filename,Altitude,Delta,North,East,kfold,pxl_0,pxl_1,pxl_2,pxl_3,pxl_4,pxl_5,pxl_6,pxl_7,pxl_8,pxl_9,pxl_10,pxl_11,pxl_12,pxl_13,pxl_14,pxl_15,pxl_16,pxl_17,pxl_18,pxl_19,North_prediction,East_prediction
00003e3b9e5336685200ae85d21b4f5e.jpg,178.83,-0.0652313,-0.386045,0.929772,0,-2.90552,-1.29355,-2.90832,-1.24468,-3.59486,-1.19225,-2.85976,-1.20882,-2.86176,-1.2799,-2.88255,-1.25421,-3.42611,-1.64063,-2.82967,-1.18349,-2.99851,-1.22145,-2.97576,-1.22493,-0.357914,0.898713
0001261e2060303a06ba6c64d676d639.jpg,207.921,-0.0806885,0.635584,0.152819,2,-0.325096,1.06665,-0.301397,1.05742,-0.30209,1.02171,-0.251114,0.208279,-0.326046,0.971859,-0.268454,0.959177,-0.203712,1.09447,-0.268454,0.959177,-0.252202,1.06294,-0.326046,0.971859,0.59553,0.157414
0002ac0d783338cfeab0b2bdbd872cda.jpg,178.048,0.0215759,-1.22823,-0.499388,3,1.68899,-3.81617,1.60363,-3.97114,1.63071,-3.83078,1.58744,-3.74135,1.53305,-3.82208,1.6011,-3.89684,1.57631,-3.8105,1.55455,-3.74171,1.583,-3.80651,1.57126,-3.76851,-1.19212,-0.471702
0004289ee1c7b8b08c77e19878106ae3.jpg,201.085,0.505981,-1.73971,-0.699928,1,1.72328,-5.50631,-1.37089,-6.69685,2.1552,-5.6545,2.96579,-6.2966,2.36815,-6.12882,3.58508,-6.57981,1.65611,-5.52419,2.28132,-5.76071,1.84214,-5.5873,2.93332,-6.27589,-1.73075,-0.656233
0004d0b59e19461ff126e3a08a814c33.jpg,187.55,-0.328156,-0.169798,2.82875,0,-7.93387,-0.467377,-7.94595,-0.667904,-9.20364,-0.8188,-7.78825,-0.459106,-7.8754,-0.574451,-7.95732,-0.792717,-7.95586,-0.466026,-7.91377,-0.495544,-7.80536,-0.389091,-7.68503,-0.443871,-0.14807,2.6868


In [17]:
# Append the North and East prediction columns to the test frame, then save
# a CSV copy (written with the pandas index as its first column).
test_final = test.cbind(north_test_preds).cbind(east_test_preds)
test_final.as_data_frame().to_csv('test_final.csv')

test_final

Filename,Altitude,Delta,kfold,pxl_0,pxl_1,pxl_2,pxl_3,pxl_4,pxl_5,pxl_6,pxl_7,pxl_8,pxl_9,pxl_10,pxl_11,pxl_12,pxl_13,pxl_14,pxl_15,pxl_16,pxl_17,pxl_18,pxl_19,North_prediction,East_prediction
000053b1e684c9e7ea73727b2238ce18.jpg,167.943,0.0102692,0,1.14858,-3.34092,-18.8337,17.3706,0.824883,-3.93073,-0.355776,-1.16761,1.00562,-1.44288,1.14858,-3.34092,0.824883,-3.93073,-8.14574,16.7543,-0.0748377,0.269563,-0.299238,-1.83023,-0.461572,0.0319499
00029153d12ae1c9abe59c17ff2e0895.jpg,195.853,0.0892181,2,2.75261,2.57346,4.36668,4.15754,4.68184,3.9578,7.69817,2.22164,2.62536,2.22768,4.38525,3.60021,3.63641,2.60415,4.08027,3.41264,4.18214,2.7522,2.42942,1.85786,0.843858,-1.15745
0006246bee639c7a7b11a08e34dd3cc6.jpg,146.943,-0.0183258,1,-0.169895,-12.0481,-0.13204,-12.0987,-0.0431442,-11.7316,0.027256,-12.1229,-0.0410385,-12.2621,0.00317955,-15.9282,0.132889,-11.7137,0.00201416,-11.6496,-0.202403,-11.9361,-0.197823,-11.7689,-1.42322,0.121149
00063cb5da1826febf178b669eea3250.jpg,213.184,-0.108704,1,5.89911,-5.90437,3.89936,-1.86708,-14.7143,-4.84209,-1.69508,-24.1241,-16.3963,24.7725,9.19111,-32.7008,-16.8072,-2.44435,-15.0715,2.41672,-1.62871,-5.10683,-4.40871,-24.7597,-0.692103,0.431144
00063ece2e68a8847f228e8fd922f851.jpg,184.758,0.0177002,0,6.3149,-0.0681152,6.29644,-0.425659,6.1424,-0.198502,6.49742,0.0932236,6.58315,0.140034,5.97933,-0.635529,5.92503,-0.113266,6.41425,-0.772942,5.47101,-0.318184,6.60895,-0.145115,0.0212979,-1.48248
000838c1249fec206b77360ff0adc110.jpg,209.136,-0.544525,1,2.38117,3.89162,2.36828,3.56954,2.63266,3.54911,2.45233,3.56828,2.45103,3.64938,2.89822,3.66876,2.58473,3.60395,2.91451,3.4528,2.63266,3.54911,2.92265,3.24918,1.62507,-1.42808
000a8e84b013655b832041a6f362e5c9.jpg,177.72,-0.23024,4,3.88873,-0.492369,4.42173,-0.520847,4.33254,-0.263458,4.40611,-0.245636,4.39208,-0.493393,4.26459,-0.423016,4.37817,-0.565887,4.41778,-0.276993,4.09095,-0.462299,4.17667,-0.121765,-0.144194,-1.33138
000d0c74074191add6f22e0004db8f76.jpg,200.488,0.0812378,3,6.19389,2.58061,5.38988,2.19228,4.69729,2.00504,4.63818,2.25755,4.80048,1.88295,4.52833,1.88636,4.76211,1.88943,6.22773,2.53867,5.80002,3.7495,6.10212,2.94987,0.728265,-2.06877
000dd3543ac84d906eae52e7c779bb2a.jpg,155.313,0.062027,4,9.49759,-1.18337,7.23822,-5.68969,7.88213,-3.91421,14.9235,-6.43969,8.47189,-4.42818,8.47189,-4.42818,7.81363,-3.73524,8.22931,-4.20756,7.84705,-3.81264,7.23822,-5.68969,-0.546442,-1.31919
00129b07887a18a7331909231c28816e.jpg,187.643,-0.133362,4,-4.19293,0.394976,-4.23288,0.537773,-4.15697,0.387848,-4.19751,0.569031,-4.25222,0.434129,-4.11543,0.453648,-4.33271,0.644951,-4.19122,0.497551,-4.19726,0.93837,-4.19633,0.480124,0.214803,1.84979


# Submission

In [18]:
# Reshape the predictions to long format: one row per (file, target) pair,
# keyed by "<Filename>:<North|East>" with the value in 'Predicted'.
submission = (
    test_final.as_data_frame()[['Filename', 'North_prediction', 'East_prediction']]
    .rename(columns={'North_prediction': 'North', 'East_prediction': 'East'})
    .melt(id_vars='Filename', value_name='Predicted')
    .assign(Id=lambda long_df: long_df['Filename'] + ':' + long_df['variable'])
    .loc[:, ['Id', 'Predicted']]
)
submission.head()

Unnamed: 0,Id,Predicted
0,000053b1e684c9e7ea73727b2238ce18.jpg:North,-0.461572
1,00029153d12ae1c9abe59c17ff2e0895.jpg:North,0.843858
2,0006246bee639c7a7b11a08e34dd3cc6.jpg:North,-1.423215
3,00063cb5da1826febf178b669eea3250.jpg:North,-0.692103
4,00063ece2e68a8847f228e8fd922f851.jpg:North,0.021298


In [19]:
# Write the submission file without the pandas index column.
submission.to_csv(path_or_buf='submission_h2o_full.csv', index=False)

# North Explanation

In [20]:
# north_aml.explain(train)

# East Explanation

In [21]:
# east_aml.explain(train)

In [22]:
# Stop the H2O cluster through its cluster handle; the module-level
# h2o.shutdown() is deprecated and emits a warning when called.
h2o.cluster().shutdown()

H2O session _sid_a7f8 closed.


  h2o.shutdown()
