In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
import helpers.processing_helpers as ph
from sklearn.svm import LinearSVR
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression

In [2]:
df = pd.read_csv("./dataset/development.csv")

noise_indexes = [0,7,12,15,16,17]



features = ['pmax', 'negpmax', 'area', 'tmax', 'rms']

drop_features = ['area', 'tmax', 'rms']

df = df.drop(columns=ph.get_column_names(features, noise_indexes)) 

In [3]:
def find_best_feature_reduction(df: pd.DataFrame):
    
    feature_to_loss = {}
    drop_features_list = [('area',), ('tmax',), ('rms',), ('area', 'tmax'), ('area', 'rms'), ('rms', 'tmax'), ('area', 'tmax', 'rms')]
    acc_idxs = [1,2,3,4,5,6,8,9,10,11,13,14]

    for drop_features in drop_features_list:
        
        df_dev = df.drop(columns=ph.get_column_names(drop_features, acc_idxs))
        y_train_valid = df_dev[['x', 'y']].copy()

        X_train_valid = df_dev.drop(columns=['x', 'y'])

        X_train, X_valid, y_train, y_valid = train_test_split(X_train_valid, y_train_valid, shuffle=True)

        means = X_train.mean()
        stds = X_train.std()

        X_train_normalized = (X_train - means) / stds

        X_valid_normalized = (X_valid - means) / stds

        mlp = MLPRegressor(random_state=42, verbose=1, n_iter_no_change=500, max_iter=100, learning_rate_init=0.01, activation="logistic", learning_rate="adaptive")
        mlp.fit(X_train_normalized, y_train)

        y_pred = mlp.predict(X_valid_normalized)

        result = (ph.mean_euclid_dist(y_valid, y_pred))
        feature_to_loss[drop_features] = result
    return feature_to_loss

In [4]:
results = find_best_feature_reduction(df)

Iteration 1, loss = 13537.75947061
Iteration 2, loss = 888.22991078
Iteration 3, loss = 117.24240615
Iteration 4, loss = 35.31888956
Iteration 5, loss = 21.70328892
Iteration 6, loss = 17.39044158
Iteration 7, loss = 15.09834849
Iteration 8, loss = 13.73975474
Iteration 9, loss = 12.88917924
Iteration 10, loss = 12.20596193
Iteration 11, loss = 11.82781507
Iteration 12, loss = 11.73215550
Iteration 13, loss = 11.45227981
Iteration 14, loss = 11.34841722
Iteration 15, loss = 11.20070123
Iteration 16, loss = 11.05743879
Iteration 17, loss = 10.98085114
Iteration 18, loss = 10.95930083
Iteration 19, loss = 10.92524564
Iteration 20, loss = 10.77958054
Iteration 21, loss = 10.72592638
Iteration 22, loss = 10.71419799
Iteration 23, loss = 10.58348023
Iteration 24, loss = 10.51718586
Iteration 25, loss = 10.52278397
Iteration 26, loss = 10.48441766
Iteration 27, loss = 10.44388416
Iteration 28, loss = 10.46356541
Iteration 29, loss = 10.47131998
Iteration 30, loss = 10.40083455
Iteration 31, 



Iteration 1, loss = 13400.72949000
Iteration 2, loss = 832.71508264
Iteration 3, loss = 116.29342713
Iteration 4, loss = 37.07159113
Iteration 5, loss = 22.25388377
Iteration 6, loss = 17.15182781
Iteration 7, loss = 14.73578206
Iteration 8, loss = 13.31492971
Iteration 9, loss = 12.41543918
Iteration 10, loss = 11.74685443
Iteration 11, loss = 11.27750748
Iteration 12, loss = 10.96715916
Iteration 13, loss = 10.74482125
Iteration 14, loss = 10.62695102
Iteration 15, loss = 10.45366404
Iteration 16, loss = 10.36279427
Iteration 17, loss = 10.39045600
Iteration 18, loss = 10.25917738
Iteration 19, loss = 10.16699898
Iteration 20, loss = 10.09415279
Iteration 21, loss = 9.99141398
Iteration 22, loss = 9.96678237
Iteration 23, loss = 9.94111132
Iteration 24, loss = 9.83021790
Iteration 25, loss = 9.91693461
Iteration 26, loss = 9.76922666
Iteration 27, loss = 9.81567446
Iteration 28, loss = 9.68938546
Iteration 29, loss = 9.65799850
Iteration 30, loss = 9.64466890
Iteration 31, loss = 9.6



Iteration 1, loss = 13434.80413616
Iteration 2, loss = 844.40580423
Iteration 3, loss = 117.06756726
Iteration 4, loss = 36.86647175
Iteration 5, loss = 22.13489106
Iteration 6, loss = 17.19413382
Iteration 7, loss = 14.77457273
Iteration 8, loss = 13.35507712
Iteration 9, loss = 12.45853751
Iteration 10, loss = 11.89547539
Iteration 11, loss = 11.51342146
Iteration 12, loss = 11.18137693
Iteration 13, loss = 10.93676688
Iteration 14, loss = 10.71970733
Iteration 15, loss = 10.66003941
Iteration 16, loss = 10.49077335
Iteration 17, loss = 10.34699816
Iteration 18, loss = 10.25896885
Iteration 19, loss = 10.20875832
Iteration 20, loss = 10.19702634
Iteration 21, loss = 10.14197971
Iteration 22, loss = 9.97134001
Iteration 23, loss = 10.00591688
Iteration 24, loss = 9.96316499
Iteration 25, loss = 9.84398211
Iteration 26, loss = 9.86237665
Iteration 27, loss = 9.81745620
Iteration 28, loss = 9.88110274
Iteration 29, loss = 9.80741088
Iteration 30, loss = 9.73910966
Iteration 31, loss = 9



Iteration 1, loss = 13330.80227120
Iteration 2, loss = 876.80813419
Iteration 3, loss = 116.85266940
Iteration 4, loss = 35.62263640
Iteration 5, loss = 21.45309393
Iteration 6, loss = 16.89400604
Iteration 7, loss = 14.50129148
Iteration 8, loss = 12.99143541
Iteration 9, loss = 12.01925256
Iteration 10, loss = 11.50177126
Iteration 11, loss = 11.09619246
Iteration 12, loss = 10.81957710
Iteration 13, loss = 10.60769668
Iteration 14, loss = 10.45566333
Iteration 15, loss = 10.29129976
Iteration 16, loss = 10.24714445
Iteration 17, loss = 10.15110813
Iteration 18, loss = 10.13871236
Iteration 19, loss = 10.06466002
Iteration 20, loss = 9.95050717
Iteration 21, loss = 9.90934046
Iteration 22, loss = 9.77769644
Iteration 23, loss = 9.77852294
Iteration 24, loss = 9.69565105
Iteration 25, loss = 9.72395607
Iteration 26, loss = 9.65423581
Iteration 27, loss = 9.65951102
Iteration 28, loss = 9.64248939
Iteration 29, loss = 9.59047511
Iteration 30, loss = 9.56429176
Iteration 31, loss = 9.51



Iteration 1, loss = 13484.71792897
Iteration 2, loss = 887.51720797
Iteration 3, loss = 117.34693773
Iteration 4, loss = 35.89773582
Iteration 5, loss = 21.97673281
Iteration 6, loss = 17.40778942
Iteration 7, loss = 14.95373202
Iteration 8, loss = 13.52155341
Iteration 9, loss = 12.63344555
Iteration 10, loss = 12.09477901
Iteration 11, loss = 11.62731528
Iteration 12, loss = 11.30785508
Iteration 13, loss = 11.14135014
Iteration 14, loss = 10.92423974
Iteration 15, loss = 10.82226989
Iteration 16, loss = 10.62995147
Iteration 17, loss = 10.51853862
Iteration 18, loss = 10.37798185
Iteration 19, loss = 10.35766718
Iteration 20, loss = 10.33218843
Iteration 21, loss = 10.32039455
Iteration 22, loss = 10.18604509
Iteration 23, loss = 10.15594242
Iteration 24, loss = 10.16306009
Iteration 25, loss = 10.04588071
Iteration 26, loss = 9.99863405
Iteration 27, loss = 9.96792003
Iteration 28, loss = 9.94055384
Iteration 29, loss = 9.83664085
Iteration 30, loss = 9.83300735
Iteration 31, loss 



Iteration 1, loss = 13354.57938972
Iteration 2, loss = 839.13772878
Iteration 3, loss = 116.49190162
Iteration 4, loss = 36.63688791
Iteration 5, loss = 22.12628482
Iteration 6, loss = 17.05797415
Iteration 7, loss = 14.51143741
Iteration 8, loss = 13.10089109
Iteration 9, loss = 12.20086162
Iteration 10, loss = 11.62597727
Iteration 11, loss = 11.11626810
Iteration 12, loss = 10.81345696
Iteration 13, loss = 10.48358195
Iteration 14, loss = 10.41417149
Iteration 15, loss = 10.32133774
Iteration 16, loss = 10.14248415
Iteration 17, loss = 10.08462679
Iteration 18, loss = 10.01081337
Iteration 19, loss = 9.89123982
Iteration 20, loss = 9.86261946
Iteration 21, loss = 9.79087557
Iteration 22, loss = 9.77770678
Iteration 23, loss = 9.66597303
Iteration 24, loss = 9.69843039
Iteration 25, loss = 9.65111497
Iteration 26, loss = 9.61274771
Iteration 27, loss = 9.62863564
Iteration 28, loss = 9.60139047
Iteration 29, loss = 9.61045305
Iteration 30, loss = 9.57725852
Iteration 31, loss = 9.509



Iteration 1, loss = 13338.15487702
Iteration 2, loss = 876.37121940
Iteration 3, loss = 117.41044644
Iteration 4, loss = 36.21746859
Iteration 5, loss = 21.85254631
Iteration 6, loss = 17.20132677
Iteration 7, loss = 14.74812032
Iteration 8, loss = 13.45278324
Iteration 9, loss = 12.53470977
Iteration 10, loss = 11.91924185
Iteration 11, loss = 11.45324250
Iteration 12, loss = 11.08859246
Iteration 13, loss = 10.76324282
Iteration 14, loss = 10.59691827
Iteration 15, loss = 10.38689421
Iteration 16, loss = 10.26907381
Iteration 17, loss = 10.12072954
Iteration 18, loss = 10.02204096
Iteration 19, loss = 9.95121919
Iteration 20, loss = 9.88710679
Iteration 21, loss = 9.84659405
Iteration 22, loss = 9.71419653
Iteration 23, loss = 9.67070625
Iteration 24, loss = 9.57361618
Iteration 25, loss = 9.52628835
Iteration 26, loss = 9.51959579
Iteration 27, loss = 9.43588351
Iteration 28, loss = 9.34471916
Iteration 29, loss = 9.28215442
Iteration 30, loss = 9.21333300
Iteration 31, loss = 9.287



In [5]:
results

{('area',): 5.099858216678731,
 ('tmax',): 4.897088939036051,
 ('rms',): 4.875721949870735,
 ('area', 'tmax'): 4.932731016850282,
 ('area', 'rms'): 4.813012135913863,
 ('rms', 'tmax'): 4.810926736529878,
 ('area', 'tmax', 'rms'): 4.681118470934068}