## Importação dos pacotes

In [1]:
# importar pacotes necessários
import numpy as np
import pandas as pd

In [2]:
# importar os pacotes necessários para os algoritmos de regressão
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
#from sklearn.ensemble import VotingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lars
from sklearn.linear_model import LassoLars
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import ARDRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import Perceptron
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import TheilSenRegressor
from sklearn.linear_model import HuberRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.svm import NuSVR
from sklearn.svm import LinearSVR
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

In [3]:
# Suffix that selects which training file is loaded; it is also embedded in the
# names of the submission files written at the end of the notebook.
sufixo_arquivo = 'o11-6.15'

## Carga dos dados de entrada

In [5]:
# Load the training data. The file is chosen by `sufixo_arquivo`, and the 'id'
# column becomes the DataFrame index.
# Alternative kept for reference (unsuffixed file in the working directory):
#   train_data = pd.read_csv('abalone-train.csv', index_col='id')
train_data = pd.read_csv(
    'input/abalone-train-' + sufixo_arquivo + '.csv',
    index_col='id',
)

In [6]:
# Load the test data, indexed by its 'id' column.
# NOTE(review): this file is read from the working directory, while the training
# file is read from 'input/' — confirm that this path is intended.
test_data = pd.read_csv('abalone-test.csv', index_col='id')

## Transformações nos dados

In [7]:
# Remove the categorical 'sex' column from both data sets. The one-hot
# alternative (pd.get_dummies(<frame>['sex'], prefix='sex') joined back onto the
# frame) is currently disabled.
#
# The frames are reassigned with errors='ignore' instead of being modified in
# place, so this cell can be re-run: a second execution is a no-op rather than
# a KeyError for the already-missing column.
train_data = train_data.drop('sex', axis=1, errors='ignore')
test_data = test_data.drop('sex', axis=1, errors='ignore')

## Seleção dos dados de treino e teste

In [8]:
# Split the training frame into the target and the feature matrix.
y_train = train_data['rings']                    # target column only
X_train = train_data.drop(labels='rings', axis=1)  # every column except the target

print('Forma dos dados de treino:', X_train.shape, y_train.shape)

Forma dos dados de treino: (2565, 7) (2565,)


In [9]:
# Define the test features.

X_test = test_data # everything, since the test data has no target column

print('Forma dos dados de teste:', X_test.shape)

Forma dos dados de teste: (1393, 7)


## Treinamento dos modelos e geração dos resultados 

In [9]:
# Candidate estimators as (short name, estimator) pairs. The short name is
# lower-cased by the training loop to label each model's submission file.
#
# NOTE: LogisticRegression, Perceptron and GaussianNB are classifiers, so they
# treat each distinct 'rings' value as a class label instead of fitting a
# continuous target.
models = [
    # Generalized Linear Models
    # NOTE(review): scikit-learn documents `normalize` as ignored when
    # fit_intercept=False, so it should have no effect on LinReg and Ridge.
    ('LinReg', LinearRegression(n_jobs=2, fit_intercept=False, normalize=True)),
    ('LogReg', LogisticRegression(n_jobs=2, random_state=42, multi_class='auto', C=1000, solver='newton-cg')),
    # OMP cannot select more atoms than there are features (fit raises
    # ValueError otherwise), so the requested 8 is capped at the feature count.
    ('OMP', OrthogonalMatchingPursuit(n_nonzero_coefs=min(8, X_train.shape[1]),
                                      fit_intercept=True, normalize=True)),
    ('PAR', PassiveAggressiveRegressor(random_state=42, C=0.1, fit_intercept=True, max_iter=1000, tol=0.001)),
    ('PP', Perceptron(random_state=42, penalty='l2', alpha=1e-5, fit_intercept=True, max_iter=1000, tol=1e-3)),
    ('RANSAC', RANSACRegressor(random_state=42, min_samples=0.75)),
    ('Ridge', Ridge(random_state=42, alpha=0.1, fit_intercept=False, normalize=True)),
    ('SGD', SGDRegressor(random_state=42, alpha=1e-06, fit_intercept=True, penalty=None, tol=1e-3)),
    ('TSR', TheilSenRegressor(random_state=42, n_jobs=2, fit_intercept=True)),

    # Decision Trees
    ('DTR', DecisionTreeRegressor(random_state=42, max_depth=4, min_samples_split=0.25)),

    # Gaussian Processes
    ('GPR', GaussianProcessRegressor(random_state=42, alpha=0.01, normalize_y=True)),

    # Kernel Ridge Regression
    ('KRR', KernelRidge()),

    # Naive Bayes
    ('GNB', GaussianNB(var_smoothing=0.1)),

    # Nearest Neighbors
    ('kNN', KNeighborsRegressor(n_jobs=2, n_neighbors=13, weights='distance')),

    # Support Vector Machines
    ('SVM', SVR(gamma='auto', kernel='linear')),

    # Neural network models
    ('MLP', MLPRegressor(random_state=42, max_iter=500,
                         activation='logistic', hidden_layer_sizes=(50,), solver='lbfgs')),

    # Ensemble Methods
    ('RFR', RandomForestRegressor(random_state=42, n_jobs=2, n_estimators=100, max_depth=7)),
    ('GBR', GradientBoostingRegressor(random_state=42, learning_rate=0.05, n_estimators=100,
                                      subsample=0.4, max_depth=6, max_features=1.0)),
    ('ETR', ExtraTreesRegressor(random_state=42, n_jobs=2, n_estimators=200, max_features=1.0)),
    ('BDTR', BaggingRegressor(random_state=42, n_jobs=2, base_estimator=DecisionTreeRegressor())),
    ('ABDTR', AdaBoostRegressor(random_state=42, n_estimators=100, base_estimator=DecisionTreeRegressor())),

    # XGBoost
    ('XGBR', XGBRegressor(random_state=42, n_jobs=2, learning_rate=0.1,
                          n_estimators=50, max_depth=5, objective='reg:squarederror')),
]

In [10]:
!mkdir submissions

mkdir: cannot create directory ‘submissions’: File exists


In [12]:
for name, model in models:
    print(model, '\n')

    # Fit on the training set, then predict the target for the test set.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Assemble the submission table: one predicted 'rings' value per test 'id'.
    submission = pd.DataFrame({'id': X_test.index, 'rings': y_pred}).set_index('id')

    # Write the CSV, named after the data-set suffix and the lower-cased model name.
    filename = 'submissions/abalone-submission-p-%s-%s.csv' % (sufixo_arquivo, name.lower())
    submission.to_csv(filename)

MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(50,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=42, shuffle=True, solver='lbfgs', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False) 



In [13]:
# Inspect the first lines of each generated submission file.
!head submissions/abalone-submission-p-*.csv

==> submissions/abalone-submission-p-abdtr.csv <==
id,rings
1512,10.0
1401,11.0
509,11.0
2883,8.0
3259,11.0
2135,9.0
1499,10.0
3597,11.0
2414,8.0

==> submissions/abalone-submission-p-bdtr.csv <==
id,rings
1512,9.5
1401,10.5
509,11.1
2883,8.4
3259,11.9
2135,10.0
1499,10.0
3597,11.3
2414,9.8

==> submissions/abalone-submission-p-dtr.csv <==
id,rings
1512,10.481404958677686
1401,12.079136690647482
509,12.793814432989691
2883,7.406926406926407
3259,12.079136690647482
2135,9.669218989280244
1499,10.481404958677686
3597,12.079136690647482
2414,7.406926406926407

==> submissions/abalone-submission-p-etr.csv <==
id,rings
1512,10.675
1401,10.7
509,10.59
2883,8.37
3259,11.315
2135,8.65
1499,9.705
3597,11.715
2414,8.12

==> submissions/abalone-submission-p-gbr.csv <==
id,rings
1512,10.338712532548499
1401,11.504335292274668
509,10.805311071733177
2883,8.004956288265097
3259,10.8284434353022
2135,9.611951164989971
1499,9.490343285069773
3597,11.831989258230958
2414,8.095341268852662

==> submissi

id,rings
1512,10.16856920474763
1401,11.267367936361739
509,11.126073262885072
2883,8.087839938142217
3259,10.747164098912076
2135,9.059914612569791
1499,9.751131252839723
3597,11.641050337940516
2414,7.9107575965198365

==> submissions/abalone-submission-p-o4-gnb.csv <==
id,rings
1512,11
1401,11
509,8
2883,7
3259,11
2135,9
1499,10
3597,11
2414,6

==> submissions/abalone-submission-p-o4-gpr.csv <==
id,rings
1512,9.355318978028961
1401,11.063983009922666
509,13.417888380304547
2883,8.363015852088964
3259,11.895245616316586
2135,9.145761142158406
1499,9.281954381198219
3597,11.88588278496907
2414,7.990448720025441

==> submissions/abalone-submission-p-o4-knn.csv <==
id,rings
1512,10.450920122076752
1401,10.547940060237595
509,11.000246426623573
2883,8.294157251191635
3259,12.770812891655028
2135,8.334921659332283
1499,9.948263505915676
3597,11.613391575547311
2414,7.302274816638973

==> submissions/abalone-submission-p-o4-krr.csv <==
id,rings
1512,9.1929540319083
1401,11.37936775746197
5

id,rings
1512,10.325233644859813
1401,12.621832358674464
509,12.194805194805195
2883,7.460887949260043
3259,12.621832358674464
2135,9.815261044176706
1499,10.325233644859813
3597,12.621832358674464
2414,7.460887949260043

==> submissions/abalone-submission-p-o9-0.50-etr.csv <==
id,rings
1512,10.57
1401,10.495
509,10.39
2883,8.49
3259,11.29
2135,8.85
1499,9.945
3597,12.03
2414,8.075

==> submissions/abalone-submission-p-o9-0.50-gbr.csv <==
id,rings
1512,9.87285165348793
1401,10.851799372157407
509,11.847504663122145
2883,7.91268603503088
3259,11.360657247241175
2135,9.709072937919581
1499,9.742350583238855
3597,12.702666916278588
2414,8.28399656533435

==> submissions/abalone-submission-p-o9-0.50-gnb.csv <==
id,rings
1512,11
1401,11
509,8
2883,7
3259,11
2135,9
1499,10
3597,11
2414,6

==> submissions/abalone-submission-p-o9-0.50-gpr.csv <==
id,rings
1512,9.450294320761408
1401,11.0727681606621
509,13.226506458349014
2883,8.353146624774446
3259,11.876225907909976
2135,9.19481535748421
149