## Importação dos pacotes

In [1]:
# importar pacotes necessários
import numpy as np
import pandas as pd

In [2]:
# importar os pacotes necessários para os algoritmos de regressão
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
#from sklearn.ensemble import VotingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lars
from sklearn.linear_model import LassoLars
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import ARDRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import Perceptron
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import TheilSenRegressor
from sklearn.linear_model import HuberRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.svm import NuSVR
from sklearn.svm import LinearSVR
from sklearn.tree import DecisionTreeRegressor

## Carga dos dados de entrada

In [3]:
# carregar arquivo de dados de treino
train_data = pd.read_csv('abalone-train.csv', index_col='id')

In [4]:
# carregar arquivo de dados de teste
test_data = pd.read_csv('abalone-test.csv', index_col='id')

In [5]:
data_sex = pd.get_dummies(train_data['sex'], prefix='sex')
train_data = train_data.join(data_sex)
train_data.drop('sex', axis=1, inplace=True)

data_sex = pd.get_dummies(test_data['sex'], prefix='sex')
test_data = test_data.join(data_sex)
test_data.drop('sex', axis=1, inplace=True)

## Seleção dos dados de treino e teste

In [6]:
# definir dados de treino

X_train = train_data.drop(['rings'], axis=1) # tudo, exceto a coluna alvo
y_train = train_data['rings'] # apenas a coluna alvo

print('Forma dos dados de treino:', X_train.shape, y_train.shape)

Forma dos dados de treino: (2784, 10) (2784,)


In [7]:
# definir dados de teste

X_test = test_data # tudo, já que não possui a coluna alvo

print('Forma dos dados de teste:', X_test.shape)

Forma dos dados de teste: (1393, 10)


## Treinamento dos modelos e geração dos resultados 

In [9]:
models = []
models.append(('LinReg', LinearRegression(n_jobs=2, fit_intercept=True, normalize=True)))
models.append(('LogReg', LogisticRegression(random_state=42, n_jobs=2, multi_class='auto', C=1000, solver='newton-cg')))
models.append(('Ridge', Ridge(random_state=42, alpha=0.1, fit_intercept=False, normalize=True)))
models.append(('KNN', KNeighborsRegressor(n_jobs=2, n_neighbors=11, weights='distance')))
models.append(('SVM', SVR(gamma='auto', kernel='linear')))
models.append(('RF', RandomForestRegressor(random_state=42, n_jobs=2, n_estimators=100, max_depth=7)))
models.append(('GB', GradientBoostingRegressor(random_state=42)))
models.append(('DT', DecisionTreeRegressor(random_state=42)))
models.append(('GP', GaussianProcessRegressor()))
models.append(('MLP', MLPRegressor(random_state=42)))

In [10]:
for name, model in models:
    print(model, '\n')
    
    # treinar o modelo
    model.fit(X_train, y_train)
    
    # executar previsão usando o modelo
    y_pred = model.predict(X_test)
    
    # gerar dados de envio (submissão)
    submission = pd.DataFrame({
      'id': X_test.index,
      'rings': y_pred
    })
    submission.set_index('id', inplace=True)

    # gerar arquivo CSV para o envio
    filename = 'abalone-submission-p-%s.csv' % name.lower()
    submission.to_csv(filename)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=2, normalize=True) 

LogisticRegression(C=1000, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='auto', n_jobs=2,
          penalty='l2', random_state=42, solver='newton-cg', tol=0.0001,
          verbose=0, warm_start=False) 

Ridge(alpha=0.1, copy_X=True, fit_intercept=False, max_iter=None,
   normalize=True, random_state=42, solver='auto', tol=0.001) 

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=2, n_neighbors=11, p=2,
          weights='distance') 

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False) 

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=7,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_le



In [11]:
# verificar conteúdo dos arquivos gerados
!head abalone-submission-p-*.csv

==> abalone-submission-p-dt.csv <==
id,rings
1512,11.0
1401,12.0
509,17.0
2883,9.0
3259,9.0
2135,11.0
1499,9.0
3597,13.0
2414,8.0

==> abalone-submission-p-gb.csv <==
id,rings
1512,10.009380909708643
1401,11.151726312062774
509,12.018518145626894
2883,7.810752670775166
3259,11.15306370585676
2135,9.317058657838714
1499,10.05816996813471
3597,12.123627728186323
2414,7.720781144138582

==> abalone-submission-p-gp.csv <==
id,rings
1512,9.91204833984375
1401,10.514533996582031
509,15.443496704101562
2883,7.8646697998046875
3259,11.645833015441895
2135,7.863286018371582
1499,8.795156955718994
3597,8.822440147399902
2414,9.293121337890625

==> abalone-submission-p-knn.csv <==
id,rings
1512,10.380819160905158
1401,10.29977072520611
509,11.138693350282232
2883,8.339181558178032
3259,13.03636964431456
2135,8.069914555004514
1499,9.798677764639338
3597,11.833865809371813
2414,7.273008642499337

==> abalone-submission-p-linreg.csv <==
id,rings
151