## Cocomo81

In [10]:
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt

from scipy.io import arff

#algoritmos ia
from sklearn.neighbors import KNeighborsRegressor 
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import svm
from sklearn.tree import DecisionTreeRegressor

from sklearn import model_selection as ms
from sklearn.model_selection import GridSearchCV

## Preparando dados

In [11]:
#Carregando dados

data, meta = arff.loadarff('datasets/cocomo81.arff')
df = pd.DataFrame(data)
df.head()

Unnamed: 0,rely,data,cplx,time,stor,virt,turn,acap,aexp,pcap,vexp,lexp,modp,tool,sced,loc,actual
0,0.88,1.16,0.7,1.0,1.06,1.15,1.07,1.19,1.13,1.17,1.1,1.0,1.24,1.1,1.04,113.0,2040.0
1,0.88,1.16,0.85,1.0,1.06,1.0,1.07,1.0,0.91,1.0,0.9,0.95,1.1,1.0,1.0,293.0,1600.0
2,1.0,1.16,0.85,1.0,1.0,0.87,0.94,0.86,0.82,0.86,0.9,0.95,0.91,0.91,1.0,132.0,243.0
3,0.75,1.16,0.7,1.0,1.0,0.87,1.0,1.19,0.91,1.42,1.0,0.95,1.24,1.0,1.04,60.0,240.0
4,0.88,0.94,1.0,1.0,1.0,0.87,1.0,1.0,1.0,0.86,0.9,0.95,1.24,1.0,1.0,16.0,33.0


In [12]:
#removendo linhas com atributos nulos ou vazios

df = df.dropna()

#normalizando dados - #min-max df=(df-df.min())/(df.max()-df.min()) - #zscore df=(df-df.mean())/df.std()

df = (df-df.mean())/df.std()

#separando atributo classe actual para entrada nos modelos #df.columns[-1]

X = df.drop(df.columns[-1], axis=1)
y = df.take([-1], axis=1).values.ravel()

## Treinando e avaliando o desempenho dos modelos

In [13]:
cv = ms.KFold(n_splits=3, shuffle=True, random_state=1)
models = []
#Instanciando os modelos #0 - nome, 1 - instância, 2 - parâmetros
models.append(['DT', DecisionTreeRegressor(), { "criterion" : ["squared_error", "friedman_mse", "absolute_error"],
                                                "splitter": ["best", "random"],
                                                "max_depth": np.arange(4,12),
                                                "random_state": np.arange(2,6)
                                                }])
models.append(['KNN', KNeighborsRegressor(), {  "n_neighbors" : np.arange(1,10),
                                                "weights": ["uniform", "distance"],
                                                "algorithm": ['auto', 'ball_tree', 'kd_tree', 'brute']
                                                }])
models.append(['MLP', MLPRegressor(), {"hidden_layer_sizes": np.arange(10,100,10),
                                                "activation": ['identity', 'logistic', 'tanh', 'relu'],
                                                "solver": ['lbfgs', 'sgd', 'adam'],
                                                "random_state": [2], #np.arange(2,6) fixado para reduzir o tempo de processamento.
                                                "max_iter": [1500]
                                                }])
models.append(['RFR', RandomForestRegressor(), {"n_estimators": np.arange(5,20,5),
                                                    "max_depth" : np.arange(2,14,2),
                                                    "random_state": np.arange(2,6),
                                                    }])
models.append(['SVR', svm.SVR(), { "kernel" : [ 'poly', 'linear', 'rbf', 'sigmoid'], 
                                        "C": np.arange(2,14,2), 
                                        "epsilon": np.arange(0.001,0.1,0.001), 
                                    }])

In [14]:
t_mean = 0
for model in models:
    clf = GridSearchCV(model[1], model[2], scoring='neg_mean_absolute_error',  cv=cv,  n_jobs=-1)
    clf.fit(X, y)
    print(">", model[0], ">------ MAE: %.3f" % (1 + clf.best_score_), " ----- STD: %.3f" % (clf.cv_results_['std_test_score'][clf.best_index_]), " ----- Best Params:", (clf.best_params_))
    t_mean = t_mean + (1 + clf.best_score_)

print("> Média Total:  %.3f" % (t_mean/len(models))) 

> DT >------ MAE: 0.753  ----- STD: 0.101  ----- Best Params: {'criterion': 'absolute_error', 'max_depth': 8, 'random_state': 3, 'splitter': 'random'}
> KNN >------ MAE: 0.682  ----- STD: 0.194  ----- Best Params: {'algorithm': 'auto', 'n_neighbors': 4, 'weights': 'distance'}
> MLP >------ MAE: 0.656  ----- STD: 0.138  ----- Best Params: {'activation': 'relu', 'hidden_layer_sizes': 60, 'max_iter': 1500, 'random_state': 2, 'solver': 'lbfgs'}
> RFR >------ MAE: 0.709  ----- STD: 0.142  ----- Best Params: {'max_depth': 6, 'n_estimators': 15, 'random_state': 3}
> SVR >------ MAE: 0.696  ----- STD: 0.140  ----- Best Params: {'C': 10, 'epsilon': 0.023, 'kernel': 'linear'}
> Média Total:  0.699
