## Cocomo81

In [None]:
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt

from scipy.io import arff

#algoritmos ia
from sklearn.neighbors import KNeighborsRegressor 
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import svm
from sklearn.tree import DecisionTreeRegressor

from sklearn import model_selection as ms
from sklearn.model_selection import GridSearchCV

## Preparando dados

In [None]:
#Carregando dados

data, meta = arff.loadarff('datasets/cocomo81.arff')
df = pd.DataFrame(data)
df.head()

In [None]:
#removendo linhas com atributos nulos ou vazios

df = df.dropna()

#normalizando dados - #min-max df=(df-df.min())/(df.max()-df.min()) - #zscore df=(df-df.mean())/df.std()

df = (df-df.mean())/df.std()

#separando atributo classe actual para entrada nos modelos #df.columns[-1]

X = df.drop(df.columns[-1], axis=1)
y = df.take([-1], axis=1)

## Treinando e avaliando o desempenho dos modelos

In [None]:
np.arange(0.001,0.1,0.001)

In [None]:
np.arange(2,11,2)

In [317]:
models = []
#Instanciando os modelos #0 - nome, 1 - instância, 2 - parâmetros
#models.append(['DT', DecisionTreeRegressor())
"""models.append(['KNN', KNeighborsRegressor(), {"p": [1,2], 
                                                    "n_neighbors" : np.arange(1,11), #1 a 10
                                                    "weights": ["uniform", "distance"]
                                                }])"""
models.append(['MLP', MLPRegressor(), {"hidden_layer_sizes": np.arange(10,100,10),
                                        "activation": ['identity', 'logistic', 'tanh', 'relu'],
                                        "solver": ['lbfgs', 'sgd', 'adam'],
                                        "alpha": np.arange(0.0001,0.001,0.0002),
                                        "learning_rate" : ['constant', 'invscaling', 'adaptive'],
                                        "random_state": [2],
                                        "max_iter": [3000]
                                        }])
"""models.append(['RFR', RandomForestRegressor(), {"n_estimators": np.arange(5,20,5),
                                                    "max_depth" : np.arange(2,14,2),
                                                    "random_state": np.arange(2,14,2)
                                                    }])"""
"""models.append(['SVR', svm.SVR(), { "kernel" : [ 'poly', 'linear', 'rbf', 'sigmoid'], 
                                        "C": np.arange(2,14,2), #1 a 10
                                        "epsilon": np.arange(0.001,0.1,0.002), # 0.1 a 0.9
                                    }])"""

cv = ms.KFold(n_splits=3, shuffle=True, random_state=1)

In [318]:
for model in models:
    clf = GridSearchCV(model[1], model[2], scoring='neg_mean_absolute_error',  cv=cv,  n_jobs=-1)
    clf.fit(X, y.values.ravel())
    print(">", model[0], ">------ MAE: %.4f" % (np.absolute(clf.best_score_)), " ----- STD: %.4f" % (clf.cv_results_['std_test_score'][clf.best_index_]), " ----- Best Params:", (clf.best_params_))

> MLP >------ MAE: 0.3438  ----- STD: 0.1377  ----- Best Params: {'activation': 'relu', 'alpha': 0.0007000000000000001, 'hidden_layer_sizes': 60, 'learning_rate': 'constant', 'max_iter': 3000, 'random_state': 2, 'solver': 'lbfgs'}


In [None]:
scores = ms.cross_val_score(KNeighborsRegressor(n_neighbors=4, weights='distance'), X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)

scores = np.absolute(scores)
s_mean = np.mean(scores)
s_std = np.std(scores)
    
print('Scores:', scores, 'MAE: %.4f' % (s_mean), 'STD: %.4f' % (s_std)) 

In [None]:


parameters={"splitter":["best","random"],
            "max_depth" : np.arange(1,10), #[ 1,  3,  5,  7,  9, 11, 13]
            "criterion": ["squared_error", "absolute_error"]
}
dt = DecisionTreeRegressor()
clf = GridSearchCV(dt, parameters, scoring='neg_mean_absolute_error',  cv=3,  n_jobs=-1)
clf.fit(X, y)

print(clf.best_estimator_)
print(clf.best_score_)

pd.DataFrame(clf.cv_results_)[['params',
                                'mean_test_score',
                                'rank_test_score',
                                'std_test_score']].sort_index(by=["rank_test_score"])



In [None]:
for model in models:
    
    scores = ms.cross_val_score(model[1], X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
    #converter para positivo.
    scores = np.absolute(scores)
    s_mean = np.mean(scores)
    s_std = np.std(scores)
    
    print('-', model[0],'------------------------------------')
    print('Scores:', scores, 'MAE: %.4f' % (s_mean), 'STD: %.4f' % (s_std)) 

In [None]:
dd