## Desharnais

In [2]:
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt

from scipy.io import arff

#algoritmos ia
from sklearn.neighbors import KNeighborsRegressor 
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import svm 

from sklearn import model_selection as ms
from sklearn.datasets import make_regression

## Preparando Dados

In [26]:
#Carregando dados

data, meta = arff.loadarff('datasets/desharnais.arff')
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Project,TeamExp,ManagerExp,YearEnd,Length,Effort,Transactions,Entities,PointsAdjust,Envergure,PointsNonAjust,Language
0,1.0,1.0,4.0,85.0,12.0,5152.0,253.0,52.0,305.0,34.0,302.0,b'1'
1,2.0,0.0,0.0,86.0,4.0,5635.0,197.0,124.0,321.0,33.0,315.0,b'1'
2,3.0,4.0,4.0,85.0,1.0,805.0,40.0,60.0,100.0,18.0,83.0,b'1'
3,4.0,0.0,0.0,86.0,5.0,3829.0,200.0,119.0,319.0,30.0,303.0,b'1'
4,5.0,0.0,0.0,86.0,4.0,2149.0,140.0,94.0,234.0,24.0,208.0,b'1'


In [27]:
def one_hot_enconder(X_transform, transform_data): #X_transform = data, transform_data = colunas
    
    for col in transform_data:
        cols = X_transform[col].unique() #retorna os valores da categoria que serão transformados em colunas 
        col_position = X_transform.columns.get_loc(col) #retorna posição da coluna, a partir dessa posição será adicionado as demais
        col_values = X_transform.pop(col) #retorna os valores da coluna primordial e apaga e remove 
        for c in cols: 
            X_transform.insert(col_position, str(col) + "_" + str(c).replace('b\'','').replace('\'',''), col_values.eq(c))
            col_position += 1

    return X_transform.replace({True: 1, False: 0})  

In [28]:
#removendo atributos desnecessário "Project" (id do projeto), "PointsAdjust" (PoinstNonAjust + fator
#de ajustes na técnica de pontos por função) e YearEnd.

df = df.drop(['Project','YearEnd', 'PointsAdjust'], axis=1)

#removendo linhas com atributos nulos ou vazios

df = df.dropna()

#Convertendo atributos categóricos em colunas. "OneHotEncoder"

transform_data = ['Language']
df =  one_hot_enconder(df, transform_data)

#normalizando dados - #min-max df=(df-df.min())/(df.max()-df.min()) - #zscore df=(df-df.mean())/df.std()

df = (df-df.mean())/df.std()

#separando atributo classe Effort para entrada nos modelos

X = df.drop(['Effort'], axis=1)
y = df['Effort']

## Treinando e avaliando o desempenho dos modelos

In [29]:
models = []
#Instanciando os modelos
models.append(['KNN', KNeighborsRegressor(n_neighbors=5)]) 
models.append(['MLP', MLPRegressor(activation='logistic', solver='sgd', alpha=0.02, max_iter=300, hidden_layer_sizes=200)])
models.append(['RFR', RandomForestRegressor()])
models.append(['SVR', svm.SVR()])

In [30]:
for model in models:
    cv = ms.KFold(n_splits=3, shuffle=True, random_state=1)
    scores = ms.cross_val_score(model[1], X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
    #converter para positivo.
    scores = np.absolute(scores)
    s_mean = np.mean(scores)
    s_std = np.std(scores)
    
    print('-', model[0],'------------------------------------')
    print('Scores:', scores, 'MAE: %.4f' % (s_mean), 'STD: %.4f' % (s_std)) 

- KNN ------------------------------------
Scores: [0.65863484 0.54939163 0.46204835] MAE: 0.5567 STD: 0.0804
- MLP ------------------------------------
Scores: [0.82196392 0.34494041 0.45481773] MAE: 0.5406 STD: 0.2040
- RFR ------------------------------------
Scores: [0.63216137 0.52002376 0.44766894] MAE: 0.5333 STD: 0.0759
- SVR ------------------------------------
Scores: [0.75838534 0.39715315 0.42373082] MAE: 0.5264 STD: 0.1644


