In [None]:
# Experiment using all the Palmer images to predict the time of each sample, with 5-fold cross-validation

In [1]:
import cv2
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os 
import math
import scipy

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit, KFold

%load_ext autoreload
plt.rcParams['figure.figsize'] = [20, 15]

path = os.getcwd()+'\\..\\imagens\\'
sys.path.append(os.getcwd()+'\\..\\scripts\\')
from MNG import MNG
from MNGFeatures import MNGFeatures
from MNGFeaturesMeans import MNGFeaturesMeans
from MNGFeaturesSize import MNGFeaturesSize
from MNGModel import MNGModel

In [57]:
# Number of cross-validation folds. The splitter, the result-table row labels
# and the per-fold index holders below are all derived from this one value.
cv_num = 5

# Row labels of the result tables: one row per fold plus a final row ('media')
# that receives the mean over the folds.
indexes = ['fold' + str(fold) for fold in range(1, cv_num + 1)] + ['media']
# Column labels of the result tables: one column per evaluated model.
columns = ['MLR', 'RF100', 'RF200', 'RF300', 'RF400', 'RF500', 'RF600', 'RF700']

# Location of the feature table. The MNG_FEATURES_CSV environment variable, when
# set, overrides the original machine-specific default so the notebook can run
# on another machine without editing this cell.
data_path = os.environ.get(
    'MNG_FEATURES_CSV',
    'C:\\Users\\juju-\\Desktop\\projetos\\mng\\features\\all_features.csv',
)
data = pd.read_csv(data_path, sep=';', index_col=0)

# ss = ShuffleSplit(n_splits=cv_num, test_size=0.2,random_state=0)
# A fixed random_state makes the shuffled folds identical on every run, so the
# feature groups evaluated in the cells below are compared on the same splits.
kf = KFold(n_splits=cv_num, shuffle=True, random_state=0)

# Per-fold holders for the row positions of the most recent split
# (overwritten by compute_models on every call).
train = [[] for _ in range(cv_num)]
test = [[] for _ in range(cv_num)]

In [65]:
def compute_models(data, tree_counts=(100,)):
    """Cross-validate a linear regression and random forests on ``data``.

    The ``'time'`` column of ``data`` is the regression target; every other
    column is used as a feature. Folds come from the notebook-level splitter
    ``kf``. For each fold the train/test row positions are also stored in the
    notebook-level lists ``train`` and ``test``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing a ``'time'`` column plus the feature columns.
    tree_counts : iterable of int, optional
        ``n_estimators`` values for which a ``RandomForestRegressor`` is
        trained. Each value ``n`` is reported in column ``'RF<n>'``. The
        default trains only the 100-tree forest.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        R2 table and RMSE table, both indexed by the notebook-level ``indexes``
        with the notebook-level ``columns``. The last row holds the mean over
        the fold rows. Columns of models that were not trained stay NaN.
    """
    # Float tables so the fold scores are numeric and the means are plain floats.
    df_r2 = pd.DataFrame(index=indexes, columns=columns, dtype=float)
    df_rmse = pd.DataFrame(index=indexes, columns=columns, dtype=float)

    X = data.drop(columns='time')
    Y = data['time']

    for i, (train_i, test_i) in enumerate(kf.split(X)):
        # Expose the split of this fold to the rest of the notebook.
        train[i] = train_i
        test[i] = test_i
        X_train, X_test = X.iloc[train_i], X.iloc[test_i]
        Y_train, Y_test = Y.iloc[train_i], Y.iloc[test_i]

        # Fresh, unfitted estimators for this fold, keyed by result column.
        models = {'MLR': LinearRegression()}
        for tree in tree_counts:
            models['RF' + str(tree)] = RandomForestRegressor(n_estimators=tree)

        fold_label = indexes[i]
        for column, model in models.items():
            Y_predicted = model.fit(X_train, Y_train).predict(X_test)
            # Single-step .at writes: chained `df[col].iloc[i] = value` updates
            # a temporary object and is not guaranteed to reach the frame.
            df_r2.at[fold_label, column] = r2_score(Y_test, Y_predicted)
            df_rmse.at[fold_label, column] = math.sqrt(mean_squared_error(Y_test, Y_predicted))

    # Last row = mean over the fold rows, for every column at once.
    df_r2.iloc[-1] = df_r2.iloc[:-1].mean()
    df_rmse.iloc[-1] = df_rmse.iloc[:-1].mean()

    return df_r2, df_rmse

In [18]:
# Feature group: mean and size columns
cols = [
    'time',
    'mean_R_full', 'mean_G_full', 'mean_B_full',
    'mean_H_full', 'mean_S_full', 'mean_V_full',
    'mean_L_full', 'mean_a_full', 'mean_b_full',
    'area', 'diameter', 'height', 'width',
]
r2, rmse = compute_models(data[cols])

In [107]:
# Feature group A1: mean columns
# NOTE(review): 'mean_b_full' appears in the "mean and size" list but is absent
# here — confirm the omission is intentional.
cols = [
    'time',
    'mean_R_full', 'mean_G_full', 'mean_B_full',
    'mean_H_full', 'mean_S_full', 'mean_V_full',
    'mean_L_full', 'mean_a_full',
]
r2, rmse = compute_models(data[cols])

In [21]:
# Feature group A2: size columns
cols = [
    'time',
    'area',
    'diameter',
    'height',
    'width',
]
r2, rmse = compute_models(data[cols])

In [23]:
# Feature group A3: the single 'dominant_HSV' column
cols = [
    'time',
    'dominant_HSV',
]
r2, rmse = compute_models(data[cols])

In [24]:
# Feature group A4: rate columns
cols = [
    'time',
    'RG_rate',
    'RB_rate',
    'SH_rate',
]
r2, rmse = compute_models(data[cols])

In [26]:
# Feature group A5
cols = [
    'time',
    'RG_diff_full', 'RB_diff_full', 'GB_diff_full',
    'apex_R', 'apex_G', 'apex_B',
    'equator_R', 'equator_G', 'equator_B',
    'stalk_R', 'stalk_G', 'stalk_B',
    'apex_equator_R_diff', 'equator_stalk_R_diff', 'apex_stalk_R_diff',
    'apex_equator_G_diff', 'equator_stalk_G_diff', 'apex_stalk_G_diff',
    'apex_equator_B_diff', 'equator_stalk_B_diff', 'apex_stalk_B_diff',
    'long_gradient',
    'mean_R_full', 'mean_G_full', 'mean_B_full',
]
r2, rmse = compute_models(data[cols])

In [32]:
# Feature group A6: the single 'area' column
cols = [
    'time',
    'area',
]
r2, rmse = compute_models(data[cols])

In [36]:
# Feature group A7: the 'region_<n>_<channel>' columns.
# Names are generated in the original order: all R/G/B columns for regions 0-4,
# then all H/S/V columns, then all L/a/b columns.
cols = ['time'] + [
    'region_' + str(region) + '_' + channel
    for channels in ('RGB', 'HSV', 'Lab')
    for region in range(5)
    for channel in channels
]
r2, rmse = compute_models(data[cols])

In [40]:
# Feature group A8: the 'region_<n>_<channel>' columns plus the pairwise
# '<i>_<j>_<channel>_diff' columns.
# The region names were previously listed twice, so data[cols] selected every
# region column twice and the models received duplicated features. Each name is
# now generated exactly once, in the original order (R/G/B block, then H/S/V,
# then L/a/b; region pairs 0_1, 0_2, ..., 3_4).
cols = (
    ['time']
    + [
        'region_' + str(region) + '_' + channel
        for channels in ('RGB', 'HSV', 'Lab')
        for region in range(5)
        for channel in channels
    ]
    + [
        str(first) + '_' + str(second) + '_' + channel + '_diff'
        for channels in ('RGB', 'HSV', 'Lab')
        for first in range(5)
        for second in range(first + 1, 5)
        for channel in channels
    ]
)

r2, rmse = compute_models(data[cols])

In [58]:
# Feature group A9
cols = [
    'time',
    'mean_L_full', 'mean_a_full', 'mean_b_full',
    'bcd', 'cd', 'dd',
]

r2, rmse = compute_models(data[cols])

In [61]:
# Feature group A10
cols = [
    'time',
    'bcd',
    'cd',
    'dd',
]

r2, rmse = compute_models(data[cols])

In [44]:
# Feature group A11
cols = [
    'time',
    'mean_H_full',
    'mean_S_full',
    'mean_V_full',
]

r2, rmse = compute_models(data[cols])

In [47]:
# Feature group A12
cols = [
    'time',
    'mean_H_full', 'mean_S_full', 'mean_V_full',
    'mean_L_full', 'mean_a_full', 'mean_b_full',
]

r2, rmse = compute_models(data[cols])

In [51]:
# Feature group A13
cols = [
    'time',
    'mean_H_full', 'mean_S_full', 'mean_V_full',
    'mean_R_full', 'mean_G_full', 'mean_B_full',
    'RG_rate', 'RB_rate', 'SH_rate',
]

r2, rmse = compute_models(data[cols])

In [54]:
# Feature group A14
cols = [
    'time',
    'mean_b_full',
    'area',
    'diameter',
]

r2, rmse = compute_models(data[cols])

In [66]:
# ALL - new: every column of `data` except the 'region_<n>_<channel>' columns
# and the pairwise '<i>_<j>_<channel>_diff' columns, which are dropped here.
r2, rmse = compute_models(data.drop(columns=[
    'region_' + str(region) + '_' + channel
    for channels in ('RGB', 'HSV', 'Lab')
    for region in range(5)
    for channel in channels
] + [
    str(first) + '_' + str(second) + '_' + channel + '_diff'
    for channels in ('RGB', 'HSV', 'Lab')
    for first in range(5)
    for second in range(first + 1, 5)
    for channel in channels
]))

In [69]:
# ALL + new: evaluate the models on every column of `data`

r2, rmse = compute_models(data=data)