In [1]:
import cv2
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os 
import timeit
import math
from shutil import copyfile

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import cross_val_score

%load_ext autoreload
plt.rcParams['figure.figsize'] = [20, 15]

path = os.getcwd()+'\\..\\imagens\\'
sys.path.append(os.getcwd()+'\\..\\scripts\\')
from MNG import MNG
from MNGFolds import MNGFolds
from MNGSegmentation import MNGSegmentation
from MNGPreProcessing import MNGPreProcessing
from MNGContour import MNGContour
from MNGFeaturesMeans import MNGFeaturesMeans
from MNGModel import MNGModel
from MNGFeaturesSize import MNGFeaturesSize

In [2]:
# Image folder, relative to the current working directory (same value as set in the first cell).
path = os.getcwd() + '\\..\\imagens\\'
imgs = os.listdir(path)
# Keep only the text before the first '.' of every directory entry.
img_names = [img.split('.')[0] for img in imgs]

# Build the MNG object and attach the segmentation, pre-processing and contour helpers to it.
# The pre-processing helper receives the segmentation helper, so the order matters.
mng = MNG(path, img_names, 'sst')
mng.segmentation = MNGSegmentation()
mng.preprocessing = MNGPreProcessing(mng.segmentation)
mng.contour = MNGContour()

# Use the first nine feature names exposed by mng.features and register them under the name 'means'.
# NOTE(review): mng.features is not assigned in any visible cell — presumably MNG creates it; confirm.
features_mlr = mng.features.feature_names[:9]
mng.features.current_features = features_mlr
mng.features.current_features_name = 'means'
mng.features.new_df()

In [28]:
def pre_process(img, ori, dest):
    """Run the pre-processing pipeline on one image and save the result.

    Parameters
    ----------
    img : str
        File name of the image (no directory part).
    ori : str
        Source directory prefix; it is concatenated directly with ``img``,
        so it must already end with a path separator.
    dest : str
        Destination directory prefix, concatenated with ``img`` the same way.

    Raises
    ------
    OSError
        If the source image cannot be read or the result cannot be written.
        OpenCV reports both failures only through its return value, so
        without these checks a bad path would go unnoticed.
    """
    bgr = cv2.imread(ori+img)
    if bgr is None:
        # cv2.imread returns None instead of raising when the file is missing/unreadable.
        raise OSError('could not read image: ' + ori + img)

    med = mng.preprocessing.median_filter(bgr, 11)
    # Second return value of otsu_thresholding is not needed here.
    filt, __ = mng.segmentation.otsu_thresholding(med)
    filt = mng.preprocessing.opening_operation(filt, np.ones((15,15), np.uint8))
    # NOTE(review): 85 is presumably a shadow threshold — confirm against MNGPreProcessing.
    filt = mng.preprocessing.remove_shadow(filt, 85)
    filt = mng.preprocessing.closing_operation(filt, med, np.ones((1200,1200), np.uint8))
    # Replace every zero-valued entry with 255.
    filt[filt==0] = 255
    filt = mng.contour.cut_image(filt)

    if not cv2.imwrite(dest+img, filt):
        # cv2.imwrite returns False (no exception) e.g. when the folder does not exist.
        raise OSError('could not write image: ' + dest + img)

In [25]:
# Sibling folders of the image folder: `pendentes` receives the copied images,
# `pendentes2` receives the output of pre_process.
# NOTE(review): neither folder is created by the visible cells — they presumably must exist beforehand.
path2 = path + '\\..\\pendentes\\' 
path3 = path + '\\..\\pendentes2\\'

# Re-list the image folder.
imgs = os.listdir(path)

In [26]:
# First column of sombras.csv, read as strings so the values keep their exact text.
erradas = pd.read_csv(path + '\\..\\sombras.csv', sep=';', dtype='object').iloc[:, 0].tolist()
# Dictionary keyed by those values, used only for membership tests.
dict_erradas = dict.fromkeys(erradas, 0)

# Copy into `path2` every image whose first four characters appear in the CSV.
for img in imgs:
    if img[:4] not in dict_erradas:
        continue
    copyfile(path+img, path2+img)

In [29]:
# Pre-process every file found in `path2`, writing each result into `path3`
# under the same file name.
imgs = os.listdir(path2)

for img in imgs:
    pre_process(img, path2, path3)

99


In [None]:
# Extract features for every image in `path2` and persist them.
# NOTE(review): images are read from `path2`, not from `path3` where pre_process
# writes its output — confirm this is the intended folder.
imgs = os.listdir(path2)
# 1-based progress counter sized from the actual listing. The previous fixed
# range (1..1680) combined with zip() silently skipped any image past the 1680th.
nums = np.arange(1, len(imgs) + 1)

for img,num in zip(imgs,nums):
    print(num)
    bgr = cv2.imread(path2+img)
    mng.features.extract_features(bgr, img)

# save_data returns the path of the file it wrote.
file_path = mng.features.save_data()

In [None]:
# NOTE(review): this overwrites the `file_path` returned by save_data() in the previous
# cell with a hard-coded absolute path, so the cell only works on the original machine.
file_path = 'C:\\Users\\juju-\\Desktop\\projetos\\mng\\features\\means_all_half.csv'

# CSV files passed to add_target together with the features file.
# `palmer_sst` and `tommy_sst` are reused by a later cell.
palmer_sst 		= 'C:\\Users\\juju-\\Desktop\\projetos\\mng\\palmer_sst.csv'
tommy_sst 		= 'C:\\Users\\juju-\\Desktop\\projetos\\mng\\tommy_sst.csv'
mng.features.add_target(file_path, palmer_sst, tommy_sst)

In [None]:
# Build the fold helper from the features file and split the data.
# NOTE(review): the third argument (7) is presumably the number of folds — confirm in MNGFolds.
mng.folds = MNGFolds(path, file_path, 7)
mng.folds.separate_folds()

In [424]:
# Build the MLR model (restricted to `features_mlr`) and the RF model for model type 'all',
# using the folds prepared in `mng.folds`.
model_type = 'all'
mng.model = MNGModel(path, mng.folds, model_type, 'sst')
mng.model.build_mlr_model(model_type, features_mlr)
mng.model.build_rf_model(model_type)

In [450]:
# Collapse the rows of means_all.csv that belong to the same sample (one row per
# "lado") into a single row holding the column-wise sum, then save the
# result and attach the target values.
df = pd.read_csv('C:\\Users\\juju-\\Desktop\\projetos\\mng\\features\\means_all.csv', sep=';', index_col=0)

new_data = df.copy()
columns = df.columns
index = df.index.values

# Each index label is split on '_' into three tokens.
info = [ind.split('_') for ind in index]

# token 0 without its first 4 characters            -> var
# last character of token 1, as int                 -> sem
# token 2 before 'lado', without its first 3 chars  -> num
# last character of token 2, as int                 -> lado
var = pd.Series([inf[0][4:] for inf in info], index)
sem = pd.Series([int(inf[1][-1]) for inf in info], index)
num = pd.Series([int(inf[2].split('lado')[0][3:]) for inf in info], index)
lado = pd.Series([int(inf[2][-1]) for inf in info], index)

new_data['var'] = var
new_data['sem'] = sem
new_data['num'] = num
new_data['lado'] = lado
# Label shared by all rows of one sample: the original label up to 'lado'.
new_data['ind'] = [ind.split('lado')[0] for ind in index]

# Sum only the original feature columns (helper columns are excluded).
feature_columns = [col for col in columns if col not in ('var', 'sem', 'num', 'lado', 'ind')]

# groupby returns the groups sorted by (var, sem, num), which is generally NOT
# the order of the rows in the file. Taking the label from inside each group
# keeps every summed row paired with its own sample name; slicing the original
# index with [::2] only worked when the file was already sorted and every
# sample had exactly two adjacent rows.
grouped = new_data.groupby(['var', 'sem', 'num'])
labels = grouped['ind'].first()
new_data = grouped[feature_columns].sum()
new_data['ind'] = labels  # aligned on the (var, sem, num) group keys
new_data = new_data.reset_index(drop=True).set_index('ind')

new_index = new_data.index.tolist()

file_path = path+'..\\features\\' + 'means' + '_all_half_soma.csv'
new_data.to_csv(file_path, sep=';')

# NOTE(review): `palmer_sst` and `tommy_sst` come from an earlier cell that must have been run.
mng.features.add_target(file_path, palmer_sst, tommy_sst)

In [452]:
# Rebuild the folds from the summed-sides features file.
# NOTE(review): hard-coded absolute path; the previous cell builds its `file_path` for the
# same file name relative to `path` — confirm both point to the same file.
file_path = 'C:\\Users\\juju-\\Desktop\\projetos\\mng\\features\\means_all_half_soma.csv'
mng.folds = MNGFolds(path, file_path, 7)
mng.folds.separate_folds()

In [455]:
# Same model-building steps as for 'all', this time for model type 'tommy'.
model_type = 'tommy'
mng.model = MNGModel(path, mng.folds, model_type, 'sst')
mng.model.build_mlr_model(model_type, features_mlr)
mng.model.build_rf_model(model_type)

In [469]:
# Cross-validate a plain linear regression on the 'tommy' feature table.
df = pd.read_csv('C:\\Users\\juju-\\Desktop\\projetos\\mng\\features\\tommy\\tommy_all.csv', sep=';', index_col=0)
# Predictors: the columns listed in `features_mlr`; target: the 'sst' column.
X = df[features_mlr]
Y = df['sst']
mlr_model = LinearRegression().fit(X, Y)
# 6-fold cross-validation scored with R^2.
# NOTE(review): with an integer `cv` the folds are presumably taken in file order without
# shuffling; if the rows are ordered (e.g. by sample), that could explain poor scores — verify.
scores = cross_val_score(mlr_model, X, Y, cv=6, scoring='r2')
scores

array([-0.1842658 , -0.05137528, -0.1888806 ,  0.02267984, -0.69343669,
       -0.21317699])