In [None]:
#!pip install --upgrade pip setuptools wheel
#!python -m pip install opencv-python
#!pip install matplotlib
#!pip freeze
#!pip install opencv-python-headless
#!pip install scikit-image
#!pip install -U scikit-fuzzy
#!pip install pandas
#!pip install scikit-learn
#!pip install tqdm==4.40.0

from matplotlib import pyplot as plt
import cv2
import os
import random
import pandas as pd
import numpy as np
from os.path import join
from skimage import measure
from skimage.feature import blob_dog, blob_log, blob_doh
import skfuzzy as fuzz
import library

### GENERATING FEATURES

In [None]:
import library
import texture_features
import color_features
import glcm_features
import hair_removal
import pandas as pd
import numpy as np
import cv2
import os

# Feature extraction: for every training image, remove hair, compute colour,
# GLCM and LBP features, and write one row per image to a CSV file.

flag = True

# NOTE(review): this CSV is what library.get_sample receives, but the rows
# below are written to 'featuresCh2TrainB.csv' -- confirm that the two files
# are really meant to differ.
output = os.path.join('/home',
                      'emily',
                      'Desktop',
                      'CAD',
                      'MelanomaChallenge',
                      'features',
                      'featuresCh1E.csv')

samples, flag = library.get_sample("/home/emily/Desktop/CAD/challenge2/train", output, flag)
print(len(samples))

# GLCM settings are the same for every image, so build them once.
angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
distances = [1]
colorspaces = ['rgb', 'hsv', 'lab', 'ycc', 'gray']

# Directory the per-image feature rows are written to.
features_dir = os.path.join('/home',
                            'emily',
                            'Desktop',
                            'CAD',
                            'MelanomaChallenge',
                            'features')

dictF = {}
features = pd.DataFrame()
count = 0

for sample in samples:
    count += 1

    # read image; cv2.imread returns None instead of raising when the file
    # cannot be read, so fail here with the offending path.
    img = cv2.imread(sample)
    if img is None:
        raise IOError("could not read image: {}".format(sample))

    # hair removal (applied to the raw image; the CLAHE / grey-world
    # preprocessing is currently disabled, so there is no `clahe` array)
    hairless = hair_removal.hair_remove(img, 17, 4)

    # start a fresh row for this image: name first, then label
    dictF = {'name': sample}
    if 'bcc' in sample:
        dictF['label'] = 1
    elif 'mel' in sample:
        dictF['label'] = 0
    elif 'scc' in sample:
        dictF['label'] = 2
    else:
        # keep the 'label' column present so every row has the same columns
        dictF['label'] = np.nan

    # color features
    dictF.update(color_features.extract_color_features(hairless))

    # glcm features, one set per colour space
    for cs in colorspaces:
        dictF.update(glcm_features.get_glcm(hairless, angles, distances, cs))

    # lbp features
    dictF.update(texture_features.extract_lbp(hairless, 1, 8))

    # One-row frame for this image. DataFrame.append was removed in
    # pandas 2.0, so the row is built directly from the dict.
    features = pd.DataFrame([dictF])

    # save features
    library.writeFeatures(features,
                          flag,
                          features_dir,
                          'featuresCh2TrainB.csv')

    # every write after the first one passes flag=False
    flag = False


# MACHINE LEARNING

In [None]:
import os
import library
import pandas as pd
from sklearn import tree
from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_selection import SelectKBest, f_classif, chi2, mutual_info_classif    

#
# Classifier comparison: every model listed here is trained inside the same
# scale -> RFE -> classifier pipeline and reported via library.fit_report.
classifiers = ["rf", "tree", "svm", "adaboost", "gradboost", "histgradboost", "knn", "lda"]

# Train and test feature tables live in the same directory.
features_dir = os.path.join('/home','emily','Desktop','CAD','MelanomaChallenge','features')
train = pd.read_csv(os.path.join(features_dir, 'features_train_bh_3000.csv'))
test = pd.read_csv(os.path.join(features_dir, 'features_test_bh_3000.csv'))

# Split each table into the feature matrix and the label column;
# 'name' is dropped together with 'label'.
X = train.drop(['label', 'name'], axis=1)
y = train['label']

X_test = test.drop(['label', 'name'], axis=1)
y_test = test['label']

# Hold back 20% of the test table as a validation split.
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.2, random_state=1)

# Alternative preprocessing steps that can be swapped into the pipeline:
#   ('selectFromModel', SelectFromModel(RandomForestClassifier(random_state=42, n_jobs = -1)))
#   ('selector rfe', RFE(RandomForestClassifier(random_state=42, n_jobs = -1))),
#   ('reduce_dims', PCA(n_components=150)),
#   ('mutual_info_classif', SelectKBest(mutual_info_classif, k=100)),

# Classifiers built by a library helper that is given the validation split
# and returns an (estimator, best_params) pair.
# NOTE(review): presumably a hyper-parameter search -- confirm in library.
tuned = {
    "svm": (lambda: library.SVC_linear(X_val, y_val, cv=2), "### SVM ###"),
    "rf": (lambda: library.RandomForest(X_val, y_val, cv=2), "### RF ###"),
    "adaboost": (lambda: library.AdaBoost(X_val, y_val), "### ADABOOST ###"),
    "gradboost": (lambda: library.GradientBoosting(X_val, y_val), "### GRADBOOST ###"),
    "knn": (lambda: library.knn(X_val, y_val), "### KNN ###"),
}

# Classifiers used with their default parameters.
untuned = {
    "tree": (tree.DecisionTreeClassifier, "### TREE ###"),
    "histgradboost": (HistGradientBoostingClassifier, "### HISTGRADBOOST ###"),
    "lda": (LinearDiscriminantAnalysis, "### LDA ###"),
}

for classifier in classifiers:

    # preprocessing steps, rebuilt for each classifier so nothing fitted is shared
    pipe = [
        ('scale', StandardScaler()),
        ('selector rfe', RFE(RandomForestClassifier(random_state=42, n_jobs = -1))),
    ]

    if classifier in tuned:
        build, banner = tuned[classifier]
        clf, best_params = build()
        clf.set_params(**best_params)
        print(banner)
    elif classifier in untuned:
        build, banner = untuned[classifier]
        clf = build()
        print(banner)

    # the classifier is the final pipeline step
    pipe.append(('clf', clf))
    steps = Pipeline(pipe)

    # pipeline shape
    print("current pipeline")
    print(steps)

    library.fit_report(steps, X, y, X_test, y_test)
    