In [61]:
import pandas as pd
import cv2 as cv
import numpy as np
import glob 
import sys
import matplotlib.pyplot as plt
import os

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process import GaussianProcessClassifier

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_validate

def _get_lib_versions():
    import sklearn
    print(sklearn.__version__)

_get_lib_versions()

import pprint
pp = pprint.PrettyPrinter(indent=4)

0.19.2


In [28]:
!ls

ClassificationTry.ipynb  radial_compression_classifier.ipynb
LightGBM.ipynb		 refleX - preprocessing pipeline.ipynb
playground


In [32]:
#@title Classifier Parameters

# result_plot_dir = "/home/reflex/reflex/result_plots/" #@param {type:"string"}
joint_csv_path = '/home/reflex/refleX/metadata/joint_file.csv' #@param {type:"string"}
no_estimators = 70 #@param {type:"raw"}
no_cv_folds = 10 #@param {type:"raw"}
no_jobs = 8 #@param {type:"raw"}
no_neighbours = 3 #@param {type:"raw"}


## Global variables and constatnts

In [93]:
statistics = ["min" , "max", "var", "median", "mean","95th_percentile","5th_percentile"]
scoring_types = ['recall_macro','accuracy','precision','recall','f1','roc_auc']
class_names = ["Loop scattering","Background ring","Strong background","Diffuse scattering","Artifact","Ice ring","Non-uniform detector"]
classifier_names = ["RFC","DTC","KNN","GaussianNB","QuadraticDisciminantAnalysis"]

In [36]:
def get_classifier_filename(classifier_name, class_name):
  
    if classifier_name not in classifier_names:
        raise Exception("Invalid classifier name!")
  
    return dict(zip(classifier_names, [
      result_plot_dir + "RFClf-cv"+str(no_cv_folds)+"-n_est"+str(no_estimators)+"-"+class_name+".jpg",
      result_plot_dir + "DTClf-cv"+str(no_cv_folds)+class_name+".jpg",
      result_plot_dir + "KNNClf-cv"+str(no_cv_folds)+"-neighbours-"+str(no_neighbours)+"-"+class_name+".jpg",
      result_plot_dir + "GaussClf-cv"+str(no_cv_folds)+"-"+class_name+".jpg",
      result_plot_dir + "QDAClf-cv"+str(no_cv_folds)+"-"+class_name+".jpg",
      result_plot_dir + "GaussPrc-cv"+str(no_cv_folds)+"-"+class_name+".jpg",
      result_plot_dir + "SV-cv"+str(no_cv_folds)+"-"+class_name+".jpg" 
    ]))[classifier_name]
    

'''
Constructs an appropriate classifier object for the given classifier name.
'''
def get_classifier(classifier_name):
  
    if classifier_name not in classifier_names:
        raise Exception("Invalid classifier name!")
    
    classifier_objects = [
        RandomForestClassifier(random_state=23, n_estimators=no_estimators, n_jobs=no_jobs),
        DecisionTreeClassifier(random_state=10),
        KNeighborsClassifier(n_neighbors=no_neighbours, n_jobs=no_jobs),
        GaussianNB(),
        QuadraticDiscriminantAnalysis(),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        SVC(gamma=2, C=1, probability="True")
    ]
        
    return dict(zip(classifier_names, classifier_objects))[classifier_name]

In [11]:
def create_joint_vector(image, directory, statistics=None, all_statistics=False, sort=False, 
                        sort_function=lambda x: x, vector_length=None, 
                        fill_value=None, image_as_index=True, predefined_file_list=None, 
                        remove_suffix=False):

    """
    Tworzy dataframe z wybranego zdjecia w postaci <nazwa_zdjecia> <wektor_zlaczonych_statystyk>

    Params:
    --------------
    image                 - nazwa pliku ze zdjeciem (z suffixem (.SSSxSSS.png))
    directory             - katalog z katalogami ze wszystkimi statystykami
    statistics            - lista statystyk branych pod uwage
    all_statistics        - jezeli True to poprzedni parametr jest ignorowany i pod uwage bierzemy
                            wszystkie statystyki we wskazanym folderze
    sort                  - jezeli True to nazwy statystyk sortowane sa wedlug podanej funkcji
                            sortujacej (domyslnie leksykograficznie)
    sort_function         - funkcja (key) sortujaca statystyki (domyslnie leksykograficznie)
    vector_length         - parametr okreslajacy pozadana dlugosc wektora. W przypadku 
                            nadmiaru jest przycinany, w przeciwnym razie jest wypelniniany
                            kolejnym paremetrem. Niezdefiniowany (None) nie modyfikuje wektora.
    fill_value            - wartosc, ktora wypelniany bedzie wektor, jezeli będzie za krótki
                            w przypadku zdefiniowania dlugości
    image_as_index        - ustawie nazwe pliku jako index DataFrame'u
    remove_suffix         - usuwa suffix (.SSSxSSS.png) z nazwy pliku


    Returns:
    --------------
    None                  - w przpadku bledu (brak zdef. statystyk, zly katalog, zla nazwa pliku)
    Dataframe             - kiedy wszystko poszlo zgodnie z zalozeniami
                    
    """
  
    # Normalizacja sciezki do foldery
    if directory[-1] != '/':
        directory += '/'
    
    # Sprawdzamy czy mamy jakiekolwiek statystyki do złączenia
    statistic_names = list(os.listdir(directory)) if all_statistics else statistics 

    if statistic_names and sort:
        statistic_names.sort(key=sort_function)

    values = []

    # Laczenie wektora
    for stat_name in statistic_names:

        current_vector = cv.imread(f'{directory}{stat_name}/{image}', cv.IMREAD_GRAYSCALE).flatten().tolist()
      
        # Dostosowywanie dlugosci wektora
        if vector_length:
            current_vector = current_vector[:vector_length]
            current_vector.extend([fill_value] * max(0, vector_length - len(current_vector)))
      
        values.append(current_vector)

    # Tworzenie dataframe
    if remove_suffix:
        image = image[:-12]
      
    data = []
    column_names = ['img']
    for idx, stat_vec in enumerate(values):
        column_names += [f'{statistic_names[idx]}_{i}' for i in range(len(stat_vec))]
        data += stat_vec
      
    df = pd.DataFrame([[image, *data]], columns=column_names)

    # Ustawianie nowego indexu
    if image_as_index:
        df.set_index('img', inplace=True)

    return df
      
    return None


'''
Constructs a DataFrame from all vector files in the given directory.
'''
def vector_folder_to_df(directory, limit=None):
  
    files = list(os.listdir(directory + '/' + list(os.listdir(directory))[0]))
    all_dfs = []

    for idx, image in enumerate(files[:limit]): 
        all_dfs.append(create_joint_vector(image, directory, all_statistics=True, sort=True, vector_length=240, remove_suffix=True, image_as_index=True))
    
    return pd.concat(all_dfs)


def extract_filename(path):
    path, filename = os.path.split(path)
    return '.'.join(filename.split('.')[:-1])

  
def construct_joint_csv(output_filepath):
    df1 = pd.read_csv('/home/reflex/reflex/data/results_constant_vector_length/vectors/vectors.csv')
    df1.set_index('img', inplace=True, drop=True)

    df2 = pd.read_csv('/home/reflex/reflex/reflex.csv')
    df2['Image'] = df2['Image'].apply(lambda x: extract_filename(x))
    df2.set_index('Image', inplace=True, drop=True)

    joint = pd.concat([df1, df2], axis=1, join='inner')
    joint.to_csv(output_filepath)
    return joint

In [53]:
def classify(X, y, classifier_name, scoring):    
  
    #X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=.1, random_state=42)
    #print(f'Training set size: {len(X_train)} & Test set size: {len(X_test)}')
    ### cv - cross-validation generator - default KFold(n_splits, shuffle, random state) splits into K folds 
    ### it trains on K-1, test on 'untouched' 1 part
    
    clf = get_classifier(classifier_name)
    return cross_validate(clf, X, y, cv=no_cv_folds, scoring=scoring)
    #return cross_val_score(clf, X, y, cv=no_cv_folds, scoring=scoring)

In [54]:
def plot(data, classifier_name, class_name, scoring_names):    
    
    f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(12,12))
    ax1.set_title(f'{class_name}-{classifier_name}-{no_cv_folds}folds')
    
    x = []
    for i in range(len(data)):
        for j in range(len(data[0])):
            x.append(i+1)
    y = np.array(data).flatten()

    color = np.tile(np.arange(len(data[0])), len(scoring_names))
    scatter = ax1.scatter(x, y, s=20, c=color)
    cbar = plt.colorbar(scatter, ax=[ax1, ax2])
    cbar.set_ticks(np.arange(len(data[0])))
    
    ax2.boxplot(data, 0, 'gD', 1)
    
    ax2.set_xticklabels(scoring_names, rotation=45)
        
    plt.show()
    f.savefig(get_classifier_filename(classifier_name, class_name))

In [55]:
def initDF(class_name):
        
    joined = pd.read_csv(joint_csv_path)
    print("Dataset size: ", len(joined))
    
    y = joined.loc[:, class_name].values
    X = joined.as_matrix(columns=joined.columns[1:-7])
    
    return X, y  

In [56]:
def initDF_augmented(class_name):
        
    joined = pd.read_csv(joint_csv_path)
    print("Dataset size: ", len(joined))
    
    
    #for index, row in joined.iterrows():
    #    print(index)
    #    print(row)
    
    y = joined.loc[:, class_name].values
    X = joined.as_matrix(columns=joined.columns[1:-7])
    
    return X, y  

In [94]:
def main():
    
    for class_name in class_names:  # iterate over classes (Strong Bg, Loop Scattering, ... ) 
        print(class_name)
        res_df = pd.DataFrame(columns=['clf', 'fold', 'fit_time', 'score_time']+scoring_types)
        
        for classifier_name in classifier_names: # iterate over classifiers (RFC,DTC ...)
            print(classifier_name, end=' ')
            
            X, y = initDF_augmented(class_name)
            
            scores = classify(X, y, classifier_name, scoring_types)
            
            # pp.pprint(scores)
                        
            for i in range(no_cv_folds):
                data = {'clf': classifier_name}
                data['fold'] = i
                for score_name in scoring_types:
                    data[score_name] = scores[f'test_{score_name}'][i]
                data['fit_time'] = scores['fit_time'][i]
                data['score_time'] = scores['score_time'][i]
                
                res_df = res_df.append(pd.DataFrame(data, index=[0]), ignore_index = True)
            
            #plot(scores, classifier_name, class_name, scoring_types) # plot all stats on specific: CLF , FEATURE , SCORING_TYPE
        print(res_df)
        res_df.to_csv(f'7_classifiers/{class_name}.csv')

In [95]:
#construct_joint_csv(joint_csv_path)
main()

Loop scattering
RFC Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


DTC Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


KNN Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


GaussianNB Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


QuadraticDisciminantAnalysis Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


    accuracy                           clf        f1  fit_time fold  \
0   0.720183                           RFC  0.651429  0.801221    0   
1   0.752294                           RFC  0.712766  0.898984    1   
2   0.692661                           RFC  0.652850  0.902515    2   
3   0.743119                           RFC  0.714286  0.899875    3   
4   0.723502                           RFC  0.700000  0.902087    4   
5   0.714286                           RFC  0.643678  0.901118    5   
6   0.741935                           RFC  0.698925  0.902397    6   
7   0.751152                           RFC  0.715789  0.898207    7   
8   0.705069                           RFC  0.659574  0.897225    8   
9   0.755760                           RFC  0.703911  0.899056    9   
10  0.605505                           DTC  0.542553  1.989606    0   
11  0.596330                           DTC  0.526882  2.016367    1   
12  0.651376                           DTC  0.612245  1.833519    2   
13  0.

  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


DTC Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


KNN Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


GaussianNB Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


QuadraticDisciminantAnalysis Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


    accuracy                           clf        f1  fit_time fold  \
0   0.766055                           RFC  0.822300  0.894299    0   
1   0.793578                           RFC  0.835165  0.896277    1   
2   0.775229                           RFC  0.824373  0.896245    2   
3   0.766055                           RFC  0.817204  0.902305    3   
4   0.798165                           RFC  0.841727  0.896567    4   
5   0.738532                           RFC  0.794224  0.897553    5   
6   0.783410                           RFC  0.831541  0.898336    6   
7   0.792627                           RFC  0.839858  0.895787    7   
8   0.787037                           RFC  0.830882  0.895872    8   
9   0.754630                           RFC  0.815331  0.896379    9   
10  0.600917                           DTC  0.666667  2.049133    0   
11  0.706422                           DTC  0.764706  1.962331    1   
12  0.688073                           DTC  0.738462  1.875376    2   
13  0.

  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


DTC Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


KNN Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


GaussianNB Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


QuadraticDisciminantAnalysis Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


    accuracy                           clf        f1  fit_time fold  \
0   0.917431                           RFC  0.924370  0.694134    0   
1   0.922018                           RFC  0.931727  0.705563    1   
2   0.917431                           RFC  0.925000  0.698032    2   
3   0.903670                           RFC  0.912134  0.696244    3   
4   0.940092                           RFC  0.947368  0.696987    4   
5   0.898618                           RFC  0.909091  0.697438    5   
6   0.930876                           RFC  0.937238  0.695353    6   
7   0.921659                           RFC  0.930041  0.695297    7   
8   0.921659                           RFC  0.929461  0.697185    8   
9   0.930876                           RFC  0.937238  0.693503    9   
10  0.857798                           DTC  0.865801  1.629922    0   
11  0.912844                           DTC  0.922449  1.520786    1   
12  0.862385                           DTC  0.876033  1.749520    2   
13  0.

  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


DTC Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


KNN Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


GaussianNB Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


QuadraticDisciminantAnalysis Dataset size:  2174


  if sys.path[0] == '':
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
of pandas will change to not sort by

    accuracy                           clf        f1  fit_time fold  \
0   0.862385                           RFC  0.062500  0.598191    0   
1   0.871560                           RFC  0.176471  0.605976    1   
2   0.880734                           RFC  0.187500  0.700113    2   
3   0.862385                           RFC  0.062500  0.902716    3   
4   0.876147                           RFC  0.068966  0.922722    4   
5   0.880734                           RFC  0.187500  0.907538    5   
6   0.875576                           RFC  0.068966  0.906624    6   
7   0.870968                           RFC  0.125000  0.906070    7   
8   0.875000                           RFC  0.129032  0.999502    8   
9   0.870370                           RFC  0.000000  0.997019    9   
10  0.788991                           DTC  0.206897  1.862347    0   
11  0.788991                           DTC  0.281250  1.679394    1   
12  0.816514                           DTC  0.259259  1.694432    2   
13  0.

  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


DTC Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


KNN Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


GaussianNB Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


QuadraticDisciminantAnalysis Dataset size:  2174


  if sys.path[0] == '':
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
of pandas will change to not sort by

    accuracy                           clf        f1  fit_time fold  \
0   0.917431                           RFC  0.500000  0.897896    0   
1   0.940367                           RFC  0.682927  0.898723    1   
2   0.940367                           RFC  0.628571  0.901585    2   
3   0.912844                           RFC  0.486486  1.022873    3   
4   0.926606                           RFC  0.555556  0.799697    4   
5   0.931193                           RFC  0.516129  0.904008    5   
6   0.931193                           RFC  0.594595  0.901278    6   
7   0.939815                           RFC  0.606061  0.899650    7   
8   0.976852                           RFC  0.864865  0.897651    8   
9   0.921296                           RFC  0.451613  0.907111    9   
10  0.908257                           DTC  0.523810  2.454944    0   
11  0.889908                           DTC  0.520000  1.894546    1   
12  0.866972                           DTC  0.431373  2.472071    2   
13  0.

  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


DTC Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


KNN Dataset size:  2174


  if sys.path[0] == '':
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


GaussianNB Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


QuadraticDisciminantAnalysis Dataset size:  2174


  if sys.path[0] == '':
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
of pandas will change to not sort by

    accuracy                           clf        f1  fit_time fold  \
0   0.917808                           RFC  0.357143  1.398768    0   
1   0.917431                           RFC  0.307692  1.397421    1   
2   0.922018                           RFC  0.320000  1.498935    2   
3   0.926267                           RFC  0.384615  1.714529    3   
4   0.907834                           RFC  0.166667  1.514754    4   
5   0.907834                           RFC  0.285714  1.498077    5   
6   0.912442                           RFC  0.240000  1.499649    6   
7   0.903226                           RFC  0.086957  1.501676    7   
8   0.912442                           RFC  0.344828  1.398245    8   
9   0.926267                           RFC  0.428571  1.499419    9   
10  0.881279                           DTC  0.458333  4.250922    0   
11  0.885321                           DTC  0.444444  4.351990    1   
12  0.880734                           DTC  0.315789  3.871010    2   
13  0.

  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


DTC Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


KNN Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


GaussianNB Dataset size:  2174


  if sys.path[0] == '':
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


QuadraticDisciminantAnalysis Dataset size:  2174


  if sys.path[0] == '':


    accuracy                           clf        f1  fit_time fold  \
0   0.839450                           RFC  0.186047  0.896713    0   
1   0.839450                           RFC  0.102564  0.904699    1   
2   0.830275                           RFC  0.051282  1.005275    2   
3   0.848624                           RFC  0.195122  0.904606    3   
4   0.838710                           RFC  0.102564  0.902541    4   
5   0.857143                           RFC  0.243902  0.897909    5   
6   0.847926                           RFC  0.153846  0.898440    6   
7   0.834101                           RFC  0.142857  0.908759    7   
8   0.843318                           RFC  0.105263  0.899081    8   
9   0.857143                           RFC  0.205128  1.006105    9   
10  0.729358                           DTC  0.191781  1.938946    0   
11  0.756881                           DTC  0.208955  1.749563    1   
12  0.743119                           DTC  0.317073  2.056078    2   
13  0.

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)
