In [117]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.naive_bayes import GaussianNB
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from sklearn.feature_selection import r_regression, RFE, SelectFromModel, SelectKBest
from sklearn.linear_model import LassoCV
from sklearn.svm import SVR

Ladownie danych 

In [118]:
data = np.loadtxt("eeg.csv", dtype=float, delimiter=',')
features = data[:, :-1]
classes = data[:,-1]

Zadaeklarowanie klasyfikatorow, metod balansowania oraz selekcji cech

In [119]:
classifiers = {
    'KNN': KNeighborsClassifier(),
    'DecisionTree': DecisionTreeClassifier(),
    'Beyesian': GaussianNB()    
}

In [121]:
balancing_methods = {
    'Undersampling': RandomUnderSampler(),
    'SMOTE': SMOTE(),
    'Oversampling': RandomOverSampler()
}

Sprawdzenie metod balansu danych dla poszczególnych klasyfikatorów

In [122]:
results = pd.DataFrame(columns=['classifier', 'balance_method', 'accuracy'])

In [123]:
for balancing_name, balancing_method in balancing_methods.items():

    x_resampled, y_resampled = balancing_method.fit_resample(features, classes)
    x_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=0.1, random_state=50, stratify=y_resampled)
    y_train_weight = np.where(y_train == 1, 0.13, 1 )

    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.fit_transform(x_test)
    
    for classifier_name, classifier in classifiers.items():
        classifier.fit(x_train, y_train)

        y_pred  = classifier.predict(x_test)

        accuracy = f1_score(y_test, y_pred)
        results = results.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy}, ignore_index=True)

results

  results = results.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy}, ignore_index=True)
  results = results.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy}, ignore_index=True)
  results = results.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy}, ignore_index=True)
  results = results.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy}, ignore_index=True)
  results = results.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy}, ignore_index=True)
  results = results.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy}, ignore_index=True)
  results = results.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy}, ignore_index=True)
  results = results.append({'classifier': classifier_na

Unnamed: 0,classifier,balance_method,accuracy
0,KNN,Undersampling,0.898678
1,DecisionTree,Undersampling,0.690583
2,Beyesian,Undersampling,0.426966
3,KNN,SMOTE,0.83682
4,DecisionTree,SMOTE,0.599217
5,Beyesian,SMOTE,0.666667
6,KNN,Oversampling,0.827545
7,DecisionTree,Oversampling,0.628623
8,Beyesian,Oversampling,0.666667


In [124]:
max_indexes = results.groupby('classifier')['accuracy'].idxmax()
results_best_balance = results.loc[max_indexes]
results_best_balance

Unnamed: 0,classifier,balance_method,accuracy
5,Beyesian,SMOTE,0.666667
1,DecisionTree,Undersampling,0.690583
0,KNN,Undersampling,0.898678


In [125]:
results_with_selector = pd.DataFrame(columns= ['classifier', 'balance_method', 'accuracy', 'selector_method', 'k'])

Metoda selekcji cech: rekurencyjne usuwanie cech

In [126]:
estimator = SVR(kernel="linear")
selector_name = 'Recursive'
selector_method = lambda k: RFE(estimator= estimator, n_features_to_select=k)

for i, (classifier_name, balancing_name) in results_best_balance[['classifier', 'balance_method']].iterrows():
    classifier = classifiers[classifier_name]
    balancing_method = balancing_methods[balancing_name]

    x_resampled, y_resampled = balancing_method.fit_resample(features, classes)
    x_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=0.1, random_state=50, stratify=y_resampled)
    
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.fit_transform(x_test)

    for k in range(1, 15):
        selector = selector_method(k)
        selector.fit(x_train, y_train)
        
        x_selected = selector.transform(x_train)
        x_test_selected = selector.transform(x_test)
        
        classifier.fit(x_selected, y_train)

        y_pred  = classifier.predict(x_test_selected)

        accuracy = f1_score(y_test, y_pred)
        results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)

  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)
  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)
  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)
  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)
  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k'

Metoda selekcji cech: LassoCV: SelectFromModel

In [128]:
selector_name = 'Lasso'
selector_method = lambda k: SelectFromModel(LassoCV(), max_features=k)

for i, (classifier_name, balancing_name) in results_best_balance[['classifier', 'balance_method']].iterrows():
    classifier = classifiers[classifier_name]
    balancing_method = balancing_methods[balancing_name]

    x_resampled, y_resampled = balancing_method.fit_resample(features, classes)
    x_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=0.1, random_state=50, stratify=y_resampled)
    
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.fit_transform(x_test)

    for k in range(1, 15):
        selector = selector_method(k)
        selector.fit(x_train, y_train)
        x_selected = selector.transform(x_train)
        x_test_selected = selector.transform(x_test)
        
        classifier.fit(x_selected, y_train)
        y_pred  = classifier.predict(x_test_selected)

        accuracy = f1_score(y_test, y_pred)
        results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

Metoda selekcji cech: korelacja liniowa Pearsona

In [129]:
selector_name = 'Pearson'
selector_method = lambda k: SelectKBest(r_regression, k=k)

for i, (classifier_name, balancing_name) in results_best_balance[['classifier', 'balance_method']].iterrows():
    classifier = classifiers[classifier_name]
    balancing_method = balancing_methods[balancing_name]

    x_resampled, y_resampled = balancing_method.fit_resample(features, classes)
    x_train, x_test, y_train, y_test = train_test_split(x_resampled, y_resampled, test_size=0.1, random_state=50, stratify=y_resampled)
    
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.fit_transform(x_test)

    for k in range(1, 15):
        selector = selector_method(k)
        selector.fit(x_train, y_train)
        x_selected = selector.transform(x_train)
        x_test_selected = selector.transform(x_test)
        
        classifier.fit(x_selected, y_train)
        y_pred  = classifier.predict(x_test_selected)

        accuracy = f1_score(y_test, y_pred)
        results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)

  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)
  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)
  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)
  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k': k}, ignore_index=True)
  results_with_selector = results_with_selector.append({'classifier': classifier_name, 'balance_method': balancing_name, 'accuracy': accuracy, 'selector_method': selector_name, 'k'

In [131]:
results_with_selector.to_csv('relusts.csv')

Stworzenie wkresu dokladnosci od liczby cech

In [None]:
results_with_selector = pd.read_csv('results.csv')