In [10]:
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from scipy.ndimage import convolve
from sklearn import linear_model, datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline

from sklearn.cluster import KMeans
from sklearn import datasets


In [3]:
ccf_train_data = "./data/2018_4_21_22_38_train.csv"
ccf_test_data = "./data/2018_4_30_08_33_test.csv"


def load_data(filepath):
    from numpy import genfromtxt

    csv_data = genfromtxt(filepath, delimiter=",", skip_header=1)
    data = []
    labels = []

    for d in csv_data:
        data.append(d[:-1])
        labels.append(d[-1])

    return np.array(data), np.array(labels)



In [4]:
train_dataset, train_labels = load_data(ccf_train_data)
test_dataset, test_labels = load_data(ccf_test_data)

In [11]:
predicted_train_lables = KMeans(n_clusters=2, init='k-means++', n_init=10, max_iter=300,).fit_predict(train_dataset)
predicted_test_lables = KMeans(n_clusters=2, init='k-means++', n_init=10, max_iter=300,).fit_predict(test_dataset)

#traning Set labels --> K Mean Clustering

In [12]:
# Models we will use
logistic = linear_model.LogisticRegression()
rbm = BernoulliRBM(random_state=0, verbose=True)

classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

In [24]:
# #############################################################################
# Training

# Hyper-parameters. These were set by cross-validation,
# using a GridSearchCV. Here we are not performing cross-validation to
# save time.
rbm.learning_rate = 0.06
rbm.n_iter = 20
# More components tend to give better prediction performance, but larger
# fitting time
rbm.n_components = 100
logistic.C = 6000.0

# Training RBM-Logistic Pipeline
classifier.fit(train_dataset, predicted_train_lables)

# Training Logistic regression
logistic_classifier = linear_model.LogisticRegression(C = 100.0)
logistic_classifier.fit(test_dataset, predicted_test_lables)

[BernoulliRBM] Iteration 1, pseudo-likelihood = -112406.90, time = 0.47s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -218526.11, time = 0.80s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -324088.78, time = 0.82s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -429334.12, time = 0.81s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -534379.51, time = 0.82s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -639324.31, time = 0.84s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -744212.94, time = 0.84s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -849061.35, time = 0.86s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -953899.71, time = 0.85s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -1058729.22, time = 0.77s
[BernoulliRBM] Iteration 11, pseudo-likelihood = -1163559.16, time = 0.85s
[BernoulliRBM] Iteration 12, pseudo-likelihood = -1268390.41, time = 0.79s
[BernoulliRBM] Iteration 13, pseudo-likelihood = -1373218.18, time = 0.87s
[BernoulliRBM] Iteration 14, pseudo-likelih

LogisticRegression(C=100.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [26]:
print()

print("Logistic regression using RBM features:\n%s\n" % (metrics.classification_report(test_labels, classifier.predict(test_dataset))))

print("Logistic regression using raw pixel features:\n%s\n" % (metrics.classification_report(train_labels, logistic_classifier.predict(train_dataset))))


Logistic regression using RBM features:
             precision    recall  f1-score   support

        0.0       0.49      1.00      0.66     49405
        1.0       0.00      0.00      0.00     50595

avg / total       0.24      0.49      0.33    100000


Logistic regression using raw pixel features:
             precision    recall  f1-score   support

        0.0       0.52      0.49      0.50     12141
        1.0       0.51      0.55      0.53     11999

avg / total       0.52      0.52      0.52     24140




  'precision', 'predicted', average, warn_for)
