In [None]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn import metrics
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
import math 
from sklearn.metrics import hamming_loss
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_multilabel_classification
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB
from skmultilearn.problem_transform import ClassifierChain
from skmultilearn.problem_transform import LabelPowerset
from skmultilearn.adapt import MLkNN
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from keras.datasets import boston_housing
from numpy.random import seed
from sklearn.model_selection import train_test_split
import tensorflow
from sklearn.preprocessing import StandardScaler
from keras import optimizers
from keras import initializers
from keras import regularizers

# Fix the random seeds up front so that repeated runs start from the same state:
# NumPy's global generator first, then TensorFlow's global generator.
np.random.seed(1907)
tensorflow.random.set_seed(2)


from skmultilearn.dataset import load_dataset_dump
#from skmultilearn.problem_transformation import BinaryRelevance
#from skmultilearn.datasets import load_dataset


#X, y = load_dataset('yeast')
#X_train, X_test, y_train, y_test = train_test_split(X, y)
#X, y, feature_names, label_names = load_dataset_dump('yeast')

# Download the multi-label "yeast" dataset (OpenML version 4) as features X and labels Y.
# as_frame=False pins the result to plain NumPy arrays. Without it the container type
# depends on the installed scikit-learn version (newer releases may hand back pandas
# objects), and the multi-label / neural-network code in the cells below is written
# against array inputs.
X, Y = fetch_openml('yeast', version=4, return_X_y=True, as_frame=False)

# The label columns are compared against the string 'TRUE' to obtain a boolean
# indicator matrix (True where the label is set for that sample).
Y = Y == 'TRUE'

# Hold out 20% of the samples for evaluation; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=.2, random_state=0)

# Alternative data source (currently disabled): generate a random multi-label dataset instead of loading yeast.
#X, y = make_multilabel_classification(sparse = True, n_labels = 20, return_indicator = 'sparse', allow_unlabeled = False)

# sparse: if True, return a sparse feature matrix (a matrix in which most elements are zero).
# n_labels: the average number of labels per instance.
# return_indicator: if 'sparse', return Y in the sparse binary indicator format.
# allow_unlabeled: if True, some instances might not belong to any class.


In [None]:
# Binary Relevance problem transformation wrapped around a Gaussian naive Bayes
# base classifier, trained on the training split.
classifier = BinaryRelevance(classifier=GaussianNB())
classifier.fit(X_train, y_train)

# Label predictions for the held-out test split.
predictions = classifier.predict(X_test)

# Report both metrics; each value is printed on the line after its caption.
print("hamming loss: ", hamming_loss(y_test, predictions), sep="\n")
print("accuracy:", accuracy_score(y_test, predictions), sep="\n")

In [None]:
# Classifier Chain problem transformation wrapped around a Gaussian naive Bayes
# base classifier, trained on the training split.
classifier = ClassifierChain(classifier=GaussianNB())
classifier.fit(X_train, y_train)

# Label predictions for the held-out test split.
predictions = classifier.predict(X_test)

# Report both metrics; each value is printed on the line after its caption.
print("hamming loss: ", hamming_loss(y_test, predictions), sep="\n")
print("accuracy:", accuracy_score(y_test, predictions), sep="\n")

In [None]:
# Label Powerset problem transformation wrapped around a Gaussian naive Bayes
# base classifier, trained on the training split.
classifier = LabelPowerset(classifier=GaussianNB())
classifier.fit(X_train, y_train)

# Label predictions for the held-out test split.
predictions = classifier.predict(X_test)

# Report both metrics; each value is printed on the line after its caption.
print("hamming loss: ", hamming_loss(y_test, predictions), sep="\n")
print("accuracy:", accuracy_score(y_test, predictions), sep="\n")

In [None]:
# Adapted-algorithm approach: MLkNN configured with k=20, trained on the training split.
classifier = MLkNN(k=20)
classifier.fit(X_train, y_train)

# Label predictions for the held-out test split.
predictions = classifier.predict(X_test)

# Report both metrics; each value is printed on the line after its caption.
print("hamming loss: ", hamming_loss(y_test, predictions), sep="\n")
print("accuracy:", accuracy_score(y_test, predictions), sep="\n")

In [None]:
# Binary Relevance problem transformation, this time wrapped around a
# logistic regression base classifier (default settings).
classifier = BinaryRelevance(classifier=LogisticRegression())
classifier.fit(X_train, y_train)

# Label predictions for the held-out test split.
predictions = classifier.predict(X_test)

# Report both metrics; each value is printed on the line after its caption.
print("hamming loss: ", hamming_loss(y_test, predictions), sep="\n")
print("accuracy:", accuracy_score(y_test, predictions), sep="\n")


In [None]:
# Label Powerset problem transformation wrapped around a logistic regression
# base classifier (default settings), trained on the training split.
classifier = LabelPowerset(classifier=LogisticRegression())
classifier.fit(X_train, y_train)

# Label predictions for the held-out test split.
predictions = classifier.predict(X_test)

# Report both metrics; each value is printed on the line after its caption.
print("hamming loss: ", hamming_loss(y_test, predictions), sep="\n")
print("accuracy:", accuracy_score(y_test, predictions), sep="\n")

In [None]:
# Display the shape of the training feature matrix; its second entry is the
# feature count that the network cells use as their input width.
X_train.shape

In [None]:
# Display the shape of the training label matrix (samples x label columns).
y_train.shape

In [None]:
from keras import models
from keras import layers

# Baseline fully connected network for multi-label prediction:
# two ReLU hidden layers (100 and 50 units) followed by a sigmoid output layer.
# Both the input and output widths are taken from the data rather than hard-coded,
# so the cell keeps working if the dataset or its label set changes.
n_features = X_train.shape[1]
n_labels = y_train.shape[1]  # one sigmoid output unit per label column

model = models.Sequential()
model.add(layers.Dense(100, input_shape=(n_features,), kernel_initializer='he_uniform', activation='relu'))
model.add(layers.Dense(50, activation='relu'))
model.add(layers.Dense(n_labels, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 20% of the training data is set aside by Keras for validation during fitting.
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Turn the sigmoid outputs into hard 0/1 label decisions with a 0.5 cut-off,
# using a single comparison and keeping the dtype returned by predict().
probabilities = model.predict(X_test)
preds = (probabilities >= 0.5).astype(probabilities.dtype)
# `preds` is scored against y_test in the cell that follows.


In [None]:
# Score the thresholded network predictions held in `preds` against the test labels;
# each value is printed on the line after its caption.
print("hamming loss: ", hamming_loss(y_test, preds), sep="\n")
print("accuracy:", accuracy_score(y_test, preds), sep="\n")

In [None]:
# Same 100/50 hidden-layer layout as the baseline network, with a Dropout(0.2)
# layer after each hidden layer. Input and output widths are derived from the
# data instead of being hard-coded.
n_features = X_train.shape[1]
n_labels = y_train.shape[1]  # one sigmoid output unit per label column

model = models.Sequential()
model.add(layers.Dense(100, input_shape=(n_features,), activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(50, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(n_labels, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 20% of the training data is set aside by Keras for validation during fitting.
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Turn the sigmoid outputs into hard 0/1 label decisions with a 0.5 cut-off,
# using a single comparison and keeping the dtype returned by predict().
probabilities = model.predict(X_test)
preds = (probabilities >= 0.5).astype(probabilities.dtype)
# `preds` is scored against y_test in the cell that follows.




In [None]:
# Score the thresholded network predictions held in `preds` against the test labels;
# each value is printed on the line after its caption.
print("hamming loss: ", hamming_loss(y_test, preds), sep="\n")
print("accuracy:", accuracy_score(y_test, preds), sep="\n")

In [None]:
# created scaler
scaler = StandardScaler()
# fit scaler on training dataset
scaler.fit(X_train)
# transform training dataset
X_train = scaler.transform(X_train)
# transform test dataset
X_test = scaler.transform(X_test)


In [None]:
# Wider network (200 and 100 ReLU units) with Dropout(0.3) after each hidden layer.
# Input and output widths are derived from the data instead of being hard-coded.
n_features = X_train.shape[1]
n_labels = y_train.shape[1]  # one sigmoid output unit per label column

model = models.Sequential()
model.add(layers.Dense(200, input_shape=(n_features,), activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(100, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(n_labels, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 20% of the training data is set aside by Keras for validation during fitting.
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Turn the sigmoid outputs into hard 0/1 label decisions with a 0.5 cut-off,
# using a single comparison and keeping the dtype returned by predict().
probabilities = model.predict(X_test)
preds = (probabilities >= 0.5).astype(probabilities.dtype)

# Score the thresholded predictions against the test labels.
print("hamming loss: ")
print(hamming_loss(y_test, preds))

print("accuracy:")
print(accuracy_score(y_test, preds))


In [None]:
# Regularized network: both hidden layers (100 and 50 ReLU units) carry an L2
# kernel penalty of 0.001 and are each followed by Dropout(0.1).
# Input and output widths are derived from the data instead of being hard-coded.
n_features = X_train.shape[1]
n_labels = y_train.shape[1]  # one sigmoid output unit per label column

model = models.Sequential()
model.add(layers.Dense(100, activation='relu', kernel_regularizer=regularizers.l2(0.001), input_shape=(n_features,)))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(50, kernel_regularizer=regularizers.l2(0.001), activation='relu'))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(n_labels, activation='sigmoid'))

# Explicitly configured Adam optimizer with a learning rate of 0.01.
opt = optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-07)
model.compile(optimizer=opt,
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Short training run (10 epochs); 20% of the training data is set aside by
# Keras for validation during fitting.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Turn the sigmoid outputs into hard 0/1 label decisions with a 0.5 cut-off,
# using a single comparison and keeping the dtype returned by predict().
probabilities = model.predict(X_test)
preds = (probabilities >= 0.5).astype(probabilities.dtype)

# Score the thresholded predictions against the test labels.
print("hamming loss: ")
print(hamming_loss(y_test, preds))

print("accuracy:")
print(accuracy_score(y_test, preds))
