In [1]:
#import ensemble model python file
from EnsembleModel import EnsembleModel
import numpy as np
import pandas as pd
import joblib
from Preprocessing import preprocess

In [2]:
%run Preprocessing.py

In [3]:
#use preprocess function to get train and test data
X_train, X_test, y_train, y_test, X_val, y_val = preprocess(pd.read_csv('austin_weather.csv'))

In [4]:
# using classifier chains
from skmultilearn.problem_transform import ClassifierChain
from sklearn.linear_model import LogisticRegression# initialize classifier chains multi-label classifier
classifier = ClassifierChain(LogisticRegression())# Training logistic regression model on train data
classifier.fit(X_train, y_train)# predict
predictions = classifier.predict(X_test)# accuracy
from sklearn.metrics import accuracy_score
print("Accuracy = ",accuracy_score(y_test,predictions))

#calculate weighted accuracy, precision, recall, f1 score
from sklearn.metrics import precision_score, recall_score, f1_score
print("Weighted Precision = ",precision_score(y_test, predictions, average="weighted"))
print("Weighted Recall = ",recall_score(y_test, predictions, average="weighted"))
print("Weighted F1 score = ",f1_score(y_test, predictions, average="weighted"))


Accuracy =  0.8282828282828283
Weighted Precision =  0.8000604047115676
Weighted Recall =  0.6233766233766234
Weighted F1 score =  0.6912999479346175


In [5]:
# using binary relevance
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB
# initialize binary relevance multi-label classifier
# with a logistic regression base classifier
classifier = ClassifierChain(LogisticRegression())
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
# accuracy
print("Accuracy = ",accuracy_score(y_test,predictions))
#calculate weighted accuracy, precision, recall, f1 score
from sklearn.metrics import precision_score, recall_score, f1_score
print("Weighted Precision = ",precision_score(y_test, predictions, average="weighted"))
print("Weighted Recall = ",recall_score(y_test, predictions, average="weighted"))
print("Weighted F1 score = ",f1_score(y_test, predictions, average="weighted"))



Accuracy =  0.8282828282828283
Weighted Precision =  0.8000604047115676
Weighted Recall =  0.6233766233766234
Weighted F1 score =  0.6912999479346175


In [6]:
# using label powerset
from skmultilearn.problem_transform import LabelPowerset
from sklearn.naive_bayes import GaussianNB
# initialize label powerset multi-label classifier
# with logistic regression base classifier
classifier = LabelPowerset(LogisticRegression())# train
classifier.fit(X_train, y_train)# predict
predictions = classifier.predict(X_test)# accuracy
print("Accuracy = ",accuracy_score(y_test,predictions))
#calculate weighted accuracy, precision, recall, f1 score
from sklearn.metrics import precision_score, recall_score, f1_score
print("Weighted Precision = ",precision_score(y_test, predictions, average="weighted"))
print("Weighted Recall = ",recall_score(y_test, predictions, average="weighted"))
print("Weighted F1 score = ",f1_score(y_test, predictions, average="weighted"))



Accuracy =  0.8282828282828283
Weighted Precision =  0.8057513914656772
Weighted Recall =  0.6363636363636364
Weighted F1 score =  0.7088205911735325


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
# using binary relevance
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB
# initialize binary relevance multi-label classifier
# with a gaussian naive bayes base classifier
classifier = BinaryRelevance(GaussianNB())# train
classifier.fit(X_train, y_train)# predict
predictions = classifier.predict(X_test)# accuracy
print("Accuracy = ",accuracy_score(y_test,predictions))
#calculate weighted accuracy, precision, recall, f1 score
from sklearn.metrics import precision_score, recall_score, f1_score
print("Weighted Precision = ",precision_score(y_test, predictions, average="weighted"))
print("Weighted Recall = ",recall_score(y_test, predictions, average="weighted"))
print("Weighted F1 score = ",f1_score(y_test, predictions, average="weighted"))

Accuracy =  0.42424242424242425
Weighted Precision =  0.7295010252904989
Weighted Recall =  0.7922077922077922
Weighted F1 score =  0.677056277056277


In [8]:
# using adapted algorithm
from skmultilearn.adapt import MLkNN
from scipy.sparse import csr_matrix, lil_matrix
# initialize mlknn multi-label classifier
# with k=10
X_train_csr = lil_matrix(X_train).toarray()
y_train_csr = lil_matrix(y_train).toarray()
X_test_csr = lil_matrix(X_test).toarray()
classifier_new = MLkNN(k=10)# train
classifier_new.fit(X=X_train_csr, y=y_train_csr)# predict
predictions = classifier_new.predict(X_test_csr)# accuracy
print("Accuracy = ",accuracy_score(y_test,predictions))
#calculate weighted accuracy, precision, recall, f1 score
from sklearn.metrics import precision_score, recall_score, f1_score
print("Weighted Precision = ",precision_score(y_test, predictions, average="weighted"))
print("Weighted Recall = ",recall_score(y_test, predictions, average="weighted"))
print("Weighted F1 score = ",f1_score(y_test, predictions, average="weighted"))




Accuracy =  0.7777777777777778
Weighted Precision =  0.6461321287408244
Weighted Recall =  0.6103896103896104
Weighted F1 score =  0.6269204479730796


In [19]:
#use ensemble classifier
from sklearn.naive_bayes import GaussianNB
from skmultilearn.ensemble import RakelD
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import scipy
from scipy.sparse import csr_matrix, lil_matrix

# initialize RakelD multi-label classifier
# with a gaussian naive bayes base classifier
#convert to dense
#scipy.sparse.csr_matrix(X_train.values)
X_train_csr = csr_matrix.todense(X_train)
y_train_csr = csr_matrix.todense(y_train)
X_test_csr = csr_matrix.todense(X_test)

classifier = RakelD(
    base_classifier=GaussianNB(),
    base_classifier_require_dense=[False, True],
    labelset_size=2
)

# train
classifier.fit(X_train_csr, y_train_csr)

# predict
predictions = classifier.predict(X_test_csr)

# accuracy
print("Accuracy = ",accuracy_score(y_test,predictions))

#calculate weighted accuracy, precision, recall, f1 score
from sklearn.metrics import precision_score, recall_score, f1_score
print("Weighted Precision = ",precision_score(y_test, predictions, average="weighted"))
print("Weighted Recall = ",recall_score(y_test, predictions, average="weighted"))
print("Weighted F1 score = ",f1_score(y_test, predictions, average="weighted"))




AttributeError: 'DataFrame' object has no attribute '_ascontainer'

In [22]:
from sklearn.naive_bayes import GaussianNB
from skmultilearn.ensemble import RakelO
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import scipy
from scipy.sparse import csr_matrix, lil_matrix

# initialize RakelD multi-label classifier
# with a gaussian naive bayes base classifier
#convert to dense
X_train_csr = lil_matrix(X_train).toarray()
y_train_csr = lil_matrix(y_train).toarray()
X_test_csr = lil_matrix(X_test).toarray()

classifier = RakelO(
    base_classifier=GaussianNB(),
    base_classifier_require_dense=[False, True],
    labelset_size=2
)

# train
classifier.fit(X_train, y_train)

# predict
predictions = classifier.predict(X_test)

# accuracy
print("Accuracy = ",accuracy_score(y_test,predictions))

#calculate weighted accuracy, precision, recall, f1 score
from sklearn.metrics import precision_score, recall_score, f1_score
print("Weighted Precision = ",precision_score(y_test, predictions, average="weighted"))
print("Weighted Recall = ",recall_score(y_test, predictions, average="weighted"))
print("Weighted F1 score = ",f1_score(y_test, predictions, average="weighted"))


TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

In [25]:
from skmultilearn.ext import Meka, download_meka

# download meka
download_meka()

# initialize meka
meka_classifier = Meka(
    meka_classifier = "meka.classifiers.multilabel.LC",
    weka_classifier = "weka.classifiers.bayes.NaiveBayes",
    meka_classpath = download_meka(),
    java_classpath = "/usr/local/lib/python3.6/dist-packages/meka/lib/meka.jar",
)


meka_classifier.fit(X_train, y_train)
predictions = meka_classifier.predict(X_test)


2022-12-16 17:51:21.431233: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-16 17:51:21.431278: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


MEKA 1.9.2 not found, downloading
