In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Memuat dataset
data = pd.read_csv('dataset.csv')

# Mengubah data kategorikal menjadi data numerik
label_encoders = {}
for column in data.columns:
    if data[column].dtype == 'object':
        le = LabelEncoder()
        data[column] = le.fit_transform(data[column])
        label_encoders[column] = le

# Misalnya kolom 'Expert Diagnose' adalah target
X = data.drop('Expert Diagnose', axis=1)
y = data['Expert Diagnose']

# Membagi dataset menjadi training set dan test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Membuat model Naive Bayes
model = MultinomialNB()

# Melatih model
model.fit(X_train, y_train)

# Memprediksi pada test set
y_pred = model.predict(X_test)

# Evaluasi model
print("Akurasi:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Menyimpan model dan label encoder
import joblib
joblib.dump(model, 'naive_bayes_model.pkl')
for column, le in label_encoders.items():
    joblib.dump(le, f'label_encoder_{column}.pkl')


Akurasi: 0.625
Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.75      0.55         4
           1       1.00      0.60      0.75         5
           2       0.55      0.75      0.63         8
           3       1.00      0.43      0.60         7

    accuracy                           0.62        24
   macro avg       0.74      0.63      0.63        24
weighted avg       0.75      0.62      0.63        24

Confusion Matrix:
 [[3 0 1 0]
 [2 3 0 0]
 [2 0 6 0]
 [0 0 4 3]]


In [4]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Memuat dataset
data = pd.read_csv('dataset.csv')

# Mengonversi kolom target menjadi label numerik
target = 'Expert Diagnose'
le = LabelEncoder()
data[target] = le.fit_transform(data[target])

# Memisahkan fitur (X) dan target (y)
X = data.drop(target, axis=1)
y = data[target]

# Mengonversi semua kolom kategorikal ke numerik dengan LabelEncoder
label_encoders = {}
for column in X.columns:
    le = LabelEncoder()
    X[column] = le.fit_transform(X[column])
    label_encoders[column] = le

# Membagi dataset menjadi training set dan test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Mendefinisikan input layers untuk setiap fitur kategorikal
input_layers = []
embedding_layers = []
for column in X.columns:
    input_layer = tf.keras.layers.Input(shape=(1,), name=column)
    input_layers.append(input_layer)
    
    vocab_size = X[column].nunique()
    embedding_dim = min(50, (vocab_size + 1) // 2)  # Dimensi embedding bisa disesuaikan
    embedding_layer = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim)(input_layer)
    embedding_layer = tf.keras.layers.Reshape(target_shape=(embedding_dim,))(embedding_layer)
    embedding_layers.append(embedding_layer)

# Menggabungkan semua embedding layers
combined_layer = tf.keras.layers.Concatenate()(embedding_layers)

# Menambahkan beberapa Dense layers dan Dropout
dense_layer = tf.keras.layers.Dense(128, activation='relu')(combined_layer)
dense_layer = tf.keras.layers.Dropout(0.5)(dense_layer)
dense_layer = tf.keras.layers.Dense(64, activation='relu')(dense_layer)
dense_layer = tf.keras.layers.Dropout(0.5)(dense_layer)
output_layer = tf.keras.layers.Dense(len(le.classes_), activation='softmax')(dense_layer)

# Membuat dan mengkompilasi model
model = tf.keras.models.Model(inputs=input_layers, outputs=output_layer)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Melatih model
X_train_list = [X_train[col] for col in X_train.columns]
X_test_list = [X_test[col] for col in X_test.columns]

model.fit(X_train_list, y_train, epochs=30, batch_size=32)

# Memprediksi pada test set
y_pred = model.predict(X_test_list)
y_pred_classes = y_pred.argmax(axis=-1)

# Evaluasi model
print("Akurasi:", accuracy_score(y_test, y_pred_classes))
print("Classification Report:\n", classification_report(y_test, y_pred_classes, zero_division=0))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_classes))

# Menyimpan model dan label encoder
model.save('tensorflow_model_with_embeddings_and_dropout.h5')
for column, le in label_encoders.items():
    joblib.dump(le, f'label_encoder_{column}.pkl')


Epoch 1/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.1003 - loss: 2.2064
Epoch 2/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2266 - loss: 2.1795  
Epoch 3/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2578 - loss: 2.1643  
Epoch 4/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.3424 - loss: 2.1399  
Epoch 5/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.3268 - loss: 2.1136  
Epoch 6/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.3490 - loss: 2.0872  
Epoch 7/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2669 - loss: 2.0840 
Epoch 8/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.3789 - loss: 2.0260 
Epoch 9/30
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



Akurasi: 0.875
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       1.00      1.00      1.00         5
           2       0.80      1.00      0.89         8
           3       0.83      0.71      0.77         7

    accuracy                           0.88        24
   macro avg       0.91      0.87      0.88        24
weighted avg       0.88      0.88      0.87        24

Confusion Matrix:
 [[3 0 0 1]
 [0 5 0 0]
 [0 0 8 0]
 [0 0 2 5]]


In [55]:
import numpy as np
import tensorflow as tf
import joblib

# Fungsi untuk memuat label encoder dan mengonversi input data pasien
def encode_input_data(input_data, label_encoders):
    encoded_data = {}
    for column, value in input_data.items():
        le = label_encoders[column]
        if isinstance(value, str):
            encoded_data[column] = le.transform([value])[0]
        else:
            encoded_data[column] = value
    return encoded_data

# Fungsi untuk memuat model dan melakukan prediksi
def predict_patient(input_data, model, label_encoders):
    # Mengonversi input data
    encoded_input_data = encode_input_data(input_data, label_encoders)
    
    # Mengonversi input data menjadi list numpy
    input_list = [np.array([encoded_input_data[column]]) for column in encoded_input_data]
    
    # Melakukan prediksi
    prediction = model.predict(input_list)
    
    # Mengonversi prediksi menjadi label kelas
    predicted_class = np.argmax(prediction)
    
    # Mengembalikan label kelas prediksi
    return predicted_class

# Memuat model yang telah dilatih
model = tf.keras.models.load_model('tensorflow_model_with_embeddings_and_dropout.h5')

# Memuat label encoders untuk setiap fitur kategorikal
label_encoders = {}
for column in X.columns:
    label_encoders[column] = joblib.load(f'label_encoder_{column}.pkl')

# Contoh data pasien yang akan diklasifikasikan
input_data = {
    'Patient Number' : 'Patiant-01',
    'Sadness': 'Sometimes',
    'Euphoric': 'Most-Often',
    'Exhausted': 'Sometimes',
    'Sleep dissorder': 'Sometimes',
    'Mood Swing': 'YES',
    'Suicidal thoughts': 'NO',
    'Anorxia': 'NO',
    'Authority Respect': 'NO',
    'Try-Explanation': 'YES',
    'Aggressive Response': 'YES',
    'Ignore & Move-On': 'NO',
    'Nervous Break-down': 'YES',
    'Admit Mistakes': 'YES',
    'Overthinking': 'NO',
    'Sexual Activity': 6,
    'Concentration': 5,
    'Optimisim': 7
}

# Melakukan prediksi untuk data pasien
predicted_class = predict_patient(input_data, model, label_encoders)

# Menampilkan hasil prediksi
print("Hasil Prediksi:")
print(f"Kelas yang diprediksi untuk data pasien adalah: {predicted_class}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 975ms/step
Hasil Prediksi:
Kelas yang diprediksi untuk data pasien adalah: 0


In [54]:
# Memuat label encoder dari file .pkl
label_encoder = joblib.load('label_encoder_Expert Diagnose.pkl')

# Menampilkan kelas-kelas yang dikenali oleh label encoder
print("Kelas-kelas yang dikenali oleh label encoder untuk fitur 'Sexual Activity':")
print(label_encoder.classes_)


Kelas-kelas yang dikenali oleh label encoder untuk fitur 'Sexual Activity':
['Bipolar Type-1' 'Bipolar Type-2' 'Depression' 'Normal']
