# 1. Import File CSV (Hasil Ekstraksi)

In [None]:
import pandas as pd

# Locations of the extracted-feature CSV files; adjust to the local directory layout.
train_csv = "C:.../.../Datasets Training GLSZM.csv"
test_csv = "C:.../.../Datasets Testing GLSZM.csv"

train = pd.read_csv(train_csv)  # training data
test = pd.read_csv(test_csv)  # testing data

# Print (rows, columns) of each table as a quick sanity check.
for table in (train, test):
    print(table.shape)

# 2. Pengambilan fitur dan class dari data training & testing

In [None]:
# `iloc` is pandas' integer-position indexer: it selects rows and columns by
# position rather than by label.
#
# The row slice `:` keeps every row. The column slice `:-1` keeps every column
# except the last one, and the column index `-1` selects only the last column,
# which is treated as the class variable.

# Training split
X_train = train.iloc[:, :-1]  # features
y_train = train.iloc[:, -1]  # class (anechoic, heterogeneous, hypoechoic)

# Testing split
X_test = test.iloc[:, :-1]  # features
y_test = test.iloc[:, -1]  # class target


# 3. Normalisasi data fitur

In [None]:
# StandardScaler standardizes every feature to zero mean and unit variance
# (z-score). It does not rescale values into the 0-1 range.
from sklearn.preprocessing import StandardScaler

# fit() computes the per-feature mean and standard deviation from the training
# features only; those same statistics are then applied to both splits.
sc = StandardScaler().fit(X_train)
X_train_scaled = sc.transform(X_train)
X_test_scaled = sc.transform(X_test)

# 4. Class/label diubah dalam bentuk numerik

In [None]:
# LabelEncoder converts the class labels into integer codes, giving the MLP
# numeric targets.
from sklearn.preprocessing import LabelEncoder

# Learn the label-to-code mapping from the training labels, then encode both
# splits with that single mapping.
le = LabelEncoder().fit(y_train)
y_train_preprocessed = le.transform(y_train)
y_test_preprocessed = le.transform(y_test)

for encoded in (y_train_preprocessed, y_test_preprocessed):
    print(encoded)

# 5. Klasifikasi dan confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

def train_classification(clf, train_features, train_labels, test_features, test_labels):
    """Fit ``clf`` on the training data and predict the test features.

    The classifier is fitted in place. ``test_labels`` is not used for
    training; it is handed back unchanged so the caller receives the true
    labels and the predictions as one pair.

    Returns:
        A ``(test_labels, predictions)`` tuple.
    """
    clf.fit(train_features, train_labels)
    return test_labels, clf.predict(test_features)

def _safe_ratio(numerator, denominator):
    """Divide element-wise, yielding 0.0 wherever the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    ratio = np.zeros(np.broadcast(numerator, denominator).shape, dtype=float)
    np.divide(numerator, denominator, out=ratio, where=denominator != 0)
    return ratio


def evaluation(clf, test_labels, test_predict, filename, output_dir='C:.../.../'):
    """Save the confusion-matrix plot of a fitted classifier and summarize it.

    Parameters
    ----------
    clf : fitted classifier exposing ``classes_``; used for the label order
        of the matrix and for the tick labels of the plot.
    test_labels : true labels of the test samples.
    test_predict : labels predicted for the same samples.
    filename : prefix of the image file; the plot is written to
        ``<output_dir>/<filename> Confusion Matrix.png``.
    output_dir : directory receiving the image (adjust to the local path).

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``Accuracy``, ``Precision``,
        ``Sensitivity``, ``Specificity`` and ``F1-score``. Precision,
        sensitivity and specificity are unweighted means over the classes;
        a class whose denominator is zero contributes 0.

    Notes
    -----
    The per-class counts and the summary metrics are also printed. The
    figure is left open so a notebook can display it; callers that evaluate
    many models in a loop should close figures themselves.
    """
    # Pin the label order to clf.classes_ so the matrix always has one
    # row/column per trained class, even when a class is missing from both
    # the test labels and the predictions. Otherwise its size could differ
    # from the number of display labels.
    cm = confusion_matrix(test_labels, test_predict, labels=clf.classes_)

    # Draw on an explicitly created figure and save that figure, instead of
    # opening a separate empty figure and relying on pyplot's current figure.
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_).plot(ax=ax)
    fig.savefig(os.path.join(output_dir, filename + ' Confusion Matrix.png'))

    # One-vs-rest counts per class, derived from the matrix.
    tp = cm.diagonal()
    fp = cm.sum(axis=0) - tp
    fn = cm.sum(axis=1) - tp
    tn = cm.sum() - (tp + fp + fn)

    # NOTE(review): this accuracy pools the one-vs-rest counts of all classes,
    # so every class's true negatives are counted. It is therefore not the
    # fraction of correctly classified samples (accuracy_score) and is
    # generally higher. Kept as-is so reported numbers do not change.
    accuracy = float(_safe_ratio(tp.sum() + tn.sum(),
                                 tp.sum() + fp.sum() + fn.sum() + tn.sum()))
    precision = float(np.mean(_safe_ratio(tp, tp + fp)))
    sensitivity = float(np.mean(_safe_ratio(tp, tp + fn)))
    specificity = float(np.mean(_safe_ratio(tn, tn + fp)))
    # F1 is defined as 0 when both precision and sensitivity are 0.
    pr_sum = sensitivity + precision
    f1score = (2 * sensitivity * precision) / pr_sum if pr_sum else 0.0

    # Print the confusion-matrix counts of every class.
    for i, (tp_i, fp_i, fn_i, tn_i) in enumerate(zip(tp, fp, fn, tn)):
        print(f'Kelas {i}:')
        print(f'TP (True Positive): {tp_i}')
        print(f'FP (False Positive): {fp_i}')
        print(f'FN (False Negative): {fn_i}')
        print(f'TN (True Negative): {tn_i}')

    # Print the summary metrics.
    print(f'Akurasi: {accuracy:.2%}')
    print(f'Presisi: {precision:.2%}')
    print(f'Sensitivitas: {sensitivity:.2%}')
    print(f'F1-score: {f1score:.2%}')

    # Collect the metrics in a one-row DataFrame.
    return pd.DataFrame({
        'Accuracy': [accuracy],
        'Precision': [precision],
        'Sensitivity': [sensitivity],
        'Specificity': [specificity],
        'F1-score': [f1score],
    })

# 6. Membuat & menyimpan model klasifikasi dengan 100x run

In [None]:
import joblib
from sklearn.neural_network import MLPClassifier

# Train the MLP n_runs times and keep one row of evaluation metrics per run.
n_runs = 100

# Lists intended for scalers and label encoders (nothing in this cell fills them).
scalers = []
label_encoders = []

# Per-run metric frames are collected in a list and concatenated once after
# the loop; concatenating inside the loop re-copies all earlier rows each time.
run_evaluations = []

for run in range(n_runs):
    # 1-based run number shared by the confusion-matrix image, the model file
    # and the log line, so image N always belongs to model N.
    run_number = run + 1

    # Seed every run with its own index: the initial weights still differ
    # from run to run, but re-executing the cell reproduces the same models.
    clf_MLP = MLPClassifier(hidden_layer_sizes=(31, 10), activation='relu',
                            max_iter=1000, random_state=run)
    test_labels_MLP, test_predict_MLP = train_classification(
        clf_MLP, X_train_scaled, y_train_preprocessed, X_test_scaled, y_test_preprocessed)
    df_evaluation_MLP = evaluation(clf_MLP, test_labels_MLP, test_predict_MLP,
                                   filename=f'MLP{run_number}')

    # evaluation() draws with pyplot and has already saved the image; close
    # the figures so they do not pile up over the runs.
    plt.close('all')

    # Save the model
    model_filename = f'C:.../.../clf_MLP_{run_number}.joblib'  # adjust to the local path
    joblib.dump(clf_MLP, model_filename)
    print(f"Model {run_number} saved to {model_filename}")

    run_evaluations.append(df_evaluation_MLP)

# Combine all runs into the main DataFrame (empty when n_runs is 0).
evaluation_results = (
    pd.concat(run_evaluations, ignore_index=True) if run_evaluations else pd.DataFrame()
)

# Save the evaluation results as CSV
evaluation_results.to_csv('C:.../.../evaluation_results_100_run.csv', index=False)  # adjust to the local path

In [None]:
# Summarize the accuracy over all runs: highest, lowest, mean and standard deviation.
accuracy_per_run = evaluation_results['Accuracy']

max_accuracy = accuracy_per_run.max()
min_accuracy = accuracy_per_run.min()
mean_accuracy = accuracy_per_run.mean()
std_accuracy = accuracy_per_run.std()

for caption, value in (
    ("Akurasi Tertinggi", max_accuracy),
    ("Akurasi Terendah", min_accuracy),
    ("Rata-rata Akurasi", mean_accuracy),
    ("Standar Deviasi Akurasi", std_accuracy),
):
    print(f"{caption}: {value:.2%}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Reload the per-run evaluation table from the CSV written earlier.
df = pd.read_csv('C:.../.../evaluation_results_100_run.csv')

# Accuracy of every run, drawn as a line with point markers.
accuracy_fig, accuracy_ax = plt.subplots(figsize=(10, 5))
accuracy_ax.plot(df['Accuracy'], marker='o', linestyle='-', label='Akurasi', color='blue')
accuracy_ax.set_title('Grafik Akurasi MLP dalam 100 kali Run')
accuracy_ax.set_xlabel('Run ke-')
accuracy_ax.set_ylabel('Akurasi')
accuracy_ax.legend()
accuracy_ax.grid(True)
accuracy_fig.savefig('C:.../.../Grafik Akurasi 100 Kali Run.png')
plt.show()

# Loss curve of whichever model `clf_MLP` currently refers to
# (presumably the last one trained by the loop; confirm the cell order).
loss_fig, loss_ax = plt.subplots(figsize=(10, 5))
loss_ax.plot(clf_MLP.loss_curve_, label='Loss', color='blue')
loss_ax.set_title('Loss Curve untuk Model MLP')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.grid(True)
loss_fig.savefig('C:.../.../Grafik Loss.png')
plt.show()

# Highest, lowest, mean and standard deviation of the accuracy column read from the CSV.
csv_accuracy = df['Accuracy']
max_accuracy = csv_accuracy.max()
min_accuracy = csv_accuracy.min()
mean_accuracy = csv_accuracy.mean()
std_accuracy = csv_accuracy.std()

# Show the summary.
for caption, value in (
    ("Akurasi Tertinggi", max_accuracy),
    ("Akurasi Terendah", min_accuracy),
    ("Rata-rata Akurasi", mean_accuracy),
    ("Standar Deviasi Akurasi", std_accuracy),
):
    print(f"{caption}: {value:.2%}")


# 8. Import data testing yang berbeda

In [None]:
import pandas as pd
# Load a different testing file. This rebinds `test`, replacing the testing
# DataFrame loaded at the start of the notebook.
new_test_csv = "C:.../.../Testing.csv"
test = pd.read_csv(new_test_csv)
print(test.shape)

# 9. Pengambilan fitur dan class data testing beda

In [None]:
# Split the new testing table by position: every column except the last is a
# feature, and the last column is the class target.
X_test_new = test.iloc[:, :-1]  # features
y_test_new = test.iloc[:, -1]  # class target

In [None]:
# Standardize the new test features with the statistics of the TRAINING
# features. The saved MLP was trained on inputs standardized with the training
# mean/std, so a scaler fitted on the new test set itself would hand the model
# inputs on a different scale. Refitting on X_train reproduces the exact
# scaler used for training.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
scaler_test = sc.fit(X_train)
X_new_test_scaled = scaler_test.transform(X_test_new)
print(X_new_test_scaled)

# Encode the new labels with the mapping learned from the training labels, so
# every integer code means the same class the model was trained on. A
# hand-typed label list can differ in spelling or order from the training
# data. transform() raises ValueError if the new file contains a label that
# never occurred in training.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(y_train)
labels = list(le.classes_)
y_new_test_preprocessed = le.transform(y_test_new)

print(y_new_test_preprocessed)

# 10. Menggunakan Model yang sudah disimpan untuk data testing yang baru

In [None]:
import joblib
import matplotlib.pyplot as plt

# Load one of the models saved by the training loop (file number 61) and use
# it to classify the new testing data.
# NOTE: joblib.load unpickles the file; only load model files you created yourself.
model_filename = 'C:.../.../clf_MLP_61.joblib'
clf_MLP_loaded = joblib.load(model_filename)

new_data_predictions = clf_MLP_loaded.predict(X_new_test_scaled)

# Keep the returned metrics table instead of discarding it.
df_evaluation_new = evaluation(clf_MLP_loaded, y_new_test_preprocessed, new_data_predictions, filename="MLP_New")

# Loss recorded during the training of the loaded model.
loss_curve = clf_MLP_loaded.loss_curve_
plt.figure(figsize=(10,5))
plt.plot(loss_curve)
plt.title('Loss Curve')
# Tick every 50 iterations up to the real length of this model's loss curve,
# rather than a fixed 0-700 range that only fits one particular model.
plt.xticks(range(0, len(loss_curve) + 50, 50))
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.grid(True)
plt.savefig('C:.../.../clf_MLP_61_loss.png')
plt.show()

In [None]:
# One-column DataFrames holding the true codes and the predicted codes.
actual_labels_df = pd.DataFrame(y_new_test_preprocessed, columns=['Label Sebenarnya'])
predicted_labels_df = pd.DataFrame(new_data_predictions, columns=['Hasil Klasifikasi'])

# Turn the integer codes back into class names. The table is built from the
# fitted encoder (code i <-> le.classes_[i]) instead of being typed by hand,
# so names and order always match the encoding that was actually applied.
label_mapping = dict(enumerate(le.classes_))
actual_labels_df['Label Sebenarnya'] = actual_labels_df['Label Sebenarnya'].map(label_mapping)
predicted_labels_df['Hasil Klasifikasi'] = predicted_labels_df['Hasil Klasifikasi'].map(label_mapping)

# Side-by-side table of true label vs. classification result.
results_df = pd.concat([actual_labels_df, predicted_labels_df], axis=1)
results_df