In [None]:
# RANDOM FOREST CLASSIFIER

import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import os
from sklearn.metrics import classification_report

# Load the pickled dataset. The context manager closes the file handle, which
# a bare open() call would leave open.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# a data.pickle that this project produced itself.
with open('../data.pickle', 'rb') as data_file:
    data_dict = pickle.load(data_file)

# Keep only samples with exactly 42 features, and keep each sample's own label
# with it. Slicing the label list to the number of valid samples would shift
# labels onto the wrong samples whenever a rejected sample is not at the very
# end of the dataset.
valid_data = []
valid_labels = []
for sample, label in zip(data_dict['data'], data_dict['labels']):
    if len(sample) == 42:
        valid_data.append(sample)
        valid_labels.append(label)

# Convert to NumPy arrays: data is (n_samples, 42), labels is (n_samples,)
data = np.array(valid_data)
labels = np.array(valid_labels)

# Map class index -> label, with labels in sorted order
labels_dict = {i: label for i, label in enumerate(sorted(set(labels)))}

# Hold out 30% of the samples for testing. Stratifying keeps the class
# proportions the same in both partitions; the fixed seed makes the split, and
# therefore the reported score, reproducible from one run to the next.
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.30, shuffle=True, stratify=labels, random_state=42
)

# Fit a random forest with default hyperparameters, seeded for repeatability.
# The data already has the (n_samples, n_features) shape the estimator expects.
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)

# Score the held-out set. scikit-learn metrics take (y_true, y_pred) in that
# order; accuracy is symmetric, but the conventional order avoids surprises if
# this call is ever swapped for an asymmetric metric.
y_predict = model.predict(x_test)
score = accuracy_score(y_test, y_predict)
print('{}% of samples were classified correctly !'.format(score * 100))

# If a 'model.p' already exists in the working directory, move it aside by
# renaming it to the first unused name among model1.p, model2.p, ...
model_filename = 'model.p'
if os.path.exists(model_filename):
    stem, ext = os.path.splitext(model_filename)
    i = 1
    # Advance the counter until the candidate name is free
    while os.path.exists(f'{stem}{i}{ext}'):
        i += 1
    new_filename = f'{stem}{i}{ext}'
    os.rename(model_filename, new_filename)

# Make sure the output folder exists. exist_ok=True replaces the separate
# existence check, so there is no gap between checking and creating.
model_folder = '../model/'
os.makedirs(model_folder, exist_ok=True)

# Pick the first unused file name: rf_model.p, then rf_model_1.p, rf_model_2.p, ...
i = 1
model_filename = os.path.join(model_folder, 'rf_model.p')
while os.path.exists(model_filename):
    model_filename = os.path.join(model_folder, f'rf_model_{i}.p')
    i += 1

# Persist the trained model, wrapped in a dict under the 'model' key
with open(model_filename, 'wb') as f:
    pickle.dump({'model': model}, f)

# Per-class precision / recall / F1 on the held-out set. Class names are
# listed in ascending labels_dict key order.
class_names = [labels_dict[idx] for idx in sorted(labels_dict)]
report = classification_report(y_test, y_predict, target_names=class_names)
print(report)

In [None]:
import pickle
import numpy as np
import os
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Load the pickled dataset. The context manager closes the file handle, which
# a bare open() call would leave open.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# a data.pickle that this project produced itself.
with open('../data.pickle', 'rb') as data_file:
    data_dict = pickle.load(data_file)

# Keep only samples with exactly 42 features, pairing every sample with its own
# label before filtering. Slicing the label list to the number of valid samples
# would misalign labels whenever a rejected sample is not at the very end.
valid_pairs = [
    (sample, label)
    for sample, label in zip(data_dict['data'], data_dict['labels'])
    if len(sample) == 42
]
valid_data = [sample for sample, _label in valid_pairs]
data = np.array(valid_data)
labels = np.array([label for _sample, label in valid_pairs])

# Sorted class labels, plus an index -> label lookup
unique_labels = sorted(set(labels))
labels_dict = {i: label for i, label in enumerate(unique_labels)}

# Hold out 30% of the samples as the test set (stratified by label, fixed
# seed); the remaining 70% is the training partition used for cross-validation.
x_train_full, x_test, y_train_full, y_test = train_test_split(
    data,
    labels,
    test_size=0.30,
    stratify=labels,
    random_state=42,
)

# 10-fold stratified cross-validation over the training partition
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

accuracies, all_y_true, all_y_pred = [], [], []

for train_index, val_index in skf.split(x_train_full, y_train_full):
    # Carve this fold's train / validation subsets out of the training partition
    x_train, y_train = x_train_full[train_index], y_train_full[train_index]
    x_val, y_val = x_train_full[val_index], y_train_full[val_index]

    # A fresh, identically seeded forest for every fold
    model = RandomForestClassifier(random_state=42)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_val)

    # Record the fold score and pool the predictions for the combined report
    accuracies.append(accuracy_score(y_val, y_pred))
    all_y_true.extend(y_val)
    all_y_pred.extend(y_pred)

# Per-fold and mean accuracy
print("K-Fold Accuracies:", accuracies)
print("Mean K-Fold Accuracy:", np.mean(accuracies))

# Metrics computed over the pooled out-of-fold predictions
report = classification_report(all_y_true, all_y_pred, target_names=unique_labels)
conf_matrix = confusion_matrix(all_y_true, all_y_pred)
print("Classification Report (K-Fold):\n", report)
print("Confusion Matrix (K-Fold):\n", conf_matrix)

# Fit the model that is evaluated and saved on the whole training partition.
# Without this refit, `model` is simply whatever the last cross-validation fold
# left behind, i.e. a forest that saw only 9 of the 10 folds. Cross-validation
# estimates generalisation; the delivered model should use all training data.
model = RandomForestClassifier(random_state=42)
model.fit(x_train_full, y_train_full)

# Evaluate once on the held-out test split
y_test_pred = model.predict(x_test)
test_acc = accuracy_score(y_test, y_test_pred)
print("Final Test Accuracy:", test_acc)

# Make sure the output folder exists. exist_ok=True replaces the separate
# existence check, so there is no gap between checking and creating.
model_folder = '../model/'
os.makedirs(model_folder, exist_ok=True)

# Pick the first unused file name: rf_model_kfold.p, then rf_model_kfold_1.p, ...
i = 1
model_filename = os.path.join(model_folder, 'rf_model_kfold.p')
while os.path.exists(model_filename):
    model_filename = os.path.join(model_folder, f'rf_model_kfold_{i}.p')
    i += 1

# Persist the trained model, wrapped in a dict under the 'model' key
with open(model_filename, 'wb') as f:
    pickle.dump({'model': model}, f)

K-Fold Accuracies: [0.9681397738951696, 0.9701952723535457, 0.9804727646454265, 0.9743062692702981, 0.9763617677286742, 0.9763617677286742, 0.9753086419753086, 0.9753086419753086, 0.9763374485596708, 0.970164609053498]
Mean K-Fold Accuracy: 0.9742956957185575
Classification Report (K-Fold):
               precision    recall  f1-score   support

           A       0.97      1.00      0.98       419
           B       0.99      1.00      1.00       420
           C       1.00      1.00      1.00       416
           D       0.99      0.99      0.99       374
           E       0.98      0.97      0.97       419
           F       0.99      0.99      0.99       414
           G       0.97      0.99      0.98       340
           H       0.98      0.97      0.97       316
           I       0.97      0.98      0.98       411
           J       0.99      0.97      0.98       262
           K       0.98      0.98      0.98       417
           L       0.99      0.98      0.98       419
    

In [3]:
import pickle
import numpy as np
import os
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Load the pickled dataset. The context manager closes the file handle, which
# a bare open() call would leave open.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# a data.pickle that this project produced itself.
with open('../data.pickle', 'rb') as data_file:
    data_dict = pickle.load(data_file)

# Keep only samples with exactly 42 features, pairing every sample with its own
# label before filtering. Slicing the label list to the number of valid samples
# would misalign labels whenever a rejected sample is not at the very end.
valid_pairs = [
    (sample, label)
    for sample, label in zip(data_dict['data'], data_dict['labels'])
    if len(sample) == 42
]
valid_data = [sample for sample, _label in valid_pairs]
data = np.array(valid_data)
labels = np.array([label for _sample, label in valid_pairs])

# Sorted class labels, plus an index -> label lookup
unique_labels = sorted(set(labels))
labels_dict = {i: label for i, label in enumerate(unique_labels)}

# Hold out 30% of the samples as the test set (stratified by label, fixed
# seed); the remaining 70% is the training partition used for cross-validation.
x_train_full, x_test, y_train_full, y_test = train_test_split(
    data,
    labels,
    test_size=0.30,
    stratify=labels,
    random_state=42,
)

# 10-fold stratified cross-validation over the training partition
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

accuracies, all_y_true, all_y_pred = [], [], []

# Ends up holding the model fitted in the last fold
final_model = None

for train_index, val_index in skf.split(x_train_full, y_train_full):
    # Carve this fold's train / validation subsets out of the training partition
    x_train, y_train = x_train_full[train_index], y_train_full[train_index]
    x_val, y_val = x_train_full[val_index], y_train_full[val_index]

    # A fresh, identically seeded forest for every fold
    model = RandomForestClassifier(random_state=42)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_val)

    # Record the fold score and pool the predictions for the combined report
    accuracies.append(accuracy_score(y_val, y_pred))
    all_y_true.extend(y_val)
    all_y_pred.extend(y_pred)

    # Overwritten every iteration, so the last fold's model is the one kept
    final_model = model

# Per-fold and mean accuracy
print("K-Fold Accuracies:", accuracies)
print("Mean K-Fold Accuracy:", np.mean(accuracies))

# Metrics computed over the pooled out-of-fold predictions
report = classification_report(all_y_true, all_y_pred, target_names=unique_labels)
conf_matrix = confusion_matrix(all_y_true, all_y_pred)
print("Classification Report (K-Fold):\n", report)
print("Confusion Matrix (K-Fold):\n", conf_matrix)

# Fit the model that is evaluated and saved on the whole training partition.
# Reusing the last cross-validation fold's model would mean shipping a forest
# that saw only 9 of the 10 folds. Cross-validation estimates generalisation;
# the delivered model should use all training data.
final_model = RandomForestClassifier(random_state=42)
final_model.fit(x_train_full, y_train_full)

# Evaluate once on the held-out test split
y_test_pred = final_model.predict(x_test)
test_acc = accuracy_score(y_test, y_test_pred)
print("Final Test Accuracy:", test_acc)

# Make sure the output folder exists. exist_ok=True replaces the separate
# existence check, so there is no gap between checking and creating.
model_folder = '../model/'
os.makedirs(model_folder, exist_ok=True)

# Pick the first unused file name: rf_model_kfold.p, then rf_model_kfold_1.p, ...
i = 1
model_filename = os.path.join(model_folder, 'rf_model_kfold.p')
while os.path.exists(model_filename):
    model_filename = os.path.join(model_folder, f'rf_model_kfold_{i}.p')
    i += 1

# Persist the trained model, wrapped in a dict under the 'model' key
with open(model_filename, 'wb') as f:
    pickle.dump({'model': final_model}, f)

K-Fold Accuracies: [0.9681397738951696, 0.9701952723535457, 0.9804727646454265, 0.9743062692702981, 0.9763617677286742, 0.9763617677286742, 0.9753086419753086, 0.9753086419753086, 0.9763374485596708, 0.970164609053498]
Mean K-Fold Accuracy: 0.9742956957185575
Classification Report (K-Fold):
               precision    recall  f1-score   support

           A       0.97      1.00      0.98       419
           B       0.99      1.00      1.00       420
           C       1.00      1.00      1.00       416
           D       0.99      0.99      0.99       374
           E       0.98      0.97      0.97       419
           F       0.99      0.99      0.99       414
           G       0.97      0.99      0.98       340
           H       0.98      0.97      0.97       316
           I       0.97      0.98      0.98       411
           J       0.99      0.97      0.98       262
           K       0.98      0.98      0.98       417
           L       0.99      0.98      0.98       419
    