In [None]:
# RANDOM FOREST CLASSIFIER

import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import os
from sklearn.metrics import classification_report

# Load the pickled dataset.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open('../data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

# Keep only the samples whose feature vector has exactly 42 entries, and keep
# each surviving sample paired with ITS OWN label. Slicing the label list to
# the first len(valid_data) entries would shift labels onto the wrong samples
# as soon as any sample other than the last ones is dropped.
valid_data = []
valid_labels = []
for sample, label in zip(data_dict['data'], data_dict['labels']):
    if len(sample) == 42:  # feature length must be 42
        valid_data.append(sample)
        valid_labels.append(label)

# Convert to NumPy arrays: data is (n_samples, 42), labels is (n_samples,)
data = np.array(valid_data)
labels = np.array(valid_labels)

# Map a class index (position in sorted order) to its label value
labels_dict = {i: label for i, label in enumerate(sorted(set(labels)))}

# Hold out 20% of the samples for testing, stratified by label so every class
# keeps its proportion in both splits. The seed is fixed (same value as the
# k-fold cell) so the reported score and the saved model are reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)

# Initialise and train the model (seeded for reproducibility)
model = RandomForestClassifier(random_state=42)

# The data already has the (n_samples, n_features) shape the estimator expects
model.fit(x_train, y_train)

# Predict on the held-out split and report accuracy (y_true first, y_pred second)
y_predict = model.predict(x_test)
score = accuracy_score(y_test, y_predict)
print('{}% of samples were classified correctly !'.format(score * 100))

# If a 'model.p' file already exists in the working directory, move it aside
# to the first free name among model1.p, model2.p, ...
model_filename = 'model.p'
if os.path.exists(model_filename):
    stem, ext = os.path.splitext(model_filename)
    i = 1
    new_filename = f'{stem}{i}{ext}'
    while os.path.exists(new_filename):
        i += 1
        new_filename = f'{stem}{i}{ext}'
    os.rename(model_filename, new_filename)

# Folder the trained model is written to; exist_ok avoids the
# check-then-create race of testing os.path.exists first.
model_folder = '../model/'
os.makedirs(model_folder, exist_ok=True)

# Pick a file name that does not overwrite an earlier model:
# rf_model.p first, then rf_model_1.p, rf_model_2.p, ...
i = 1
model_filename = os.path.join(model_folder, 'rf_model.p')
while os.path.exists(model_filename):
    model_filename = os.path.join(model_folder, f'rf_model_{i}.p')
    i += 1

# Persist the model wrapped in a dict under the 'model' key
with open(model_filename, 'wb') as f:
    pickle.dump({'model': model}, f)

# Per-class precision / recall / F1 on the held-out split; class names are
# taken from labels_dict in ascending index order.
class_names = [labels_dict[idx] for idx in sorted(labels_dict)]
report = classification_report(y_test, y_predict, target_names=class_names)
print(report)

In [None]:
# TODO: try a CNN

In [None]:
# K-FOLD CROSS VALIDATION + RANDOM FOREST CLASSIFIER

import pickle
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import os

# Load the pickled dataset.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open('../data.pickle', 'rb') as f:
    data_dict = pickle.load(f)

# Keep only samples with exactly 42 features, filtering samples and labels
# together so each remaining sample keeps its own label. Slicing the label
# list to the first len(valid_data) entries would misalign the two whenever a
# sample that is not at the end gets dropped.
valid_pairs = [
    (sample, label)
    for sample, label in zip(data_dict['data'], data_dict['labels'])
    if len(sample) == 42
]
valid_data = [sample for sample, _ in valid_pairs]
data = np.array(valid_data)
labels = np.array([label for _, label in valid_pairs])

# Sorted distinct labels, plus an index -> label lookup
unique_labels = sorted(set(labels))
labels_dict = {i: label for i, label in enumerate(unique_labels)}

# Stratified 5-fold splitter with seeded shuffling
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold accuracy, plus the pooled true / predicted labels from every fold
accuracies = []
all_y_true = []
all_y_pred = []

for train_index, test_index in skf.split(data, labels):
    x_train, y_train = data[train_index], labels[train_index]
    x_test, y_test = data[test_index], labels[test_index]

    # A new forest is trained for every fold
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(x_train, y_train)
    y_predict = model.predict(x_test)

    # Pool this fold's results for the overall report below
    all_y_true += list(y_test)
    all_y_pred += list(y_predict)

    acc = accuracy_score(y_test, y_predict)
    accuracies.append(acc)

# Accuracy of each fold and their mean
print("Fold Accuracies:", accuracies)
print("Mean Accuracy:", np.mean(accuracies))

# Classification report over the pooled out-of-fold predictions
report = classification_report(all_y_true, all_y_pred, target_names=unique_labels)
print("Classification Report:\n", report)

# Confusion matrix over the same pooled predictions
conf_matrix = confusion_matrix(all_y_true, all_y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Retrain on the full dataset before saving. `model` is the estimator object
# left over from the last cross-validation fold; warm_start is left at its
# default, so this fit() rebuilds the forest rather than adding to it.
model.fit(data, labels)

# Folder the trained model is written to; exist_ok avoids the
# check-then-create race of testing os.path.exists first.
model_folder = '../model/'
os.makedirs(model_folder, exist_ok=True)

# Pick a file name that does not overwrite an earlier model:
# rf_model_kfold.p first, then rf_model_kfold_1.p, rf_model_kfold_2.p, ...
i = 1
model_filename = os.path.join(model_folder, 'rf_model_kfold.p')
while os.path.exists(model_filename):
    model_filename = os.path.join(model_folder, f'rf_model_kfold_{i}.p')
    i += 1

# Persist the model wrapped in a dict under the 'model' key
with open(model_filename, 'wb') as f:
    pickle.dump({'model': model}, f)

Fold Accuracies: [0.9649495485926712, 0.9739638682252922, 0.975557917109458, 0.9628055260361318, 0.9750265674814028]
Mean Accuracy: 0.9704606854889912
Classification Report:
               precision    recall  f1-score   support

           A       0.98      0.99      0.99       400
           B       0.99      1.00      1.00       400
           C       1.00      0.99      0.99       394
           D       0.99      0.99      0.99       386
           E       0.97      0.96      0.97       398
           F       0.97      0.98      0.98       392
           G       0.96      0.98      0.97       365
           H       0.98      0.97      0.98       400
           I       0.97      0.99      0.98       395
           J       0.97      0.96      0.96       226
           K       0.96      0.96      0.96       337
           L       0.98      0.96      0.97       397
           M       0.96      0.96      0.96       318
           N       0.96      0.95      0.95       293
           O  