In [1]:
# Import the required libraries
import ast  # Safe parsing of Python literals (ast.literal_eval)
import os

import joblib  # Loads persisted Python models/objects
import numpy as np  # Array helpers (e.g. np.delete)
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# --- File path configuration ---
# File names of the two artifacts this script loads.
NAMA_FILE_PROCESSED_CSV = 'processed_data_v2.csv'  # the v2 processed file
NAMA_FILE_TFIDF_VECTORIZER = 'tfidf_vectorizer.joblib'

# Directories, written relative to the parent directory ('..').
PATH_PROCESSED_DATA_DIR = os.path.join(os.pardir, 'data', 'processed')
PATH_MODELS_DIR = os.path.join(os.pardir, 'models')

# Full paths: directory + file name.
PATH_PROCESSED_FILE = os.path.join(PATH_PROCESSED_DATA_DIR, NAMA_FILE_PROCESSED_CSV)
PATH_TFIDF_VECTORIZER = os.path.join(PATH_MODELS_DIR, NAMA_FILE_TFIDF_VECTORIZER)

# --- Names of the relevant columns ---
COLUMN_FINDING_DESC = 'finding_description'
COLUMN_ORDER_INFO = 'order_info'  # new column
COLUMN_ACTION_TAKEN = 'rectification_steps'
COLUMN_MAN_HOURS = 'man_hours_per_step'
COLUMN_WORK_CENTRE = 'work_centres_per_step'
COLUMN_PLANTS_PER_STEP = 'plants_per_step'  # new column
COLUMN_MATERIALS = 'materials_info'

# --- 1. Load the processed data and the TF-IDF vectorizer ---
def _parse_list_cell(cell):
    """Return the list encoded in the string *cell*, or *cell* unchanged.

    CSV round-trips turn list values into their string form (for example
    "['a', 'b']"). Non-string cells (such as NaN), strings that are not valid
    Python literals, and literals that are not lists are returned untouched so
    a single bad cell never aborts the load.
    """
    if not isinstance(cell, str):
        return cell
    try:
        parsed = ast.literal_eval(cell)
    except (ValueError, SyntaxError, TypeError):
        return cell
    return parsed if isinstance(parsed, list) else cell


# Columns whose cells are expected to hold lists rather than plain strings.
# Defined outside the try block so the name exists even when loading fails.
list_columns_to_eval = [COLUMN_ACTION_TAKEN, COLUMN_MAN_HOURS, COLUMN_WORK_CENTRE, COLUMN_PLANTS_PER_STEP]

print(f"Mencoba memuat data yang sudah diproses dari: {PATH_PROCESSED_FILE}")
try:
    processed_df = pd.read_csv(PATH_PROCESSED_FILE)
    # Make sure the list columns are real lists, not their string representation.
    for col in list_columns_to_eval:
        if col in processed_df.columns:
            processed_df[col] = processed_df[col].apply(_parse_list_cell)
    print("Data yang sudah diproses berhasil dimuat!")
    print(f"Kolom tersedia: {processed_df.columns.tolist()}")
except FileNotFoundError:
    print(f"ERROR: File tidak ditemukan di {PATH_PROCESSED_FILE}")
    processed_df = pd.DataFrame()
except Exception as e:
    # Script-level boundary: report the problem and continue with an empty
    # frame so the later sections can skip themselves gracefully.
    print(f"Terjadi error saat memuat CSV yang sudah diproses: {e}")
    processed_df = pd.DataFrame()

print(f"\nMencoba memuat TF-IDF Vectorizer dari: {PATH_TFIDF_VECTORIZER}")
try:
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load vectorizer files that come from a trusted source.
    tfidf_vectorizer = joblib.load(PATH_TFIDF_VECTORIZER)
    print("TF-IDF Vectorizer berhasil dimuat!")
except FileNotFoundError:
    print(f"ERROR: File tidak ditemukan di {PATH_TFIDF_VECTORIZER}")
    tfidf_vectorizer = None
except Exception as e:
    # Same boundary handling as above: None marks the vectorizer as unavailable.
    print(f"Terjadi error saat memuat TF-IDF Vectorizer: {e}")
    tfidf_vectorizer = None

# --- 2. Build the TF-IDF matrix for the historical data ---
# df_for_tfidf holds the cleaned rows used both for the TF-IDF transform and
# for looking up recommendations; its index is reset so that row i of
# df_for_tfidf corresponds to row i of historical_tfidf_features.
df_for_tfidf = pd.DataFrame()
historical_tfidf_features = None

# Compare against None explicitly: a loaded object's truthiness is not a
# reliable "was it loaded" signal (objects defining __len__ can be falsy).
if processed_df.empty or tfidf_vectorizer is None:
    print("Data atau TF-IDF Vectorizer tidak siap untuk evaluasi.")
elif COLUMN_FINDING_DESC not in processed_df.columns:
    # The column is looked up in processed_df, so the message names that frame.
    print(f"ERROR: Kolom '{COLUMN_FINDING_DESC}' tidak ditemukan di processed_df.")
else:
    # Drop rows without a finding description first; converting to str before
    # dropping would turn missing values into the literal text 'nan'.
    df_for_tfidf = processed_df.dropna(subset=[COLUMN_FINDING_DESC]).reset_index(drop=True)
    df_for_tfidf[COLUMN_FINDING_DESC] = df_for_tfidf[COLUMN_FINDING_DESC].astype(str)

    if df_for_tfidf.empty:
        print("Tidak ada data valid untuk TF-IDF setelah filtering.")
    else:
        historical_tfidf_features = tfidf_vectorizer.transform(df_for_tfidf[COLUMN_FINDING_DESC])
        print(f"Matriks TF-IDF untuk data historis berhasil dibuat. Shape: {historical_tfidf_features.shape}")


# --- 3. Recommendation lookup with an excluded row (leave-one-out) ---
def get_recommendations_for_evaluation(finding_text_vector, original_df_index_to_exclude, top_n=1):
    """Return the ``top_n`` historical findings most similar to a query vector.

    The row at ``original_df_index_to_exclude`` is left out of the ranking, so
    a query taken from the historical data is never recommended to itself.

    Reads the module-level ``historical_tfidf_features`` (TF-IDF matrix) and
    ``df_for_tfidf`` (DataFrame); row ``i`` of the matrix is treated as row
    ``i`` of the frame.

    Args:
        finding_text_vector: Single-row TF-IDF vector of the query finding,
            passed to ``cosine_similarity`` as the first argument.
        original_df_index_to_exclude: Row position to leave out. A value that
            matches no row position excludes nothing.
        top_n: Maximum number of recommendations. Values <= 0 yield ``[]``.

    Returns:
        A ``str`` describing the problem when the historical matrix or frame
        is unavailable. Otherwise a list of dicts ordered from most to least
        similar. Each dict has ``rank`` and ``similarity_score``; entries with
        a positive score also carry ``historical_finding``, ``order_info``,
        ``rectification_steps``, ``man_hours_per_step``,
        ``work_centres_per_step``, ``plants_per_step`` and ``materials_info``,
        while the others carry an explanatory ``message``.
    """
    if historical_tfidf_features is None:
        return "Matriks TF-IDF historis tidak tersedia."
    if df_for_tfidf.empty:
        return "Data historis (df_for_tfidf) tidak tersedia."
    if top_n <= 0:
        # Slicing with [-0:] would select every row instead of none.
        return []

    # Row positions that remain after leaving out the excluded one.
    all_rows = np.arange(historical_tfidf_features.shape[0])
    candidate_rows = all_rows[all_rows != original_df_index_to_exclude]
    if candidate_rows.size == 0:
        return [{"rank": 1, "similarity_score": 0, "message": "Tidak ada data historis lain untuk dibandingkan setelah eksklusi."}]

    # Score against the full matrix once and drop the excluded row from the
    # scores; this avoids copying the whole matrix and frame on every call.
    all_scores = cosine_similarity(finding_text_vector, historical_tfidf_features)[0]
    candidate_scores = all_scores[candidate_rows]
    best_first = candidate_scores.argsort()[-top_n:][::-1]

    recommendations_output = []
    for rank, position in enumerate(best_first, start=1):
        similarity_score = candidate_scores[position]
        if not similarity_score > 0:
            recommendations_output.append({
                "rank": rank,
                "similarity_score": similarity_score,
                "message": "Tidak ada finding historis lain yang cukup mirip atau skor kemiripan nol."
            })
            continue

        # Map the position among the candidates back to the row in df_for_tfidf.
        recommendation_detail = df_for_tfidf.iloc[candidate_rows[position]]
        recommendations_output.append({
            "rank": rank,
            "similarity_score": similarity_score,
            "historical_finding": recommendation_detail[COLUMN_FINDING_DESC],
            # Order info and plants are looked up with a fallback value.
            "order_info": recommendation_detail.get(COLUMN_ORDER_INFO, 'N/A'),
            "rectification_steps": recommendation_detail[COLUMN_ACTION_TAKEN],
            "man_hours_per_step": recommendation_detail[COLUMN_MAN_HOURS],
            "work_centres_per_step": recommendation_detail[COLUMN_WORK_CENTRE],
            "plants_per_step": recommendation_detail.get(COLUMN_PLANTS_PER_STEP, []),
            "materials_info": recommendation_detail[COLUMN_MATERIALS]
        })

    return recommendations_output

# --- 4. Simple offline check (leave-one-out style) ---
# Each sampled historical finding is used as the query and compared against
# every other finding, and the top recommendation is printed for inspection.
# The vectorizer is compared against None explicitly instead of relying on the
# truthiness of the loaded object.
if not df_for_tfidf.empty and tfidf_vectorizer is not None and historical_tfidf_features is not None:
    print("\n--- Memulai Pemeriksaan Offline Sederhana ---")

    # df_for_tfidf is non-empty here, so at least one row is always sampled.
    num_samples_to_evaluate = min(5, len(df_for_tfidf))
    # A fixed random_state keeps the sampled rows the same between runs.
    # NOTE(review): the sampled index labels are used as row positions below,
    # which presumes df_for_tfidf carries a default 0..n-1 index - confirm if
    # the frame is ever built without reset_index(drop=True).
    sample_indices_in_df_for_tfidf = df_for_tfidf.sample(n=num_samples_to_evaluate, random_state=42).index

    for original_idx_in_dffortfidf in sample_indices_in_df_for_tfidf:
        original_finding_series = df_for_tfidf.iloc[original_idx_in_dffortfidf]
        original_finding_text = original_finding_series[COLUMN_FINDING_DESC]
        # Reuse the already computed TF-IDF row of this finding as the query.
        original_finding_vector = historical_tfidf_features[original_idx_in_dffortfidf]

        print(f"\n\n================ EVALUASI UNTUK FINDING (Indeks di df_for_tfidf: {original_idx_in_dffortfidf}) =================")
        print(f"FINDING ASLI: \"{original_finding_text}\"")
        print(f"ORDER INFO ASLI: {original_finding_series.get(COLUMN_ORDER_INFO, 'N/A')}")
        print(f"LANGKAH ASLI: {original_finding_series[COLUMN_ACTION_TAKEN]}")
        print(f"PLANT PER LANGKAH ASLI: {original_finding_series.get(COLUMN_PLANTS_PER_STEP, 'N/A')}")

        # Exclude the query's own row so it cannot be recommended to itself.
        hasil_rekomendasi = get_recommendations_for_evaluation(original_finding_vector, original_idx_in_dffortfidf, top_n=1)

        print("\n--- REKOMENDASI TERATAS (dari finding lain): ---")
        if isinstance(hasil_rekomendasi, str):
            # A string result is an explanation of why no lookup was possible.
            print(hasil_rekomendasi)
        else:
            for rec in hasil_rekomendasi:
                print(f"Peringkat: {rec['rank']}")
                print(f"Skor Kemiripan: {rec['similarity_score']:.4f}")
                if "message" in rec:
                    print(rec['message'])
                else:
                    print(f"Finding Historis Direkomendasikan: \"{rec.get('historical_finding')}\"")
                    print(f"Info Order Direkomendasikan: {rec.get('order_info')}")
                    print(f"Langkah Rektifikasi Direkomendasikan: {rec.get('rectification_steps')}")
                    print(f"Plant per Langkah Direkomendasikan: {rec.get('plants_per_step')}")
        print("===================================================================")
else:
    print("\nTidak bisa menjalankan evaluasi karena data, vectorizer, atau fitur historis tidak siap.")

Mencoba memuat data yang sudah diproses dari: ..\data\processed\processed_data_v2.csv
Data yang sudah diproses berhasil dimuat!
Kolom tersedia: ['finding_description', 'rectification_steps', 'man_hours_per_step', 'work_centres_per_step', 'materials_info', 'plants_per_step', 'order_info']

Mencoba memuat TF-IDF Vectorizer dari: ..\models\tfidf_vectorizer.joblib
TF-IDF Vectorizer berhasil dimuat!
Matriks TF-IDF untuk data historis berhasil dibuat. Shape: (5053, 4771)

--- Memulai Pemeriksaan Offline Sederhana ---


FINDING ASLI: "laminate wall aisle side lav l36 dent"
ORDER INFO ASLI: 805264308
PLANT PER LANGKAH ASLI: ['wscb', 'wscb']

--- REKOMENDASI TERATAS (dari finding lain): ---
Peringkat: 1
Skor Kemiripan: 0.8278
Finding Historis Direkomendasikan: "laminate wall aisle side lav l61 dent"
Info Order Direkomendasikan: 805264316
Langkah Rektifikasi Direkomendasikan: ['general:', 'relaminate wall lavatory']
Plant per Langkah Direkomendasikan: ['wscb', 'wscb']


FINDING ASLI: "slide raft