In [35]:
# ============================================
# YANGI DATASET UCHUN PREDICT QILISH
# ============================================
# Bu notebook saqlangan modelni yuklab, yangi dataset uchun predict qilish uchun
# (default ustuni bo'lmagan dataset uchun)

import pandas as pd
import numpy as np
import pickle
from pathlib import Path
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, confusion_matrix, classification_report


In [36]:
# ============================================
# 1. LOAD THE SAVED MODEL AND ENCODERS
# ============================================

model_folder = Path('../result')

# Artifact locations (the error messages below point to train.ipynb as their producer)
model_file = model_folder / 'trained_model.pkl'
encoders_file = model_folder / 'label_encoders.pkl'
categorical_file = model_folder / 'categorical_columns.pkl'
feature_columns_file = model_folder / 'feature_columns.pkl'

# Verify every artifact exists before loading anything; the first missing
# file (in this order) aborts the cell with its own message.
required_artifacts = [
    (model_file, f"Model fayli topilmadi: {model_file}. Avval train.ipynb ni ishga tushiring!"),
    (encoders_file, f"Encoders fayli topilmadi: {encoders_file}. Avval train.ipynb ni ishga tushiring!"),
    (categorical_file, f"Categorical columns fayli topilmadi: {categorical_file}. Avval train.ipynb ni ishga tushiring!"),
    (feature_columns_file, f"Feature columns fayli topilmadi: {feature_columns_file}. Avval train.ipynb ni ishga tushiring!"),
]
for artifact_path, missing_message in required_artifacts:
    if not artifact_path.exists():
        raise FileNotFoundError(missing_message)


def _read_pickle(path):
    """Unpickle and return the object stored at `path`."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load the artifacts
model = _read_pickle(model_file)
label_encoders = _read_pickle(encoders_file)
categorical_columns = _read_pickle(categorical_file)
feature_columns = _read_pickle(feature_columns_file)


In [37]:
# ============================================
# 2. LOAD THE NEW DATASET
# ============================================

# Path to the new dataset (change as needed)
new_dataset_path = 'dataset_for_models.csv'  # ← put the path of the new dataset here

# Read the CSV into a DataFrame
df_new = pd.read_csv(filepath_or_buffer=new_dataset_path)


In [38]:
# ============================================
# 3. PREPARE THE FEATURES
# ============================================

def prepare_features(df, target_column='default'):
    """
    Return a copy of `df` without the non-feature columns.

    The ID columns, a fixed set of unneeded columns and, when it is given
    and present, `target_column` are removed. Columns that do not exist in
    `df` are simply skipped, so a dataset without the target works too.
    """
    # ID columns followed by identifier-like / unhelpful columns
    non_feature_columns = (
        'customer_ref', 'application_id',
        'loan_officer_id', 'previous_zip_code', 'referral_code',
    )

    candidates = list(non_feature_columns)

    # The target is only a drop candidate when it is actually in the frame
    if target_column and target_column in df.columns:
        candidates.append(target_column)

    # Restrict the drop list to columns that really exist
    present = [name for name in candidates if name in df.columns]

    # Whatever is left is the feature matrix (X)
    return df.drop(columns=present, errors='ignore')

# Build the feature frame for the new dataset
X_new = prepare_features(df=df_new, target_column='default')


In [39]:
# ============================================
# 4. ENCODE THE CATEGORICAL COLUMNS
# ============================================

X_new_encoded = X_new.copy()

for col in categorical_columns:
    if col not in X_new_encoded.columns:
        continue

    # LabelEncoder.transform returns each value's position in `classes_`,
    # so a value -> position lookup reproduces its codes for known values
    # without one transform() call (and one linear scan) per row.
    known_classes = label_encoders[col].classes_.tolist()
    code_by_class = {cls: code for code, cls in enumerate(known_classes)}

    # Values absent from the fitted classes all share one extra code,
    # one past the largest known code (i.e. len(classes_)).
    unseen_code = len(known_classes)

    X_new_encoded[col] = (
        X_new[col]
        .astype(str)            # same string cast that was applied before transform()
        .map(code_by_class)     # unmapped values become NaN
        .fillna(unseen_code)
        .astype('int64')
    )

# Drop any columns that are still of object dtype after encoding
remaining_object_cols = X_new_encoded.select_dtypes(include=['object']).columns.tolist()
if remaining_object_cols:
    X_new_encoded = X_new_encoded.drop(columns=remaining_object_cols)


In [40]:
# ============================================
# 5. PREPARE THE FEATURES FOR THE MODEL
# ============================================

# Training features that the new data lacks are filled with 0.0 below.
# Report them, because a zero-filled feature can silently distort predictions.
missing_features = [col for col in feature_columns if col not in X_new_encoded.columns]
if missing_features:
    print(
        "Ogohlantirish: quyidagi feature ustunlar yangi datasetda topilmadi, "
        f"0.0 bilan to'ldirildi: {missing_features}"
    )

# One reindex selects the training columns in training order, adds the
# missing ones as 0.0 and drops extras; building the frame in a single step
# also avoids the fragmentation caused by inserting columns one at a time.
X_final = (
    X_new_encoded
    .reindex(columns=list(feature_columns), fill_value=0.0)
    .astype('float64')
)

# ============================================
# 6. PREDICT
# ============================================

class_probabilities = model.predict_proba(X_final)
# Column 1 is taken as the probability of default
# (assumes the model's second class is default = 1 — TODO confirm)
new_proba = class_probabilities[:, 1]
# Probability >= 0.5 → default = 1, otherwise 0
new_pred = np.greater_equal(new_proba, 0.5).astype(int)

# ============================================
# 7. MODEL EVALUATION (only when the `default` column is present)
# ============================================

if 'default' in df_new.columns:
    y_true = df_new['default'].values

    # ROC-AUC is undefined when the labels contain a single class. Report
    # NaN instead of letting the optional evaluation abort the run before
    # the predictions are saved.
    if pd.Series(y_true).nunique() < 2:
        auc_score = float('nan')
    else:
        auc_score = roc_auc_score(y_true, new_proba)

    accuracy = accuracy_score(y_true, new_pred)
    f1 = f1_score(y_true, new_pred)

    print("=" * 60)
    print("MODEL NATIJALARI")
    print("=" * 60)
    print(f"ROC-AUC Score: {auc_score:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, new_pred))
    print("\nClassification Report:")
    print(classification_report(y_true, new_pred))
    print("=" * 60)




In [41]:
# ============================================
# 8. SAVE THE RESULTS
# ============================================

# One output row per input row: integer customer id, rounded probability
# and the thresholded prediction.
result_columns = {
    'customer_id': df_new['customer_ref'].values.astype(int),
    'prob': new_proba.round(5),
    'default': new_pred,
}
results_df = pd.DataFrame(result_columns)

# Written next to the model artifacts, without the DataFrame index
output_file = model_folder / 'results.csv'
results_df.to_csv(output_file, index=False)
