In [4]:
# ============================================================
# 1. IMPORTS AND GLOBAL ARTIFACT LOADING
# ============================================================
import pandas as pd
import numpy as np
import gradio as gr
import pickle
import joblib 

# You need to import the Regressor class for joblib to load the model correctly
from xgboost import XGBRegressor 

# --- Global Constants ---
# Publication year substituted when a new book's year is missing or cannot
# be parsed. Keep it in sync with the median computed during training.
GLOBAL_YEAR_MEDIAN = 2004.0

# --- Feature List (names and order must match the training features exactly) ---
# NOTE(review): "  num_pages" carries two leading spaces; presumably this
# mirrors the column name used at training time -- confirm before changing.
features = [
    "  num_pages",
    "language_code",
    "publisher",
    "authors",
    "log_ratings_count",
    "log_reviews_count",
    "year",
]

# Categorical columns that are passed through the fitted label encoders.
label_cols = ["language_code", "publisher", "authors"]


# ============================================================
# 2. CUSTOM PREPROCESSING CLASS DEFINITIONS
# (These must be defined so the loaded objects work correctly)
# ============================================================

# --- Custom Label Encoder ---
class CustomLabelEncoder:
    """Minimal label encoder mapping each distinct value to an integer code.

    Codes follow the order of ``series.unique()`` at fit time. Values that
    were not seen during ``fit`` are encoded as 0, i.e. they share the code
    of the first fitted class.
    """

    def __init__(self):
        # Fitted distinct values, positioned at their integer code.
        self.classes_ = []
        # Lookup table: value -> integer code.
        self._mapping = {}

    def fit(self, series):
        """Learn the value -> code table from *series* and return ``self``."""
        distinct_values = series.unique().tolist()
        self.classes_ = distinct_values
        self._mapping = dict(zip(distinct_values, range(len(distinct_values))))
        return self

    def transform(self, series):
        """Return *series* with every value replaced by its code (0 if unseen)."""

        def _code_for(value):
            # Unknown values deliberately fall back to code 0.
            return self._mapping.get(value, 0)

        return series.apply(_code_for)

    def fit_transform(self, series):
        """Fit on *series*, then return its encoded form."""
        return self.fit(series).transform(series)

# --- Custom Standard Scaler ---
class CustomStandardScaler:
    """Standardize data using the mean and standard deviation seen at fit time.

    The statistics are whatever ``X.mean()`` and ``X.std()`` return for the
    object passed to ``fit``.
    """

    def __init__(self):
        # Both statistics stay None until fit() has been called.
        self.mean_ = None
        self.std_ = None

    def fit(self, X):
        """Store the mean and standard deviation of *X* and return ``self``."""
        self.mean_, self.std_ = X.mean(), X.std()
        return self

    def transform(self, X):
        """Return *X* centered on the fitted mean and divided by the fitted std."""
        centered = X - self.mean_
        # A small constant keeps the division defined when the std is zero.
        spread = self.std_ + 1e-6
        return centered / spread

    def fit_transform(self, X):
        """Fit on *X*, then return its standardized form."""
        return self.fit(X).transform(X)


# ============================================================
# 3. LOAD TRAINED MODEL AND ARTIFACTS
# (Ensure these file paths are correct)
# ============================================================
try:
    # Load order: model first, then the label encoders, then the scaler.
    xgb_reg = joblib.load('xgb_regressor_model.joblib')
    # NOTE: unpickling runs arbitrary code from the file; only load
    # artifacts produced by a trusted training run.
    with open('encoders.pkl', 'rb') as f:
        encoders = pickle.load(f)
    scaler = joblib.load('custom_scaler.joblib')
except FileNotFoundError:
    # Execution continues after the message; any name that had not been
    # assigned before the failure stays undefined, so the user has to
    # provide the artifact files and re-run this section.
    print("ERROR: Could not find model or artifact files. Please run the training notebook first.")
else:
    print("Model and preprocessing artifacts loaded successfully.")


# ============================================================
# 4. PREDICTION FUNCTION
# ============================================================

def predict_next_book_rating(num_pages, publisher, language_code, authors, ratings_count, text_reviews_count, year):
    """
    Predict the average rating for a new book using the loaded model and encoders.

    Args:
        num_pages: Page count; must be convertible to float.
        publisher: Publisher name (categorical, label-encoded).
        language_code: Language code (categorical, label-encoded).
        authors: Author string (categorical, label-encoded).
        ratings_count: Total number of ratings; must be numeric and >= 0.
        text_reviews_count: Number of text reviews; must be numeric and >= 0.
        year: Publication year; replaced by GLOBAL_YEAR_MEDIAN when it
            cannot be converted to float.

    Returns:
        str: "Predicted Average Rating: X.XX / 5.0" on success, or a message
        starting with "ERROR:" when the artifacts are unavailable or an
        input is invalid.
    """
    artifact_error = "ERROR: Model not loaded correctly. Check the artifact file paths."

    # The module-level loader leaves these globals undefined when an artifact
    # file is missing, so resolve both before any of them is used.
    try:
        model = xgb_reg
        label_encoders = encoders
    except NameError:
        return artifact_error

    # Required numeric inputs: report bad values instead of crashing.
    try:
        pages = float(num_pages)
        ratings = float(ratings_count)
        reviews = float(text_reviews_count)
    except (TypeError, ValueError):
        return "ERROR: Number of pages, ratings count and text reviews count must be numeric."
    if ratings < 0 or reviews < 0:
        # log1p of a negative count would yield NaN or -inf features.
        return "ERROR: Ratings count and text reviews count cannot be negative."

    # Year is optional: fall back to the training median when unusable.
    try:
        year_value = float(year)
    except (TypeError, ValueError):
        year_value = GLOBAL_YEAR_MEDIAN

    row = {
        "  num_pages": pages,
        "log_ratings_count": np.log1p(ratings),
        "log_reviews_count": np.log1p(reviews),
        "year": year_value,
    }

    # Categorical encoding.
    raw_labels = {
        "language_code": language_code,
        "publisher": publisher,
        "authors": authors,
    }
    for col in label_cols:
        enc = label_encoders.get(col)
        if enc is None:
            # Without its encoder the column cannot be turned into a number.
            return artifact_error
        val = str(raw_labels[col]).strip()
        # Map unseen labels to the first fitted class.
        if val not in enc.classes_:
            val = enc.classes_[0]
        row[col] = enc.transform(pd.Series([val]))[0]

    # Column order must follow `features`; XGBoost is fed unscaled values.
    new_X = pd.DataFrame([row])[features].astype(float)
    pred_rating = model.predict(new_X)[0]

    # Clip the result to ensure it's a valid rating (0.0 to 5.0).
    final_rating = np.clip(pred_rating, 0.0, 5.0)

    return f"Predicted Average Rating: {final_rating:.2f} / 5.0"


# ============================================================
# 5. GRADIO UI IMPLEMENTATION AND LAUNCH
# ============================================================

# Input widgets, listed in the same order as the parameters of
# predict_next_book_rating (Gradio passes the values positionally).
input_components = [
    gr.Number(label="Number of Pages", value=400, precision=0),
    gr.Textbox(label="Publisher (e.g., Random House)", value="Scholastic"),
    gr.Textbox(label="Language Code (e.g., eng, fre)", value="eng"),
    gr.Textbox(label="Authors (e.g., J.K. Rowling)", value="J.K. Rowling"),
    gr.Number(label="Ratings Count (Total user ratings)", value=50000, precision=0),
    gr.Number(label="Text Reviews Count", value=1500, precision=0),
    gr.Number(label="Publication Year (e.g., 2005)", value=2010, precision=0)
]

# The prediction function returns a single formatted string.
output_component = gr.Textbox(label="Prediction Result")

iface = gr.Interface(
    fn=predict_next_book_rating,
    inputs=input_components,
    outputs=output_component,
    # The books emoji is written as an escape so that a wrong file encoding
    # cannot turn it into mojibake again.
    title="\U0001F4DA Book Average Rating Predictor (UI Notebook)",
    description="Enter the details of a new book to predict its average rating (0.0 - 5.0).",
    # Each example row follows the order of input_components.
    examples=[
        [400, "Scholastic", "eng", "J.K. Rowling", 50000, 1500, 2010],
        [800, "Bantam Books", "eng", "George R.R. Martin", 200000, 8000, 2011],
        [150, "Unknown", "fre", "Unknown Author", 1000, 50, 1995]
    ]
)

print("Launching Gradio Interface in the Jupyter Notebook...")
# Use 'inline=True' for better display within the notebook environment
iface.launch(inline=True)

ERROR: Could not find model or artifact files. Please run the training notebook first.
Launching Gradio Interface in the Jupyter Notebook...
* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


