# Interactive LLM Chatbot Arena Domain Classifier

## Simply run the two cells below to start interacting with our classifier! 🚀

In [1]:
import pandas as pd
import joblib
import os
from dash import Dash, dcc, html, Input, Output
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report

df = pd.read_csv("sampled_dataset.csv")
valid_domains = ['opinion', 'creative', 'factual', 'code', 'math']
df = df[df['Domain'].isin(valid_domains)].copy()
df = df[df['Open/Closed'].isin(['open', 'closed'])].copy()

# Prepare combined text and labels
df["combined_text"] = df["Open/Closed"] + " [SEP] " + df["prompt"].astype(str)
le = LabelEncoder()
df["domain_label"] = le.fit_transform(df["Domain"])

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    df["combined_text"], df["domain_label"], test_size=0.2, random_state=42, stratify=df["domain_label"]
)

# TF-IDF vectorization
vectorizer = TfidfVectorizer(max_features=3000, stop_words='english', ngram_range=(1, 2))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train SVM with RBF kernel using GridSearch
param_grid = {'C': [1, 10], 'gamma': ['scale', 0.01]}
svm = SVC(kernel='rbf', random_state=42)
grid = GridSearchCV(svm, param_grid, cv=3, scoring='accuracy', verbose=1, n_jobs=-1)
grid.fit(X_train_vec, y_train)
best_svm_oc = grid.best_estimator_

y_pred = best_svm_oc.predict(X_test_vec)
#print(classification_report(y_test, y_pred, target_names=le.classes_, zero_division=0))


# Save files 
joblib.dump(best_svm_oc, "best_svm_oc.joblib")
joblib.dump(vectorizer, "tfidf_vectorizer.joblib")
joblib.dump(le, "label_encoder.joblib")
#print("Files saved: best_svm_oc.joblib, tfidf_vectorizer.joblib, label_encoder.joblib")


Fitting 3 folds for each of 4 candidates, totalling 12 fits


['label_encoder.joblib']

In [19]:
import joblib
import os
from dash import Dash, dcc, html, Input, Output, State

# Load trained model and vectorizer/encoder
model = joblib.load("best_svm_model.joblib")
vectorizer = joblib.load("tfidf_vectorizer.joblib")
label_encoder = joblib.load("label_encoder.joblib")

# Initialize Dash app
app = Dash(__name__)

app.layout = html.Div([
    html.H1("🤖 LLM Arena: Chatbot Domain Classifier", 
            style={"textAlign": "center", "fontFamily": "Arial", "marginTop": "30px"}),

    html.Div([
        dcc.Textarea(
            id='user_input',
            placeholder='💬 Type your prompt here...',
            style={
                'width': '100%',
                'height': '150px',
                'borderRadius': '10px',
                'padding': '10px',
                'fontSize': '16px',
                'fontFamily': 'Courier New',
                'boxShadow': '0 0 5px rgba(0,0,0,0.1)'
            }
        ),
        html.Button('🚀 Classify Domain', id='predict_button', n_clicks=0, 
                    style={
                        'marginTop': '10px',
                        'padding': '10px 20px',
                        'fontSize': '16px',
                        'borderRadius': '5px',
                        'backgroundColor': '#007BFF',
                        'color': 'white',
                        'border': 'none',
                        'cursor': 'pointer'
                    }),
        html.Div(id='prediction_output', 
                 style={
                     'marginTop': '20px', 
                     'fontWeight': 'bold',
                     'fontSize': '20px',
                     'color': '#2E8B57'
                 })
    ], style={
        'maxWidth': '700px', 
        'margin': 'auto', 
        'padding': '20px',
        'backgroundColor': '#F9F9F9',
        'borderRadius': '10px',
        'boxShadow': '0 0 10px rgba(0,0,0,0.05)'
    }),

    # ← This comma is the critical fix

    html.Div(id='correction_section', children=[
        html.Label("Was this prediction correct? If not, choose the correct domain:"),
        dcc.Dropdown(
            id='correction_dropdown',
            options=[{'label': d, 'value': d} for d in label_encoder.classes_],
            placeholder='Select correct domain...',
            style={'marginTop': '10px'}
        ),
        html.Button("✅ Submit Correction", id='submit_correction', n_clicks=0,
                    style={
                        'marginTop': '10px',
                        'padding': '8px 16px',
                        'backgroundColor': '#28a745',
                        'color': 'white',
                        'border': 'none',
                        'borderRadius': '5px',
                        'cursor': 'pointer'
                    }),
        html.Div(id='correction_feedback', style={'marginTop': '10px', 'color': '#444'})
    ])
])

@app.callback(
    Output('prediction_output', 'children'),
    Input('predict_button', 'n_clicks'),
    Input('user_input', 'value')
)
def classify_domain(n_clicks, text):
    if n_clicks > 0 and text:
        X = vectorizer.transform([text])
        pred = model.predict(X)
        domain = label_encoder.inverse_transform(pred)[0]
        return f"Predicted Domain: {domain}"
    return ""

@app.callback(
    Output('correction_feedback', 'children'),
    Input('submit_correction', 'n_clicks'),
    State('user_input', 'value'),
    State('correction_dropdown', 'value')
)
def save_correction(n_clicks, user_text, corrected_label):
    if n_clicks > 0 and user_text and corrected_label:
        with open("corrections.csv", "a") as f:
            f.write(f'"{user_text.strip()}",{corrected_label}\n')
        return "✅ Correction saved! Thanks for helping improve the model."
    return ""

if __name__ == '__main__':
    port = int(os.environ.get("PORT", 10000))  # Render sets PORT dynamically
    app.run_server(debug=False, host="0.0.0.0", port=port)


In [21]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Load base dataset
df = pd.read_csv("sampled_dataset.csv")
df = df[df['Domain'].isin(['opinion', 'creative', 'factual', 'code', 'math'])].copy()
df = df[df['Open/Closed'].isin(['open', 'closed'])].copy()
df["combined_text"] = df["Open/Closed"] + " [SEP] " + df["prompt"].astype(str)
df["domain_label"] = df["Domain"]  # Will re-encode below

# Load corrections if file exists
try:
    corrections = pd.read_csv("corrections.csv", header=None, names=["prompt", "correct_domain"])
    corrections["combined_text"] = "open [SEP] " + corrections["prompt"].astype(str)  # default to 'open' or update if known
    corrections["domain_label"] = corrections["correct_domain"]
    df = pd.concat([df[["combined_text", "domain_label"]], corrections[["combined_text", "domain_label"]]], ignore_index=True)
    print(f"Loaded {len(corrections)} corrections from corrections.csv")
except FileNotFoundError:
    print("⚠️ No corrections.csv found. Proceeding with original dataset.")

# Encode domain labels
le = LabelEncoder()
df["domain_encoded"] = le.fit_transform(df["domain_label"])

# TF-IDF vectorization
vectorizer = TfidfVectorizer(max_features=3000, stop_words='english', ngram_range=(1, 2))
X = vectorizer.fit_transform(df["combined_text"])
y = df["domain_encoded"]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# SVM + GridSearchCV
param_grid = {'C': [1, 10], 'gamma': ['scale', 0.01]}
svm = SVC(kernel='rbf', random_state=42)
grid = GridSearchCV(svm, param_grid, cv=3, scoring='accuracy', verbose=1, n_jobs=-1)
grid.fit(X_train, y_train)
best_model = grid.best_estimator_

# Evaluate
y_pred = best_model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=le.classes_))


joblib.dump(best_model, "best_svm_model.joblib")
joblib.dump(vectorizer, "tfidf_vectorizer.joblib")
joblib.dump(le, "label_encoder.joblib")

print("Updated model, vectorizer, and label encoder saved")


Loaded 11 corrections from corrections.csv
Fitting 3 folds for each of 4 candidates, totalling 12 fits
              precision    recall  f1-score   support

        code       0.83      0.33      0.48        15
    creative       0.91      0.67      0.77        30
     factual       0.49      0.77      0.60        35
        math       0.40      0.20      0.27        10
     opinion       0.59      0.64      0.62        25

    accuracy                           0.61       115
   macro avg       0.65      0.52      0.55       115
weighted avg       0.66      0.61      0.60       115

Updated model, vectorizer, and label encoder saved
