### A/B Testing For Multinomial Regression

-- Instalation Package

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, 
                           classification_report, confusion_matrix, roc_auc_score)
import mlflow
import warnings
from scipy import stats
import time
from datetime import datetime
import json
import os
warnings.filterwarnings('ignore')

-- Prepare and Load Data

In [12]:
class ABTestingFramework:
    """
    Framework A/B Testing untuk model Multinomial Regression
    """
    def __init__(self, data_path="data/winequality-red.csv", random_state=42):
        self.data_path = data_path
        self.random_state = random_state
        self.results = {}
        self.experiments = {}
        
        # Load dan prepare data
        self.load_data()
        self.prepare_data()
        
    def load_data(self):
        """Load dataset wine quality"""
        print("Loading wine quality dataset...")
        self.df = pd.read_csv(self.data_path, sep=";")
        
        # Create quality groups
        def simplify_quality(q):
            if q <= 4:  # low
                return 0
            if q <= 6:  # medium
                return 1 
            else:  # high
                return 2
        
        self.df["qualityg"] = self.df["quality"].apply(simplify_quality)
        
        # Prepare features dan target
        self.X = self.df.drop(columns=["quality", "qualityg"])
        self.y = self.df["qualityg"]
        
        print(f"Dataset loaded: {self.df.shape}")
        print(f"Target distribution: {np.bincount(self.y)}")
        
    def prepare_data(self):
        """Prepare data untuk A/B testing"""
        # Split data dengan stratification
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, test_size=0.2, random_state=self.random_state, stratify=self.y
        )
        
        print(f"Training set: {self.X_train.shape}")
        print(f"Test set: {self.X_test.shape}")
        
    def create_model_variant(self, variant_name, **params):
        """
        Membuat variant model dengan parameter yang berbeda
        
        Parameters:
        - variant_name: Nama variant (A, B, C, dll)
        - **params: Parameter untuk model (C, solver, max_iter, dll)
        """
        default_params = {
            'C': 1.0,
            'solver': 'lbfgs',
            'max_iter': 1000,
            'random_state': self.random_state
        }
          # Update dengan parameter yang diberikan
        model_params = {**default_params, **params}
        
        # Buat pipeline
        model = Pipeline([
            ('scaler', StandardScaler()),
            ('multinomial_lr', LogisticRegression(**model_params))
        ])
        
        self.experiments[variant_name] = {
            'model': model,
            'params': model_params,
            'created_at': datetime.now().isoformat()
        }
        
        print(f"Model variant '{variant_name}' created with params: {model_params}")
        return model

IndentationError: unindent does not match any outer indentation level (<string>, line 39)