In [1]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Question1 solution

In [2]:
class IrisDataProcessor:
    def __init__(self):
        self.data = load_iris()
        self.df = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.scaler = StandardScaler()

    def prepare_data(self):
        self.df = pd.DataFrame(self.data.data, columns=self.data.feature_names)
        self.df['target'] = self.data.target

        print(self.df.shape)
        features = self.df.drop(columns=['target'])
        scaled_features = self.scaler.fit_transform(features)
        self.df[features.columns] = scaled_features

        X = self.df.drop(columns=['target'])
        y = self.df['target']

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        print("Training set shape:", self.X_train.shape)
        print("Test set shape:", self.X_test.shape)

    def get_feature_stats(self):
        return self.df.describe()

In [3]:
processor = IrisDataProcessor()
processor.prepare_data()
feature_stats = processor.get_feature_stats()
print(feature_stats)

(150, 5)
Training set shape: (120, 4)
Test set shape: (30, 4)
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count       1.500000e+02      1.500000e+02       1.500000e+02   
mean       -1.468455e-15     -1.823726e-15      -1.610564e-15   
std         1.003350e+00      1.003350e+00       1.003350e+00   
min        -1.870024e+00     -2.433947e+00      -1.567576e+00   
25%        -9.006812e-01     -5.923730e-01      -1.226552e+00   
50%        -5.250608e-02     -1.319795e-01       3.364776e-01   
75%         6.745011e-01      5.586108e-01       7.627583e-01   
max         2.492019e+00      3.090775e+00       1.785832e+00   

       petal width (cm)      target  
count      1.500000e+02  150.000000  
mean      -9.473903e-16    1.000000  
std        1.003350e+00    0.819232  
min       -1.447076e+00    0.000000  
25%       -1.183812e+00    0.000000  
50%        1.325097e-01    1.000000  
75%        7.906707e-01    2.000000  
max        1.712096e+00    2.000000  


# Question 3 solution

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
import joblib

class IrisModelOptimizer:
    def __init__(self, experiment):
        self.experiment = experiment
        iris = load_iris()
        X, y = iris.data, iris.target
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        self.model = LogisticRegression(max_iter=200)

    def quantize_model(self):
        self.model.fit(self.X_train, self.y_train)
        original_accuracy = accuracy_score(self.y_test, self.model.predict(self.X_test))
        self.model.coef_ = np.round(self.model.coef_, decimals=2)
        self.model.intercept_ = np.round(self.model.intercept_, decimals=2)

        quantized_accuracy = accuracy_score(self.y_test, self.model.predict(self.X_test))
        joblib.dump(self.model, "quantized_logistic_model.joblib")

        return original_accuracy, quantized_accuracy

    def run_tests(self):
        original_accuracy, quantized_accuracy = self.quantize_model()
        print("Original Accuracy:", original_accuracy)
        print("Quantized Accuracy:", quantized_accuracy)

        loaded_model = joblib.load("quantized_logistic_model.joblib")
        test_accuracy = accuracy_score(self.y_test, loaded_model.predict(self.X_test))
        print("All tests passed successfully")


experiment_name = "LogisticRegression_Iris_Quantization"
optimizer = IrisModelOptimizer(experiment_name)
optimizer.run_tests()

Original Accuracy: 1.0
Quantized Accuracy: 1.0
All tests passed successfully.
