In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# 1. Create a mock dataset
# Seed NumPy's global RNG so every run produces the same rows.
np.random.seed(42)
n_rows = 100

# The columns are drawn one after another from the shared global RNG, so the
# order of the draws below determines the values and must not be changed.
data = {}
data['age'] = np.random.randint(18, 70, n_rows)
data['salary'] = np.random.randint(20000, 120000, n_rows)
# 0: no missing, 1: missing
data['missing_values'] = np.random.randint(0, 2, n_rows)
# 0-1, higher means more outlier
data['outlier_score'] = np.random.rand(n_rows)
# 0: good, 1: issue
data['quality_issue'] = np.random.randint(0, 2, n_rows)

df = pd.DataFrame(data)

# 2. Train a machine learning model
# Error handling: validate the data BEFORE splitting and fitting, so a bad
# dataset stops the run here instead of being reported after the model has
# already been trained and scored. Explicit checks are used rather than
# `assert`, because assertions are stripped when Python runs with -O.
if df.empty:
    raise ValueError("Data Error: Dataset is empty.")

X = df[['age', 'salary', 'missing_values', 'outlier_score']]
y = df['quality_issue']

# A classifier needs at least two distinct target labels to learn anything.
# nunique() ignores NaN, so missing labels are not counted as a class.
if y.nunique() < 2:
    raise ValueError("Data Error: Not enough classes to train.")

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split (and the forest below) reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# 3. Evaluate the model performance
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
# Unit test for prediction shape
import unittest

class TestMLModel(unittest.TestCase):
    """Sanity checks on the predictions produced by the fitted model."""

    def test_prediction_shape(self):
        # The model must emit exactly one prediction per held-out sample.
        # y_pred and y_test are the module-level results of the evaluation
        # step that runs earlier in this script.
        n_predictions = len(y_pred)
        n_samples = len(y_test)
        self.assertEqual(n_predictions, n_samples)

# Run the test cases defined in this module in-process.
# argv=[''] supplies a bare argument list so unittest does not try to parse
# the host process's own command-line arguments (e.g. a notebook kernel's),
# and exit=False stops unittest.main from calling sys.exit() after the run.
if __name__ == "__main__":
    unittest.main(argv=[''], exit=False)

.
----------------------------------------------------------------------
Ran 1 test in 0.001s

OK


Accuracy: 0.45
Classification Report:
               precision    recall  f1-score   support

           0       0.33      0.22      0.27         9
           1       0.50      0.64      0.56        11

    accuracy                           0.45        20
   macro avg       0.42      0.43      0.41        20
weighted avg       0.42      0.45      0.43        20

