In [2]:
import pandas as pd
import numpy as np

In [4]:
pip install graphviz

Note: you may need to restart the kernel to use updated packages.


In [5]:
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, classification_report, confusion_matrix, log_loss

In [6]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/blood-transfusion/transfusion.data"
data = pd.read_csv(url)
data.columns = ["Recency", "Frequency", "Monetary", "Time", "Target"]

In [7]:
print(data.isnull().sum())

Recency      0
Frequency    0
Monetary     0
Time         0
Target       0
dtype: int64


In [8]:
print("Duplicates:", data.duplicated().sum())

Duplicates: 215


In [9]:
data = data.drop_duplicates()

In [10]:
X = data[['Recency', 'Frequency', 'Monetary', 'Time']].values
y = data['Target'].values

In [11]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [13]:
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

In [14]:
nb_train_probs = nb_model.predict_proba(X_train)[:, 1] 
nb_test_probs = nb_model.predict_proba(X_test)[:, 1]

In [15]:
X_train_augmented = pd.DataFrame(X_train)
X_train_augmented = np.hstack((X_train, nb_train_probs.reshape(-1, 1)))

X_test_augmented = pd.DataFrame(X_test)
X_test_augmented = np.hstack((X_test, nb_test_probs.reshape(-1, 1)))

In [16]:
start_train = time.time()
lr_model = LogisticRegression()
lr_model.fit(X_train_augmented, y_train)
end_train = time.time()

In [17]:
start_test = time.time()
y_pred = lr_model.predict(X_test_augmented)
y_pred_proba = lr_model.predict_proba(X_test_augmented)[:, 1]
end_test = time.time()
accuracy = accuracy_score(y_test, y_pred)

In [18]:
lr_accuracy = accuracy_score(y_test, y_pred) 
lr_log_loss = log_loss(y_test, y_pred_proba)  
lr_log_loss_pred = (y_pred_proba >= 0.5).astype(int)
log_loss_accuracy = accuracy_score(y_test, lr_log_loss_pred)

In [19]:
lr_mse = mean_squared_error(y_test, y_pred_proba) 
lr_mse_pred = (y_pred_proba >= 0.5).astype(int)
mse_accuracy = accuracy_score(y_test, lr_mse_pred)

In [20]:
lr_mae = mean_absolute_error(y_test, y_pred_proba) 
lr_mae_pred = (y_pred_proba >= 0.5).astype(int)
mae_accuracy = accuracy_score(y_test, lr_mae_pred)

In [21]:
print("\nLog Loss Metrics:")
print(f"  Log Loss: {lr_log_loss:.4f}")
print(f"  Log Loss Accuracy: {log_loss_accuracy:.4f}")
print(f"Classification Report:\n{classification_report(y_test, lr_log_loss_pred)}")


Log Loss Metrics:
  Log Loss: 0.4711
  Log Loss Accuracy: 0.7570
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.93      0.85        80
           1       0.54      0.26      0.35        27

    accuracy                           0.76       107
   macro avg       0.66      0.59      0.60       107
weighted avg       0.72      0.76      0.72       107



In [22]:
print("\nMean Squared Error (MSE) Metrics:")
print(f"  MSE: {lr_mse:.4f}")
print(f"  MSE Accuracy: {mse_accuracy:.4f}")
print(f"Classification Report:\n{classification_report(y_test, lr_mse_pred)}")


Mean Squared Error (MSE) Metrics:
  MSE: 0.1560
  MSE Accuracy: 0.7570
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.93      0.85        80
           1       0.54      0.26      0.35        27

    accuracy                           0.76       107
   macro avg       0.66      0.59      0.60       107
weighted avg       0.72      0.76      0.72       107



In [23]:
print("\nMean Absolute Error (MAE) Metrics:")
print(f"  MAE: {lr_mae:.4f}")
print(f"  MAE Accuracy: {mae_accuracy:.4f}")
print(f"Classification Report:\n{classification_report(y_test, lr_mae_pred)}")


Mean Absolute Error (MAE) Metrics:
  MAE: 0.3358
  MAE Accuracy: 0.7570
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.93      0.85        80
           1       0.54      0.26      0.35        27

    accuracy                           0.76       107
   macro avg       0.66      0.59      0.60       107
weighted avg       0.72      0.76      0.72       107



In [24]:
print(f"Hybrid Model Accuracy: {accuracy * 100:.2f}%")

Hybrid Model Accuracy: 75.70%


In [25]:
print(f"Training Time: {end_train - start_train:.4f} seconds")
print(f"Testing Time: {end_test - start_test:.4f} seconds")

Training Time: 0.0240 seconds
Testing Time: 0.0020 seconds


In [26]:
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.93      0.85        80
           1       0.54      0.26      0.35        27

    accuracy                           0.76       107
   macro avg       0.66      0.59      0.60       107
weighted avg       0.72      0.76      0.72       107



In [27]:
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Confusion Matrix:
[[74  6]
 [20  7]]
