### Balanced Data

In [13]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np

(200, 2)


### Imbalanced Data

In [33]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Tạo dữ liệu cân bằng cho 3 classes
X = np.random.rand(300, 2)
y = np.array([0] * 100 + [1] * 100 + [2] * 100)  # 100 mẫu mỗi class

# Chia dữ liệu
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Huấn luyện mô hình
model = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=200)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(y_test)
print(y_pred)

[0 0 2 2 2 0 1 0 2 0 0 2 2 1 1 2 0 2 0 0 1 0 0 1 1 0 0 1 1 1 0 1 2 0 2 1 2
 0 0 1 2 2 1 2 0 0 1 2 1 2 1 1 2 0 1 2 2 2 1 1]
[0 2 0 2 0 2 2 2 1 0 0 1 0 2 1 2 2 2 0 1 0 2 2 1 2 2 2 2 0 2 0 1 1 0 0 2 2
 2 0 2 2 2 1 2 0 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2]


In [35]:
# Tính toán các metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# In ra confusion matrix
cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix:\n", cm)
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

# Tính toán TP, TN, FP, FN cho từng lớp
for i in range(3):
    TP = cm[i, i]
    FP = np.sum(cm[:, i]) - TP
    FN = np.sum(cm[i, :]) - TP
    TN = np.sum(cm) - (TP + FP + FN)
    
    print(f"Class {i}:")
    print(f"  True Positive (TP): {TP}")
    print(f"  False Positive (FP): {FP}")
    
    print(f"  True Negative (TN): {TN}")
    print(f"  False Negative (FN): {FN}")
    print()

Confusion Matrix:
 [[ 8  1 11]
 [ 2  5 13]
 [ 4  4 12]]
Accuracy: 0.42
Precision: 0.47
Recall: 0.42
F1 Score: 0.41
Class 0:
  True Positive (TP): 8
  False Positive (FP): 6
  True Negative (TN): 34
  False Negative (FN): 12

Class 1:
  True Positive (TP): 5
  False Positive (FP): 5
  True Negative (TN): 35
  False Negative (FN): 15

Class 2:
  True Positive (TP): 12
  False Positive (FP): 24
  True Negative (TN): 16
  False Negative (FN): 8



### Imbalanced Data

In [17]:
y_imbalanced = np.array([0] * 250 + [1] * 30 + [2] * 20)  # 250 mẫu class 0, 30 mẫu class 1, 20 mẫu class 2

X_train, X_test, y_train, y_test = train_test_split(X, y_imbalanced, test_size=0.2, random_state=42, stratify=y_imbalanced)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [18]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix (Imbalanced Data):\n", cm)
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

for i in range(3):
    TP = cm[i, i]
    FP = np.sum(cm[:, i]) - TP
    FN = np.sum(cm[i, :]) - TP
    TN = np.sum(cm) - (TP + FP + FN)
    
    print(f"Class {i}:")
    print(f"  True Positive (TP): {TP}")
    print(f"  True Negative (TN): {TN}")
    print(f"  False Positive (FP): {FP}")
    print(f"  False Negative (FN): {FN}")
    print()

Confusion Matrix (Imbalanced Data):
 [[50  0  0]
 [ 6  0  0]
 [ 4  0  0]]
Accuracy: 0.83
Precision: 0.69
Recall: 0.83
F1 Score: 0.76
Class 0:
  True Positive (TP): 50
  True Negative (TN): 0
  False Positive (FP): 10
  False Negative (FN): 0

Class 1:
  True Positive (TP): 0
  True Negative (TN): 54
  False Positive (FP): 0
  False Negative (FN): 6

Class 2:
  True Positive (TP): 0
  True Negative (TN): 56
  False Positive (FP): 0
  False Negative (FN): 4



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
