<a href="https://colab.research.google.com/github/Volks44/Multimodal-OOD/blob/main/Multimodal_OOD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [None]:
!pip install datasets --q
import keras
import numpy as np
import tensorflow as tf
from datasets import load_dataset
from keras.datasets import cifar10
from tensorflow.keras.layers import Input, Embedding, Dense, GlobalAveragePooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from collections import Counter
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Dataset

In [None]:
# Define CIFAR-10 class names
CIFAR10_CLASS_NAMES = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                       'dog', 'frog', 'horse', 'ship', 'truck']

# Load the CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

In [None]:
# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0

# Flatten the labels to be 1D arrays
train_labels = train_labels.flatten()
test_labels = test_labels.flatten()

In [None]:
print(f"Train images shape: {train_images.shape}")
print(f"Train labels shape: {train_labels.shape}")
print(f"Test images shape: {test_images.shape}")
print(f"Test labels shape: {test_labels.shape}")
print(f"Number of classes: {len(CIFAR10_CLASS_NAMES)}")

Train images shape: (50000, 32, 32, 3)
Train labels shape: (50000,)
Test images shape: (10000, 32, 32, 3)
Test labels shape: (10000,)
Number of classes: 10


# Subsets

In [None]:
# Define Class Subsets for hypothetical pre-trained classifiers
# S_all = set(CIFAR10_CLASS_NAMES) # This is the set of all classes

# Define known classes for each classifier
# These are sets of class *names*
classifier_knowledge = {
    "image_classifier_1": {"airplane", "automobile", "bird", "cat"},
    "image_classifier_2": {"cat", "deer", "dog", "frog", "horse"},
    "image_classifier_3": {"horse", "ship", "truck", "automobile"}, # Overlap example
    # For text classifiers, the "text" modality is the class name itself.
    # So, their knowledge is also about class names.
    "text_classifier_1": {"airplane", "ship", "truck", "dog", "frog"},
    "text_classifier_2": {"bird", "cat", "deer", "horse", "automobile"},
}

print("Classifier Knowledge Subsets:")
for clf_name, known_classes in classifier_knowledge.items():
    print(f"  {clf_name}: {known_classes}")

Classifier Knowledge Subsets:
  image_classifier_1: {'airplane', 'automobile', 'cat', 'bird'}
  image_classifier_2: {'cat', 'deer', 'frog', 'dog', 'horse'}
  image_classifier_3: {'ship', 'truck', 'horse', 'automobile'}
  text_classifier_1: {'ship', 'frog', 'airplane', 'truck', 'dog'}
  text_classifier_2: {'cat', 'deer', 'bird', 'automobile', 'horse'}


In [None]:
# Function to get "text" data (class labels as text)
def get_text_label(numeric_label):
    if 0 <= numeric_label < len(CIFAR10_CLASS_NAMES):
        return CIFAR10_CLASS_NAMES[numeric_label]
    return "unknown_label"

# Example: Get text label for the first training sample
print(f"Example: First training image is a '{get_text_label(train_labels[1])}' (Label: {train_labels[0]})")

Example: First training image is a 'truck' (Label: 6)


In [None]:
# Demonstrate how to identify OOD samples for each classifier
def check_ood_status(image_data, numeric_label, classifier_name, knowledge_base):
    """
    Checks if a given sample is OOD for a specific classifier.
    Args:
        image_data: The image data (not directly used for OOD check in this example, as OOD is based on label for now, but would be used by actual model). numeric_label: The ground truth numeric label of the sample.
        classifier_name: Name of the classifier (e.g., "image_classifier_1").
        knowledge_base: A dictionary mapping classifier names to their known class sets.
    Returns:
        A tuple (is_ood: bool, ground_truth_class_name: str)
    """
    ground_truth_class_name = get_text_label(numeric_label)
    known_classes = knowledge_base.get(classifier_name)

    if known_classes is None:
        raise ValueError(f"Knowledge base for '{classifier_name}' not found.")

    # For this simulation, OOD is determined if the true class name
    # is not in the classifier's known set.
    # An actual OOD detection mechanism for a real model would look at the
    # model's output (e.g., softmax scores, logits, uncertainty).
    is_ood = ground_truth_class_name not in known_classes
    return is_ood, ground_truth_class_name

# --- Main Demonstration Loop ---
print("OOD Status Check for a few test samples:")
num_samples_to_check = 5

for i in range(num_samples_to_check):
    sample_image = test_images[i]
    sample_numeric_label = test_labels[i]
    sample_text_label = get_text_label(sample_numeric_label)

    print(f"\nTest Sample #{i+1}: True Class = '{sample_text_label}' (Label: {sample_numeric_label})")

    # Check OOD status for each defined classifier
    for clf_name in classifier_knowledge.keys():
        # In your actual task, you would feed `sample_image` to image classifiers
        # and `sample_text_label` (or its embedding) to text classifiers.
        # The OOD determination here is based on the ground truth label for simplicity.
        is_ood, _ = check_ood_status(sample_image, sample_numeric_label, clf_name, classifier_knowledge)

        if is_ood:
            print(f"  - For '{clf_name}': This sample is OOD.")
        else:
            print(f"  - For '{clf_name}': This sample is In-Distribution.")

OOD Status Check for a few test samples:

Test Sample #1: True Class = 'cat' (Label: 3)
  - For 'image_classifier_1': This sample is In-Distribution.
  - For 'image_classifier_2': This sample is In-Distribution.
  - For 'image_classifier_3': This sample is OOD.
  - For 'text_classifier_1': This sample is OOD.
  - For 'text_classifier_2': This sample is In-Distribution.

Test Sample #2: True Class = 'ship' (Label: 8)
  - For 'image_classifier_1': This sample is OOD.
  - For 'image_classifier_2': This sample is OOD.
  - For 'image_classifier_3': This sample is In-Distribution.
  - For 'text_classifier_1': This sample is In-Distribution.
  - For 'text_classifier_2': This sample is OOD.

Test Sample #3: True Class = 'ship' (Label: 8)
  - For 'image_classifier_1': This sample is OOD.
  - For 'image_classifier_2': This sample is OOD.
  - For 'image_classifier_3': This sample is In-Distribution.
  - For 'text_classifier_1': This sample is In-Distribution.
  - For 'text_classifier_2': This sam

In [None]:
def filter_data_by_known_classes(images, labels, known_classes, class_names):
    """
    Filters a dataset to only include samples with labels in `known_classes`.

    Args:
        images (np.ndarray): Image data.
        labels (np.ndarray): Integer labels.
        known_classes (set): Set of class names known to the classifier.
        class_names (list): List of all class names in order of label index.

    Returns:
        Tuple: (filtered_images, filtered_labels, new_label_mapping)
    """
    # Find numeric labels corresponding to known class names
    known_class_indices = [i for i, name in enumerate(class_names) if name in known_classes]

    # Create a label mapping (e.g., 2 -> 0, 3 -> 1)
    label_map = {old: new for new, old in enumerate(sorted(known_class_indices))}

    # Mask and remap labels
    mask = np.isin(labels, known_class_indices)
    filtered_images = images[mask]
    filtered_labels = np.array([label_map[lbl] for lbl in labels[mask]])

    return filtered_images, filtered_labels, label_map

In [None]:
clf_name = "image_classifier_1"
known_classes = classifier_knowledge[clf_name]

filtered_train_images, filtered_train_labels, label_map = filter_data_by_known_classes(
    train_images, train_labels, known_classes, CIFAR10_CLASS_NAMES
)

print(f"Filtered {clf_name}: {filtered_train_images.shape[0]} training samples")

Filtered image_classifier_1: 20000 training samples


In [None]:
def build_cnn_model(num_classes):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(32,32,3)),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
num_classes = len(label_map)

model = build_cnn_model(num_classes)

# Train on filtered subset
model.fit(filtered_train_images, filtered_train_labels, epochs=5, batch_size=64, validation_split=0.1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 83ms/step - accuracy: 0.5661 - loss: 1.0032 - val_accuracy: 0.7200 - val_loss: 0.7075
Epoch 2/5
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 49ms/step - accuracy: 0.7440 - loss: 0.6484 - val_accuracy: 0.7665 - val_loss: 0.5979
Epoch 3/5
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 47ms/step - accuracy: 0.7759 - loss: 0.5653 - val_accuracy: 0.7525 - val_loss: 0.6506
Epoch 4/5
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 47ms/step - accuracy: 0.8003 - loss: 0.5048 - val_accuracy: 0.8085 - val_loss: 0.5084
Epoch 5/5
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 50ms/step - accuracy: 0.8198 - loss: 0.4666 - val_accuracy: 0.8275 - val_loss: 0.4822


<keras.src.callbacks.history.History at 0x7cea16ff1bd0>

In [None]:
def predict_with_ood_check(model, image, known_classes, label_map_inv, threshold=0.7):
    """
    Predicts the class and checks for OOD based on softmax max score.

    Args:
        model: Trained Keras model.
        image: Input image (normalized).
        known_classes: Set of known class names.
        label_map_inv: Reverse label map {new: old}.
        threshold: Softmax max threshold for OOD.

    Returns:
        Tuple: (is_ood: bool, predicted_class: str)
    """
    logits = model(np.expand_dims(image, axis=0), training=False)
    probs = tf.nn.softmax(logits).numpy()[0]
    max_prob = np.max(probs)
    pred_index = np.argmax(probs)
    orig_label = label_map_inv[pred_index]
    class_name = CIFAR10_CLASS_NAMES[orig_label]
    return max_prob < threshold, class_name

In [None]:
def evaluate_model_with_ood(model, x_test, y_test, known_classes, label_map_inv, threshold=0.7):
    y_pred = []
    y_true = []
    ood_count = 0
    total = len(x_test)

    for i in range(total):
        image = x_test[i]
        label = y_test[i]
        class_name = CIFAR10_CLASS_NAMES[label]

        is_ood, pred_class = predict_with_ood_check(model, image, known_classes, label_map_inv, threshold)

        if is_ood:
            y_pred.append("OOD")
            ood_count += 1
        else:
            y_pred.append(pred_class)

        y_true.append(class_name if class_name in known_classes else "OOD")

    # Classification report on ID vs OOD
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred, labels=list(known_classes) + ["OOD"]))

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, labels=list(known_classes) + ["OOD"]))

    print(f"\nTotal samples: {total}")
    print(f"OOD detections: {ood_count} / {total} ({100 * ood_count / total:.2f}%)")

    return #y_true, y_pred


In [None]:
##NEW

# Select a classifier
clf_name = 'image_classifier_1'
clf_info = trained_image_classifiers[clf_name]

# Run evaluation
evaluate_model_with_ood(
    model=clf_info["model"],
    x_test=filtered_train_images,
    y_test=filtered_train_labels,
    known_classes=clf_info["known_classes"],
    label_map_inv=clf_info["label_map_inv"],
    threshold=0.3
)

Confusion Matrix:
[[4196  260   36  393  115]
 [  90 4793   25   53   39]
 [ 154  159 3226 1209  252]
 [ 273   90  194 4356   87]
 [   0    0    0    0    0]]

Classification Report:
              precision    recall  f1-score   support

    airplane       0.89      0.84      0.86      5000
  automobile       0.90      0.96      0.93      5000
         cat       0.93      0.65      0.76      5000
        bird       0.72      0.87      0.79      5000
         OOD       0.00      0.00      0.00         0

    accuracy                           0.83     20000
   macro avg       0.69      0.66      0.67     20000
weighted avg       0.86      0.83      0.84     20000


Total samples: 20000
OOD detections: 493 / 20000 (2.46%)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# trained_models = {}

# for clf_name, known_classes in classifier_knowledge.items():
#     print(f"\nTraining {clf_name}")
#     f_images, f_labels, label_map = filter_data_by_known_classes(
#         train_images, train_labels, known_classes, CIFAR10_CLASS_NAMES
#     )

#     model = build_cnn_model(len(label_map))
#     model.fit(f_images, f_labels, epochs=5, batch_size=64, verbose=0)

#     trained_models[clf_name] = {
#         "model": model,
#         "known_classes": known_classes,
#         "label_map": label_map,
#         "label_map_inv": {v: k for k, v in label_map.items()}
#     }

In [None]:
def train_classifier_on_subset(classifier_name, known_classes, images, labels):
    # Filter images/labels
    f_images, f_labels, label_map = filter_data_by_known_classes(images, labels, known_classes, CIFAR10_CLASS_NAMES)

    model = build_cnn_model(num_classes=len(label_map))
    model.fit(f_images, f_labels, epochs=5, batch_size=64, validation_split=0.1, verbose=0)

    return {
        "model": model,
        "known_classes": known_classes,
        "label_map": label_map,
        "label_map_inv": {v: k for k, v in label_map.items()}
    }


In [None]:
trained_image_classifiers = {}

for clf_name in [name for name in classifier_knowledge if "image" in name]:
    info = train_classifier_on_subset(clf_name, classifier_knowledge[clf_name], train_images, train_labels)
    trained_image_classifiers[clf_name] = info
    print(f"Trained {clf_name}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Trained image_classifier_1
Trained image_classifier_2
Trained image_classifier_3


In [None]:
def softmax_ood_predict(sample_image, clf_info, threshold=0.7):
    model = clf_info["model"]
    label_map_inv = clf_info["label_map_inv"]

    logits = model(np.expand_dims(sample_image, axis=0), training=False)
    probs = tf.nn.softmax(logits).numpy()[0]
    max_prob = np.max(probs)
    pred_index = np.argmax(probs)

    orig_label_index = label_map_inv[pred_index]
    pred_class_name = CIFAR10_CLASS_NAMES[orig_label_index]

    is_ood = max_prob < threshold
    return is_ood, pred_class_name, max_prob

In [None]:
sample_image = test_images[3]
true_label = test_labels[3]
true_class = CIFAR10_CLASS_NAMES[true_label]

print(f"Test sample true class: {true_class}")

for clf_name, clf_info in trained_image_classifiers.items():
    is_ood, pred_class, conf = softmax_ood_predict(sample_image, clf_info)
    status = "OOD" if is_ood else "In-Distribution"
    print(f"{clf_name}: {status} | Predicted: {pred_class} | Confidence: {conf:.2f}")


Test sample true class: airplane
image_classifier_1: OOD | Predicted: airplane | Confidence: 0.47
image_classifier_2: OOD | Predicted: deer | Confidence: 0.35
image_classifier_3: OOD | Predicted: ship | Confidence: 0.44


In [None]:
def aggregate_predictions(sample_image, classifiers, threshold=0.3):
    votes = []
    for clf_info in classifiers.values():
        is_ood, pred_class, _ = softmax_ood_predict(sample_image, clf_info, threshold)
        if not is_ood:
            votes.append(pred_class)

    if votes:
        majority = Counter(votes).most_common(1)[0][0]
    else:
        majority = "OOD_ALL"

    return majority, votes

In [None]:
agg_pred, all_votes = aggregate_predictions(test_images[0], trained_image_classifiers)
print(f"Aggregated prediction: {agg_pred}")
print(f"Individual votes: {all_votes}")


Aggregated prediction: cat
Individual votes: ['cat', 'ship']


In [None]:
def prepare_text_data(labels, known_classes, class_names):
    text_labels = [class_names[label] for label in labels]
    filtered_texts, filtered_indices = [], []

    for i, txt in enumerate(text_labels):
        if txt in known_classes:
            filtered_texts.append(txt)
            filtered_indices.append(i)

    return np.array(filtered_texts), np.array(filtered_indices)


In [None]:
def build_text_classifier(num_classes, vocab_size, embed_dim=50, max_len=1):
    inp = Input(shape=(max_len,))
    x = Embedding(input_dim=vocab_size, output_dim=embed_dim)(inp)
    x = GlobalAveragePooling1D()(x)
    x = Dense(64, activation='relu')(x)
    out = Dense(num_classes)(x)
    return Model(inp, out)

In [None]:
trained_text_classifiers = {}
tokenizer = Tokenizer()
tokenizer.fit_on_texts(CIFAR10_CLASS_NAMES)  # global vocab

max_len = 1  # only single-word class names

for clf_name in [name for name in classifier_knowledge if "text" in name]:
    known_classes = classifier_knowledge[clf_name]
    text_data, indices = prepare_text_data(train_labels, known_classes, CIFAR10_CLASS_NAMES)

    labels_subset = train_labels[indices]
    text_sequences = tokenizer.texts_to_sequences(text_data)
    text_padded = pad_sequences(text_sequences, maxlen=max_len)

    label_map = {cls: i for i, cls in enumerate(sorted(known_classes))}
    y = np.array([label_map[CIFAR10_CLASS_NAMES[l]] for l in labels_subset])

    model = build_text_classifier(len(label_map), vocab_size=len(tokenizer.word_index) + 1)
    model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
    model.fit(text_padded, y, epochs=5, batch_size=64, verbose=0)

    trained_text_classifiers[clf_name] = {
        "model": model,
        "known_classes": known_classes,
        "label_map": label_map,
        "label_map_inv": {v: k for k, v in label_map.items()}
    }

    print(f"Trained {clf_name}")

Trained text_classifier_1
Trained text_classifier_2


In [None]:
def softmax_ood_text_predict(text_input, clf_info, threshold=0.7):
    model = clf_info["model"]
    label_map_inv = clf_info["label_map_inv"]

    seq = tokenizer.texts_to_sequences([text_input])
    padded = pad_sequences(seq, maxlen=1)

    logits = model(padded, training=False)
    probs = tf.nn.softmax(logits).numpy()[0]
    max_prob = np.max(probs)
    pred_index = np.argmax(probs)
    pred_class = clf_info["label_map_inv"][pred_index]

    is_ood = max_prob < threshold
    return is_ood, pred_class, max_prob

In [None]:
sample_label = test_labels[3]
true_class = CIFAR10_CLASS_NAMES[sample_label]
sample_text = true_class  # input is class name as string

print(f"Test sample true class (as text): {sample_text}")

for clf_name, clf_info in trained_text_classifiers.items():
    is_ood, pred_class, conf = softmax_ood_text_predict(sample_text, clf_info, threshold=0.7)
    status = "OOD" if is_ood else "In-Distribution"
    print(f"{clf_name}: {status} | Predicted: {pred_class} | Confidence: {conf:.2f}")

Test sample true class (as text): airplane
text_classifier_1: In-Distribution | Predicted: airplane | Confidence: 1.00
text_classifier_2: OOD | Predicted: cat | Confidence: 0.22


In [None]:
def aggregate_text_predictions(text_input, classifiers, threshold=0.1):
    from collections import Counter

    votes = []
    for clf_info in classifiers.values():
        is_ood, pred_class, _ = softmax_ood_text_predict(text_input, clf_info, threshold)
        if not is_ood:
            votes.append(pred_class)

    if votes:
        majority = Counter(votes).most_common(1)[0][0]
    else:
        majority = "OOD_ALL"

    return majority, votes


In [None]:
sample_label = test_labels[0]
sample_text = CIFAR10_CLASS_NAMES[sample_label]

agg_pred, all_votes = aggregate_text_predictions(sample_text, trained_text_classifiers)
print(f"Aggregated prediction: {agg_pred}")
print(f"Individual votes: {all_votes}")

Aggregated prediction: frog
Individual votes: ['frog', 'cat']


In [None]:
def aggregate_multimodal_predictions(sample_image, sample_label, img_clfs, txt_clfs, threshold=0.1):
    votes = []
    text_label = get_text_label(sample_label)

    for clf_info in img_clfs.values():
        is_ood, pred_class, _ = softmax_ood_predict(sample_image, clf_info, threshold)
        if not is_ood:
            votes.append(pred_class)

    for clf_info in txt_clfs.values():
        is_ood, pred_class, _ = softmax_ood_text_predict(text_label, clf_info, threshold)
        if not is_ood:
            votes.append(pred_class)

    if votes:
        final_pred = Counter(votes).most_common(1)[0][0]
    else:
        final_pred = "OOD_ALL"

    return final_pred, votes

In [None]:
idx = 0
sample_img = test_images[idx]
sample_label = test_labels[idx]
true_class = CIFAR10_CLASS_NAMES[sample_label]

final_pred, votes = aggregate_multimodal_predictions(
    sample_img, sample_label,
    trained_image_classifiers,
    trained_text_classifiers
)

print(f"Sample true class: {true_class}")
print(f"Votes: {votes}")
print(f"Final prediction: {final_pred}")


Sample true class: cat
Votes: ['cat', 'cat', 'ship', 'frog', 'cat']
Final prediction: cat


In [None]:
def evaluate_multimodal_system(
    test_images,
    test_labels,
    img_clfs,
    txt_clfs,
    threshold=0.3,
    max_samples=None
):
    preds = []
    true = []
    ood_count = 0
    all_class_names = CIFAR10_CLASS_NAMES

    N = max_samples if max_samples else len(test_labels)

    for i in range(N):
        img = test_images[i]
        label = test_labels[i]
        true_class = CIFAR10_CLASS_NAMES[label]

        final_pred, _ = aggregate_multimodal_predictions(img, label, img_clfs, txt_clfs, threshold)

        if final_pred in ["OOD_ALL", "OOD_MAJORITY"]:
            ood_count += 1
        else:
            preds.append(final_pred)
            true.append(true_class)

    acc = accuracy_score(true, preds)
    print(f"\n Evaluation on {N} test samples:")
    print(f" In-distribution Accuracy: {acc * 100:.2f}%")

    print("\n Classification Report (for in-distribution predictions):")
    print(classification_report(true, preds, labels=all_class_names, zero_division=0))

    return


In [None]:
evaluate_multimodal_system(
    test_images=test_images,
    test_labels=test_labels,
    img_clfs=trained_image_classifiers,
    txt_clfs=trained_text_classifiers,
    threshold=0.1,
    max_samples=1000 # or simply max_samples=100
)




 Evaluation on 1000 test samples:
 In-distribution Accuracy: 75.10%

 Classification Report (for in-distribution predictions):
              precision    recall  f1-score   support

    airplane       0.65      0.87      0.75       103
  automobile       0.71      0.87      0.78        89
        bird       0.51      0.73      0.60       100
         cat       0.59      0.81      0.68       103
        deer       0.87      0.46      0.60        90
         dog       1.00      0.47      0.63        86
        frog       0.96      0.83      0.89       112
       horse       0.73      0.89      0.80       102
        ship       1.00      0.86      0.92       106
       truck       1.00      0.66      0.80       109

    accuracy                           0.75      1000
   macro avg       0.80      0.74      0.75      1000
weighted avg       0.80      0.75      0.75      1000

