In [4]:
import glob
from joblib import load
import numpy as np

# Function to load text from a given file name
def load_text_from_file(file_name, encoding='utf-8', errors='replace'):
    """Return the entire content of a text file as one string.

    Args:
        file_name: Path of the file to read.
        encoding: Text encoding used to decode the file. Given explicitly
            (UTF-8 by default) so the result does not depend on the
            platform's locale encoding.
        errors: Decoding error policy passed to ``open``. The default
            ``'replace'`` substitutes U+FFFD for undecodable bytes instead
            of aborting the read on a single malformed byte.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    with open(file_name, 'r', encoding=encoding, errors=errors) as file:
        return file.read()

# Get the file name from the user; strip surrounding whitespace so a padded
# entry still resolves to the intended path.
file_name = input("Please enter the file name with the test data: ").strip()

# Load the content of the file into 'test' (a one-element batch of raw text)
test = [load_text_from_file(file_name)]

# Discover the saved vectorizers. glob returns matches in arbitrary order,
# so sort them to make the report order reproducible between runs.
vectorizer_files = sorted(glob.glob('Vectorizers/*_vectorizer.joblib'))

if not vectorizer_files:
    # Without this the script would finish silently with no output at all.
    print('No vectorizer files found matching Vectorizers/*_vectorizer.joblib')

for vectorizer_file in vectorizer_files:
    # Identify the corresponding model file:
    # Vectorizers/<Name>_vectorizer.joblib -> Classifiers/<Name>.joblib
    part_file = vectorizer_file.replace('Vectorizers', 'Classifiers')
    model_file = part_file.replace('_vectorizer.joblib', '.joblib')

    # NOTE: joblib.load unpickles arbitrary objects; only load files from a
    # trusted source.
    try:
        model = load(model_file)
    except FileNotFoundError:
        # A vectorizer without a matching classifier should not abort the
        # remaining models.
        print(f'Model: {model_file} not found, skipping')
        continue

    # Load the vectorizer
    vectorizer = load(vectorizer_file)

    # Transform the text using the loaded vectorizer.
    # NOTE(review): the transformed matrix is never passed to the model; the
    # raw text in `test` is used for prediction below. That only works if the
    # saved models accept raw text (e.g. pipelines that embed their own
    # vectorizer) -- confirm against the training code, then either drop this
    # transform or predict on `test_transformed`.
    test_transformed = vectorizer.transform(test)

    # Check if the model has the predict_proba method
    if hasattr(model, 'predict_proba'):
        # Get probabilities with the model (one row per input document)
        probabilities = model.predict_proba(test)

        print(probabilities)
        print(f'Model: {model_file}')
        for row in probabilities:
            max_probability = np.max(row)
            # classes_ is indexed in the same column order as the
            # probability row, so argmax selects the predicted label.
            predicted_class = model.classes_[np.argmax(row)]
            print(f'Prediction: {predicted_class}, Certainty: {max_probability:.4f}')
    else:
        # Make predictions with the model
        predictions = model.predict(test)
        # Report the missing capability once per model, not once per sample.
        print(f'Model: {model_file} does not support probability estimates')
        for prediction in predictions:
            print(f'Nonetheless, the prediction is: {prediction}')
        

    


[[0.51265775 0.48734225]]
Model: Classifiers/AdaBoostClassifier.joblib
Prediction: Phishing Email, Certainty: 0.5127
[[0.95941015 0.04058985]]
Model: Classifiers/LogisticRegression.joblib
Prediction: Phishing Email, Certainty: 0.9594
[[0.6 0.4]]
Model: Classifiers/RandomForestClassifier.joblib
Prediction: Phishing Email, Certainty: 0.6000
Model: Classifiers/SGDClassifier.joblib does not support probability estimates
Nonetheless, the prediction is: Phishing Email
[[1. 0.]]
Model: Classifiers/DecisionTreeClassifier.joblib
Prediction: Phishing Email, Certainty: 1.0000
