In [2]:
import glob
from joblib import load, dump
import numpy as np
import csv

# Function to load text from a given file name
def load_text_from_file(file_name):
    """Return the entire contents of ``file_name`` as a single string.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    locale default, so the same file yields the same text on every machine.

    Args:
        file_name: Path of the text file to read.

    Returns:
        The full text of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_name, 'r', encoding='utf-8') as file:
        return file.read()

def retrain(X_new, correct_labels, model, model_name):
    """Update ``model`` with newly labelled samples and save it if it changed.

    Only models exposing ``partial_fit`` can be updated incrementally. Any
    other model is left untouched and is not written back to disk, because
    nothing about it has changed.

    Args:
        X_new: Samples handed unchanged to ``model.partial_fit``.
        correct_labels: Labels for ``X_new``, in the same order.
        model: The estimator to update.
        model_name: Path the updated model is written to with ``dump``.

    Returns:
        True if the model was updated and saved, False if it was skipped
        because it has no ``partial_fit`` method.
    """
    if not hasattr(model, 'partial_fit'):
        # A real retrain would have to add the new sample to the original
        # training data and refit; that data is not available in this
        # function, so report the skip honestly instead of claiming a retrain.
        # TODO: implement retraining from scratch for such models.
        print("Model doesn't support partial fitting. "
              "Skipping update (retraining from scratch is not implemented).")
        return False

    model.partial_fit(X_new, correct_labels)

    # Persist only when the model actually changed.
    dump(model, model_name)
    return True

# Get the file name from the user
file_name = input("Please enter the file name with the test data: ")

# Load the content of the file into 'test'
test = [load_text_from_file(file_name)]

# Load all .joblib model files
vectorizer_files = glob.glob('Vectorizers/*_vectorizer.joblib')

for vectorizer_file in vectorizer_files:
    
    
    # Identify the corresponding model file
    part_file = vectorizer_file.replace('Vectorizers', 'Classifiers')
    model_file = part_file.replace('_vectorizer.joblib', '.joblib')
    
    # Load the model
    model = load(model_file)
    
    # Load the vectorizer
    vectorizer = load(vectorizer_file)

    # Transform the text using the loaded vectorizer
    test_transformed = vectorizer.transform(test)
    
    # Check if the model has the predict_proba method
    if hasattr(model, 'predict_proba'):
        # Get probabilities with the model
        probabilities = model.predict_proba(test)

        print(probabilities)
        print(f'Model: {model_file}')
        for i, input_string in enumerate(test):
            max_probability = np.max(probabilities[i])
            predicted_class = model.classes_[np.argmax(probabilities[i])]
            print(f'Prediction: {predicted_class}, Certainty: {max_probability:.4f}')
            users_input = input("Do you think I'm right ? (y/n): ")
            if users_input == 'y':
                correct_answer = predicted_class
                retrain(test, [correct_answer], model, model_file)
            else:
                if predicted_class == "Phishing Email":
                    correct_answer = "Safe Email"
                else:
                    correct_answer = "Phishing Email"
                retrain(test, [correct_answer], model, model_file)
        

    else:
        # Make predictions with the model
        predictions = model.predict(test)
        for i, input_string in enumerate(test):
            print(f'Model: {model_file} does not support probability estimates')
            print(f'Nonetheless, the prediction is: {predictions[i]}')
        

    


Please enter the file name with the test data:  test-body.txt


[[0.53228078 0.46771922]]
Model: Classifiers/AdaBoostClassifier.joblib
Prediction: Phishing Email, Certainty: 0.5323


Do you think I'm right ? (y/n):  n


Model doesn't support partial fitting. Retraining from scratch...
[[0.57668691 0.42331309]]
Model: Classifiers/LogisticRegression.joblib
Prediction: Phishing Email, Certainty: 0.5767


Do you think I'm right ? (y/n):  n


Model doesn't support partial fitting. Retraining from scratch...
[[0.6 0.4]]
Model: Classifiers/RandomForestClassifier.joblib
Prediction: Phishing Email, Certainty: 0.6000


Do you think I'm right ? (y/n):  n


Model doesn't support partial fitting. Retraining from scratch...
Model: Classifiers/SGDClassifier.joblib does not support probability estimates
Nonetheless, the prediction is: Safe Email
[[1. 0.]]
Model: Classifiers/DecisionTreeClassifier.joblib
Prediction: Phishing Email, Certainty: 1.0000


Do you think I'm right ? (y/n):  n


Model doesn't support partial fitting. Retraining from scratch...
