This is the demo code for Group 40

All you need to edit are the paths in Cell code 1 (and possibly uncomment the pip install line to install the packages)
- Cell code 2 runs category A, which is a Linear Regression model
- Cell code 3 runs category B, which is a Bidirectional LSTM model

In [1]:
# Import necessary libraries
import pandas as pd
import pickle
import joblib
import nltk
from nltk.corpus import stopwords
import nltk
from tensorflow.keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
nltk.download('stopwords')

# IMPORTANT: If you need to install the packages, uncomment the line below before running
# %pip install pandas scikit-learn joblib nltk tensorflow keras

# IMPORTANT: the file names in this block are the only things you need to update in the code.
# Note that the tokenizer and vectorizer lines do not just store a path: they load
# the objects immediately when this cell runs, so those files must already exist.
input_file_path = 'dev.csv'  # CSV to predict on; the later cells read its 'Claim' and 'Evidence' columns

# Category B (LSTM) settings
lstm_model_path = 'lstm_model.h5'               # Path to the saved LSTM model file
output_file_path = 'Group_40_b.csv'  # Where the LSTM predictions CSV is written
tokenizer = joblib.load('lstm_tokenizer.pkl') # Loaded tokenizer object used by the LSTM cell; edit the file name inside joblib.load(...)

# Category A (LR) settings
lr_model_path = 'lr_model.pkl'               # Path to the saved LR model file
output_file_path_lr = 'Group_40_a.csv'  # Where the LR predictions CSV is written
vectorizer = joblib.load('tfidf_vectorizer.pkl') # Loaded vectorizer object used by the LR cell; edit the file name inside joblib.load(...)

2024-04-24 12:14:34.487036: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[nltk_data] Downloading package stopwords to /Users/daim/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Define a function to load the model
def load_lr_model(model_path):
    """Unpickle and return the model stored at ``model_path``.

    Args:
        model_path: Location of a file written with ``pickle``.

    Returns:
        Whatever object was pickled into the file.

    Note:
        Unpickling executes code embedded in the file, so only point this
        at model files you trust.
    """
    with open(model_path, 'rb') as model_file:
        return pickle.load(model_file)

# Define a function that joins each row's Claim and Evidence into one text string
def preprocess(data):
    """Build one text string per row by joining the claim and its evidence.

    Missing values in either column are replaced by empty strings before
    joining, so a row with a missing claim or evidence still yields a string.
    The input is left unmodified: earlier versions overwrote the caller's
    'Claim' and 'Evidence' columns as a side effect.

    Args:
        data: Table (e.g. a pandas DataFrame) with 'Claim' and 'Evidence'
            columns.

    Returns:
        A pandas Series containing "<claim> <evidence>" for every row.

    Raises:
        KeyError: If 'Claim' or 'Evidence' is not present in ``data``.
    """
    # fillna returns a new Series, so the caller's frame is never written to.
    claims = data['Claim'].fillna('')
    evidence = data['Evidence'].fillna('')
    return claims + " " + evidence

# Define a function to make predictions
def make_predictions(model, data):
    """Return the result of calling ``model.predict`` on ``data``.

    Args:
        model: Any object exposing a ``predict`` method.
        data: Input passed straight through to ``model.predict``.

    Returns:
        Whatever ``model.predict(data)`` returns, unchanged.
    """
    return model.predict(data)

# Main execution function
def run_demo(input_file_path, model_path, output_file_path_lr):
    """Run the category A (LR) pipeline and write its predictions to CSV.

    The input CSV is read, turned into one text per row by ``preprocess``,
    vectorised with the module-level ``vectorizer``, and scored by the model
    loaded from ``model_path``. The predictions are written to
    ``output_file_path_lr`` as a single 'prediction' column.

    Args:
        input_file_path: CSV file to predict on.
        model_path: Pickled model file, loaded via ``load_lr_model``.
        output_file_path_lr: Destination CSV for the predictions.

    Returns:
        None. If the input file cannot be read, or the output cannot be
        written, an error message is printed instead of raising.
    """
    # Step 1: read the input; give up early if it cannot be loaded.
    try:
        frame = pd.read_csv(input_file_path)
    except Exception as e:
        print(f"Error loading the input file: {e}")
        return

    # Step 2: text -> features (relies on the `vectorizer` loaded at module level).
    features = vectorizer.transform(preprocess(frame))

    # Step 3: load the model and score the features.
    labels = make_predictions(load_lr_model(model_path), features)

    # Step 4: persist the predictions as a one-column CSV.
    result = pd.DataFrame(labels, columns=['prediction'])
    try:
        result.to_csv(output_file_path_lr, index=False)
        print(f"Predictions saved successfully to {output_file_path_lr}")
    except Exception as e:
        print(f"Error saving the predictions: {e}")

# Run the category A (LR) demo using the input, model and output paths set in the first cell
run_demo(input_file_path, lr_model_path, output_file_path_lr)


Predictions saved successfully to Group_40_a.csv


In [3]:
# Define a function to load the model
def load_lstm_model(model_path):
    """Return the model that ``load_model`` reads from ``model_path``.

    Args:
        model_path: Location of the saved model file.

    Returns:
        The object returned by ``load_model(model_path)``.
    """
    return load_model(model_path)

# Define a function that joins each row's Claim and Evidence into one text string
def preprocess(data):
    """Build one text string per row by joining the claim and its evidence.

    Missing values in either column are replaced by empty strings before
    joining, so a row with a missing claim or evidence still yields a string.
    The input is left unmodified: earlier versions overwrote the caller's
    'Claim' and 'Evidence' columns as a side effect.

    Args:
        data: Table (e.g. a pandas DataFrame) with 'Claim' and 'Evidence'
            columns.

    Returns:
        A pandas Series containing "<claim> <evidence>" for every row.

    Raises:
        KeyError: If 'Claim' or 'Evidence' is not present in ``data``.
    """
    # fillna returns a new Series, so the caller's frame is never written to.
    claims = data['Claim'].fillna('')
    evidence = data['Evidence'].fillna('')
    return claims + " " + evidence

# Define a function to make predictions
def make_predictions(model, data):
    """Return the result of calling ``model.predict`` on ``data``.

    Args:
        model: Any object exposing a ``predict`` method.
        data: Input passed straight through to ``model.predict``.

    Returns:
        Whatever ``model.predict(data)`` returns, unchanged.
    """
    return model.predict(data)

# Main execution function
def run_demo(input_file_path, model_path, output_file_path, *,
             max_sequence_length=307, threshold=0.5):
    """Run the category B (LSTM) pipeline and write its predictions to CSV.

    The input CSV is read, turned into one text per row by ``preprocess``,
    converted to integer sequences with the module-level ``tokenizer``,
    padded to a fixed length, and scored by the model loaded from
    ``model_path``. Scores above ``threshold`` become 1, all others 0, and
    the result is written to ``output_file_path`` as a single 'prediction'
    column.

    Args:
        input_file_path: CSV file to predict on.
        model_path: Saved model file, loaded via ``load_lstm_model``.
        output_file_path: Destination CSV for the predictions.
        max_sequence_length: Length every token sequence is padded to.
            Defaults to 307, the value previously hard-coded here.
            NOTE(review): presumably this must equal the sequence length
            used when the model was trained - confirm against the training
            code before changing it.
        threshold: Cut-off applied to the model output; values strictly
            greater than it are labelled 1. Defaults to 0.5.

    Returns:
        None. If the input file cannot be read, or the output cannot be
        written, an error message is printed instead of raising.
    """
    # Load the input data; stop early if it cannot be read.
    try:
        input_data = pd.read_csv(input_file_path)
    except Exception as e:
        print(f"Error loading the input file: {e}")
        return

    # Text -> token ids -> fixed-length arrays (zeros appended at the end).
    processed_data = preprocess(input_data)
    x_tok_seq = tokenizer.texts_to_sequences(processed_data)
    x_padded = pad_sequences(x_tok_seq, maxlen=max_sequence_length, padding='post')

    # Load the model
    lstm_model = load_lstm_model(model_path)

    # Score through the shared helper, then binarise the raw model output.
    scores = make_predictions(lstm_model, x_padded)
    predictions = (scores > threshold).astype(int)

    # Save the predictions to a CSV file
    predictions_df = pd.DataFrame(predictions, columns=['prediction'])
    try:
        predictions_df.to_csv(output_file_path, index=False)
        print(f"Predictions saved successfully to {output_file_path}")
    except Exception as e:
        print(f"Error saving the predictions: {e}")

# Run the category B (LSTM) demo using the input, model and output paths set in the first cell
run_demo(input_file_path, lstm_model_path, output_file_path)


Predictions saved successfully to Group_40_b.csv
