In [0]:

import pandas as pd

def check_ner_format(predictions_file_path, test_file_path):
    """
    Validate the layout of an NER predictions CSV against a test CSV.

    The predictions file passes when it can be read as CSV, contains the
    columns ``id``, ``tokens`` and ``ner_tags`` (extra columns and column
    order are not checked), and has the same number of rows as the test file.

    Args:
        predictions_file_path: Location of the predictions CSV.
        test_file_path: Location of the test CSV; only its row count is used.

    Returns:
        A ``(is_valid, message)`` tuple. ``is_valid`` is ``True`` only when
        every check passes; ``message`` describes the first failing check or
        confirms success. Read failures are reported through the message
        rather than raised.
    """
    required = ['id', 'tokens', 'ner_tags']

    # Load the predictions first so that a broken predictions file is
    # reported before the test file is even opened.
    try:
        predictions = pd.read_csv(predictions_file_path)
    except Exception as err:
        return False, f"Error reading predictions CSV file: {err}"

    try:
        reference = pd.read_csv(test_file_path)
    except Exception as err:
        return False, f"Error reading test CSV file: {err}"

    # Every required column must be present in the predictions header.
    found = list(predictions.columns)
    if not all(name in found for name in required):
        return False, f"Incorrect columns. Expected: {required}, Found: {found}"

    # There must be exactly one prediction row per test row.
    wanted_rows, actual_rows = len(reference), len(predictions)
    if actual_rows != wanted_rows:
        return False, f"Incorrect number of predictions. Expected: {wanted_rows}, Found: {actual_rows}"

    return True, "NER prediction file format is correct."

def check_nli_format(predictions_file_path, test_file_path):
    """
    Validate the layout of an NLI predictions CSV against a test CSV.

    The predictions file passes when it can be read as CSV, contains the
    columns ``premise``, ``hypothesis`` and ``label`` (names are matched
    exactly, so they must be lower-case; extra columns and column order are
    not checked), and has the same number of rows as the test file.

    Args:
        predictions_file_path: Location of the predictions CSV.
        test_file_path: Location of the test CSV; only its row count is used.

    Returns:
        A ``(is_valid, message)`` tuple. ``is_valid`` is ``True`` only when
        every check passes; ``message`` describes the first failing check or
        confirms success. Read failures are reported through the message
        rather than raised.
    """
    required = ['premise', 'hypothesis', 'label']

    # Load the predictions first so that a broken predictions file is
    # reported before the test file is even opened.
    try:
        predictions = pd.read_csv(predictions_file_path)
    except Exception as err:
        return False, f"Error reading predictions CSV file: {err}"

    try:
        reference = pd.read_csv(test_file_path)
    except Exception as err:
        return False, f"Error reading test CSV file: {err}"

    # Every required column must be present in the predictions header.
    found = list(predictions.columns)
    if not all(name in found for name in required):
        return False, f"Incorrect columns. Expected: {required}, Found: {found}"

    # There must be exactly one prediction row per test row.
    wanted_rows, actual_rows = len(reference), len(predictions)
    if actual_rows != wanted_rows:
        return False, f"Incorrect number of predictions. Expected: {wanted_rows}, Found: {actual_rows}"

    return True, "NLI prediction file format is correct."

# Check NER prediction file: run the format check and print its message.
# NOTE(review): the test path below is the same placeholder as the predictions
# path, so the row-count check compares the file with itself until it is replaced.
ner_predictions_file_path = 'predictions_NERLens.csv' # placeholder: replace with the NER predictions CSV path
ner_test_file_path = 'predictions_NERLens.csv' # placeholder: replace with the NER test CSV path
is_valid, message = check_ner_format(ner_predictions_file_path, ner_test_file_path)
print(f"NER File Check: {message}")

# Check NLI prediction file: run the format check and print its message.
# NOTE(review): same placeholder situation as above; `is_valid` and `message`
# are overwritten here, and only `message` is printed (`is_valid` is never read).
nli_predictions_file_path = 'predictions_NLILens.csv' # placeholder: replace with the NLI predictions CSV path
nli_test_file_path = 'predictions_NLILens.csv' # placeholder: replace with the NLI test CSV path
is_valid, message = check_nli_format(nli_predictions_file_path, nli_test_file_path)
print(f"NLI File Check: {message}")

# Note: 'predictions_NERLens.csv' and 'predictions_NLILens.csv' are placeholders. Update both the prediction paths and the test paths to the actual file locations before relying on these checks.