In [None]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
warnings.filterwarnings('ignore')

In [None]:
# Benchmark identifiers; each one is used as a sub-directory name under a
# method's cv_output folder when the metric files are read.
datasets = [
    'sem_eval_task_8',
    'tacred',
    'retacred',
    'FewRel',
    'NYT10',
    'WebNLG',
    'crossRE',
]

In [None]:
# Joint extraction methods to compare; each name is used as a directory name
# when the metric files are read.
joint_methods = [
    "SPN4RE",
    "TDEER",
    "UniRel",
    "RIFRE",
]

In [None]:
# Collect the per-fold precision / recall / F1 of every (method, dataset) pair
# into one long-format frame called `results`.
#
# One frame is built per metrics file and everything is concatenated once at
# the end. `DataFrame.append` was removed in pandas 2.0, and combined with a
# bare `except:` it made every file look "Missing" while leaving `results`
# empty. Appending row by row was also quadratic.
metric_columns = ['precision', 'recall', 'f1']
result_columns = ['Method', 'Dataset', 'fold'] + metric_columns

per_file_frames = []
for method in joint_methods:
    for dataset in datasets:
        metrics_path = f'/blue/woodard/share/Relation-Extraction/Methods/{method}/cv_output/{dataset}/pr_metrics.csv'

        # A missing or unreadable file is expected for some combinations:
        # report it and move on, but let genuine programming errors surface.
        try:
            fold_metrics = pd.read_csv(metrics_path)
        except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
            print(f'Missing {dataset}, {method}')
            continue

        # A file without the expected columns (KeyError) or with a
        # non-integer / NaN fold (ValueError) is skipped with its own message
        # so it is not mistaken for a missing file.
        try:
            per_fold = pd.DataFrame({
                'Method': method,
                'Dataset': dataset,
                'fold': fold_metrics['fold'].astype(int),
                'precision': fold_metrics['precision'].round(2),
                'recall': fold_metrics['recall'].round(2),
                'f1': fold_metrics['f1'].round(2),
            })
        except (KeyError, ValueError) as err:
            print(f'Malformed {dataset}, {method}: {err!r}')
            continue

        per_file_frames.append(per_fold)

# Keep the expected columns even when nothing could be loaded, so downstream
# cells fail on "no data" rather than on a missing column.
if per_file_frames:
    results = pd.concat(per_file_frames, ignore_index=True)[result_columns]
else:
    results = pd.DataFrame(columns=result_columns)


In [None]:
# Average all remaining columns of `results` within each (Method, Dataset)
# pair; the grouping keys stay as ordinary columns rather than the index.
group_keys = ['Method', 'Dataset']
grouped_results = results.groupby(group_keys, as_index=False)
g_df = grouped_results.mean()

In [None]:
# Horizontal box plot of the mean F1 per joint method, with one coloured
# point per dataset drawn on top, saved to ./images/joint_all.png.

# Legend names for the raw identifiers stored in the 'Dataset' column.
# Labels are looked up by name so a missing dataset can never shift the
# legend and mislabel the remaining points.
dataset_display_names = {
    'FewRel': 'FewRel',
    'NYT10': 'NYT10',
    'WebNLG': 'WebNLG',
    'crossRE': 'CrossRE',
    'retacred': 'RETACRED',
    'sem_eval_task_8': 'SemEval',
    'tacred': 'TACRED',
}

# Pin the hue order: known datasets first (in the order above), then any
# dataset present in the data that has no display name yet.
present_datasets = list(g_df['Dataset'].unique())
hue_order = [name for name in dataset_display_names if name in present_datasets]
hue_order += [name for name in present_datasets if name not in dataset_display_names]

fig, ax = plt.subplots(figsize=(14, 4))

# Transparent boxes with black outlines so the overlaid points stay visible.
sns.boxplot(y='Method', x='f1', data=g_df, palette='Set3', ax=ax,
            boxprops={'facecolor': 'none', 'edgecolor': 'black'},
            whiskerprops={'color': 'black'},
            capprops={'color': 'black'},
            medianprops={'color': 'black'})

# Overlay one marker per (method, dataset) mean, coloured by dataset.
sns.stripplot(y='Method', x='f1', data=g_df, hue='Dataset', hue_order=hue_order,
              dodge=False, palette='bright', marker='o', alpha=0.7, size=8, ax=ax)

# Larger tick labels; y labels kept horizontal. tick_params is used instead of
# set_yticklabels(get_yticklabels()), which warns about a non-fixed locator.
ax.tick_params(axis='x', labelsize=14)
ax.tick_params(axis='y', labelsize=14, labelrotation=0)

ax.set_xlabel('F1 Score', fontsize=14)
ax.set_ylabel('Methods', fontsize=14)

# F1 is plotted on a fixed 0-1 axis.
ax.set_xlim(0, 1)

# Rebuild the legend with display names, placed in a single row above the axes.
handles, labels = ax.get_legend_handles_labels()
legend_labels = [dataset_display_names.get(label, label) for label in labels]
ax.legend(handles, legend_labels, title='Datasets', loc='upper center', fontsize=14,
          title_fontsize=14, bbox_to_anchor=(0.5, 1.23),
          ncol=max(len(legend_labels), 1), frameon=False)

# Save as PNG at 600 DPI with a tight bounding box; create the output
# directory first so a fresh checkout does not fail here.
output_path = Path('./images/joint_all.png')
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(output_path, format='png', dpi=600, bbox_inches='tight')

# Show plot
plt.show()