In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the 'all_plot2' sheet of the results workbook and echo it as plain text.
df = pd.read_excel(
    io='../data/results_overview.xlsx',
    sheet_name='all_plot2',
)
print(df)

In [None]:
# Columns of `df` holding one accuracy value per classifier.
# Defined in this cell so it runs on a fresh kernel; it was previously only
# defined in a later cell, which made this cell fail on Restart & Run All.
model_cols = ['SVM', 'XGBoost', 'DecisionTree', 'LogisticRegression']

# Reshape to long format: one row per (configuration, model) pair with its accuracy
df_melted = df.melt(id_vars=['text_interview', 'type', 'feature_model', 'unit_session'],
                    value_vars=model_cols,
                    var_name='Model', value_name='Accuracy')

# Keep only the session-level rows; .copy() gives an independent frame so the
# column assignment below does not write into a slice (SettingWithCopyWarning).
df_melted = df_melted[df_melted['unit_session'] == 'session'].copy()

# Hue label combining 'text_interview', 'feature_model' and 'type'
df_melted['combined_category'] = (
    df_melted['text_interview'] + '_' + df_melted['feature_model'] + '_' + df_melted['type']
)

# Plot the bar chart (errorbar=None replaces the deprecated ci=None and matches
# the catplot calls elsewhere in this notebook)
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=df_melted, x='Model', y='Accuracy', hue='combined_category',
            errorbar=None, ax=ax)
ax.set_title('Accuracy of Models')
ax.set_xlabel('Model')  # the x axis shows the 'Model' column
ax.set_ylabel('Accuracy')
plt.xticks(rotation=45)

# Add text annotations for each bar
for p in ax.patches:
    height = p.get_height()
    # Skip patches that are not real bars: missing values (NaN height) and
    # zero-width rectangles. NOTE(review): some seaborn releases appear to add
    # zero-size legend handles to ax.patches, which would be labelled "0.000".
    if pd.isna(height) or p.get_width() == 0:
        continue
    ax.annotate(f"{height:.3f}", (p.get_x() + p.get_width() / 2, height),
                ha='center', va='bottom')

# Place the legend outside the axes; its entries are the 'combined_category' levels
ax.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()


In [None]:
# Custom colour palette passed to seaborn for the hue ('combined_category') levels
custom_palette = ["#0378F4", "#E13B41", "#A0CDFE", "#FAA4A4"]

# Columns of `df` holding one accuracy value per classifier
model_cols = ['SVM', 'XGBoost', 'DecisionTree', 'LogisticRegression']

# Reshape to long format: one row per (configuration, model) pair with its accuracy
df_melted = df.melt(id_vars=['text_interview', 'type', 'feature_model', 'unit_session'],
                    value_vars=model_cols,
                    var_name='Model', value_name='Accuracy')

# Keep only the session-level rows; .copy() gives an independent frame so the
# column assignment below does not write into a slice (SettingWithCopyWarning).
df_melted = df_melted[df_melted['unit_session'] == 'session'].copy()

# Hue label combining 'feature_model' and 'type'
df_melted['combined_category'] = df_melted['feature_model'] + '_' + df_melted['type']

# Largest accuracy in the filtered data, used as the shared upper y-limit
max_accuracy = df_melted['Accuracy'].max()

# One facet per 'text_interview' value. `dodge` is a boolean option; the former
# value 0.3 only worked because it is truthy, so spell it as True.
g = sns.catplot(data=df_melted, x='Model', y='Accuracy', hue='combined_category',
                col='text_interview', kind='bar', errorbar=None, sharey=False,
                height=6, aspect=1.2, palette=custom_palette, legend=False, dodge=True)

# Same y-range on every facet so bars are comparable even though sharey=False
g.set(ylim=(0, max_accuracy))

# Blank out figure title, axis labels, x tick labels and facet titles
g.figure.suptitle(' ', y=1.02)  # `figure` is the preferred name for the deprecated `fig`
g.set_axis_labels(' ', ' ')
g.set_xticklabels([])  # Remove x-axis labels
g.set_titles("")

# Add a percentage label on top of each bar
for ax in g.axes.flat:
    for p in ax.patches:
        height = p.get_height()
        # Skip missing values (NaN height) and zero-width patches that are not real bars
        if pd.isna(height) or p.get_width() == 0:
            continue
        ax.annotate(f"{height * 100:.0f}%", (p.get_x() + p.get_width() / 2, height),
                    ha='center', va='bottom', fontsize=11)

# Second column of facets: drop the y label and move the y axis to the right side
for ax in g.axes[:, 1]:
    ax.set_ylabel('')
    ax.yaxis.set_label_position("right")
    ax.yaxis.tick_right()
    ax.spines['right'].set_visible(True)

# Add a horizontal line at y=0.5 in both subplots
for ax in g.axes.flat:
    ax.axhline(y=0.5, color='gray', linestyle='--')

plt.tight_layout()
plt.show()


In [None]:
# Load the 'personalisation_plot' sheet and discard its 'feature_model' column.
df = (
    pd.read_excel('../data/results_overview.xlsx', sheet_name='personalisation_plot')
    .drop(columns=['feature_model'])
)

In [None]:
# Load the 'personalisation_plot' sheet and discard its 'feature_model' column
df = pd.read_excel('../data/results_overview.xlsx', sheet_name='personalisation_plot')
df = df.drop(columns=['feature_model'])

# Custom colour palette passed to seaborn for the hue ('combined_category') levels
custom_palette = ["#C8C8C8", "#FAA4A4", "#7F7F7F", "#E13B41"]

# Columns holding the accuracy with and without personalisation
model_cols = ['personalisation', 'no_personalisation']

# Reshape to long format: one row per (configuration, personalisation setting)
df_melted = df.melt(id_vars=['text_interview', 'type', 'unit_session'],
                    value_vars=model_cols,
                    var_name='personalisation', value_name='Accuracy')

# Rename the 'type' levels for display. Series.replace is used instead of the
# chained assignment `df_melted.type[mask] = ...`, which raises
# SettingWithCopyWarning and does not modify the frame under pandas Copy-on-Write.
df_melted['type'] = df_melted['type'].replace({'question': 'phrase', 'windowed': 'word'})

# Hue label combining 'text_interview' and 'type'
df_melted['combined_category'] = df_melted['text_interview'] + '_' + df_melted['type']

# Largest accuracy in the data, used as the shared upper y-limit
max_accuracy = df_melted['Accuracy'].max()

# One facet per 'unit_session' value. `dodge` is a boolean option, so pass True
# rather than the integer 1.
g = sns.catplot(data=df_melted, x='personalisation', y='Accuracy', hue='combined_category',
                col='unit_session', kind='bar', errorbar=None, sharey=False,
                height=6, aspect=1.2, palette=custom_palette, legend=False, dodge=True)

# Same y-range on every facet so bars are comparable even though sharey=False
g.set(ylim=(0, max_accuracy))

# Blank out figure title, axis labels and x tick labels
g.figure.suptitle(' ', y=1.02)  # `figure` is the preferred name for the deprecated `fig`
g.set_axis_labels(' ', ' ')
g.set_xticklabels([])  # Remove x-axis labels

# Add a percentage label on top of each bar
for ax in g.axes.flat:
    for p in ax.patches:
        height = p.get_height()
        # Skip missing values (NaN height) and zero-width patches that are not real bars
        if pd.isna(height) or p.get_width() == 0:
            continue
        ax.annotate(f"{height * 100:.1f}%", (p.get_x() + p.get_width() / 2, height),
                    ha='center', va='bottom', fontsize=11)

# Add a horizontal line at y=0.5 on the 'session' facet only. The facet is looked
# up by its column value instead of by matching the auto-generated title text.
session_ax = g.axes_dict.get('session')
if session_ax is not None:
    session_ax.axhline(y=0.5, color='gray', linestyle='--')

# Remove the facet titles
g.set_titles("")

# Lay out once, then write the figure to disk before showing it
plt.tight_layout()
plt.savefig('output_plot.png')
plt.show()
