# Visualization Notebook

This Python notebook contains the code for the visualizations:

1. Plotting the accuracy scores of the classification task as heatmaps
2. Plotting the t-SNE embedding of the Whisper L2-Norm features (the best-performing configuration)

---

## Plotting the results table

In [None]:
import os
import pandas as pd
import numpy as np
from plotly.graph_objs import Figure, Scatter
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import re
import plotly.io as pio
import plotly.express as px
from sklearn.manifold import TSNE

In [2]:
def get_k_shot_value(filename):
  """Parse the k-shot count embedded in a result filename.

  The count is the run of digits that immediately precedes the literal
  text ``-result`` anywhere in the name (e.g. ``"5-result.csv"`` -> ``5``).

  Args:
    filename: Name of the file to inspect.

  Returns:
    The k-shot count as an int, or None when the name contains no
    ``<digits>-result`` fragment.
  """
  found = re.search(r"(\d+)-result", filename)
  # Guard clause: without a "<digits>-result" fragment there is no k-shot value.
  if found is None:
    return None
  return int(found.group(1))

In [3]:
def _resolve_k_shot(csv_file):
    """Return the shot size encoded in a result filename, or None if there is none."""
    # The leading "<k>-" prefix is the notebook's original convention, so it
    # takes precedence; get_k_shot_value() additionally covers names where the
    # number is not the first token (e.g. "run-5-result.csv").
    prefix = csv_file.split("-")[0]
    try:
        return int(prefix)
    except ValueError:
        return get_k_shot_value(csv_file)


def _collect_accuracy_rows(folder_path):
    """Read every k-shot CSV in ``folder_path`` into long-format records."""
    shot_files = []
    for name in os.listdir(folder_path):
        if not name.endswith(".csv"):
            continue
        k_shot = _resolve_k_shot(name)
        if k_shot is None:
            # A stray CSV without a shot size cannot be placed on the x-axis.
            print(f"Skipping {name!r} in {folder_path}: no k-shot value in the filename")
            continue
        shot_files.append((k_shot, name))

    rows = []
    # Sorting the (k_shot, name) pairs keeps 0-shot files first instead of
    # treating the falsy value 0 as "missing".
    for k_shot, name in sorted(shot_files):
        results = pd.read_csv(os.path.join(folder_path, name))
        languages = results["Unnamed: 0"].tolist()
        accuracies = results["accuracy"].tolist()
        for language, accuracy in zip(languages, accuracies):
            rows.append({
                "Language": language,
                "Shot Size": k_shot,
                "Accuracy": accuracy
            })
    return rows


def plot_heatmaps(parent_folder_path, model, zmin=0.65, zmax=0.90,
                  output_dir="./plots", show_title=False):
    """Draw and export one accuracy heatmap per feature set.

    For each of the feature sets ``"whisper"`` and ``"wav2vec"`` the CSV files
    in ``<parent_folder_path>/<feature_set>/`` are read. Each CSV must provide
    an ``"Unnamed: 0"`` column (used as the language label) and an
    ``"accuracy"`` column; its shot size is taken from the filename. The
    values are pivoted into a Language x Shot Size grid, shown with
    ``fig.show()`` and written to
    ``<output_dir>/<model>_<feature_set>_heatmap.pdf`` (whitespace and colons
    in ``model`` are replaced by ``-``).

    Args:
        parent_folder_path: Folder containing the ``whisper`` and ``wav2vec``
            sub-folders with the result CSVs.
        model: Label of the pooling/normalisation variant; used for the output
            filename and, when ``show_title`` is true, for the figure title.
        zmin: Lower bound of the colour scale.
        zmax: Upper bound of the colour scale.
        output_dir: Directory the PDFs are written to; created if missing.
        show_title: When true, add a centred title to each figure.

    Returns:
        None. The figures are displayed and saved as a side effect.

    Raises:
        FileNotFoundError: If a feature-set sub-folder does not exist.
        KeyError: If a CSV lacks the ``"Unnamed: 0"`` or ``"accuracy"`` column.
    """
    feature_sets = ["whisper", "wav2vec"]

    # Load everything first so a missing folder fails before any figure is shown.
    data = {
        feature_set: _collect_accuracy_rows(os.path.join(parent_folder_path, feature_set))
        for feature_set in feature_sets
    }

    model_safe = re.sub(r'[\s:]', '-', model)
    os.makedirs(output_dir, exist_ok=True)  # Create directory if it doesn't exist

    # Plot heatmaps for each feature set
    for feature_set, rows in data.items():
        if not rows:
            # An empty frame has no "Language" column and would fail with an
            # opaque KeyError below.
            print(f"No results found for {feature_set!r} under {parent_folder_path}; skipping")
            continue

        # Languages sorted alphabetically on the rows, shot sizes on the columns.
        df_pivot = (
            pd.DataFrame(rows)
            .sort_values(by="Language")
            .pivot(index="Language", columns="Shot Size", values="Accuracy")
        )

        fig = go.Figure(data=go.Heatmap(
            z=df_pivot.values,
            x=df_pivot.columns,
            y=df_pivot.index,
            colorscale='blues',
            colorbar=dict(title='Accuracy'),
            zmin=zmin,  # Minimum value for color scaling
            zmax=zmax,  # Maximum value for color scaling
            text=df_pivot.values,  # Text to display inside heatmap cells
            hoverinfo="text+z",
            texttemplate="%{text:.2f}",  # Show two decimal places
            textfont={"size": 16, "color": "black"},  # Large, always-black labels
        ))

        # Customize layout
        layout = dict(
            xaxis_title="Shot Size",
            yaxis_title="Language",
            yaxis=dict(tickmode='linear'),
            autosize=False,
            width=600,
            height=600,
        )
        if show_title:
            layout["title"] = {
                'text': f"{model} - Heatmap of Accuracy for {feature_set.capitalize()}",
                'x': 0.5,
                'xanchor': 'center'
            }
        fig.update_layout(**layout)

        # Show the plot
        fig.show()

        # Save the plot as a PDF
        output_path = os.path.join(output_dir, f"{model_safe}_{feature_set}_heatmap.pdf")
        pio.write_image(fig, output_path, format="pdf")

In [4]:
# Heatmaps for the temporal-mean pooled features.
plot_heatmaps(
    parent_folder_path="./results/Temporal-Mean/",
    model="Temporal Mean",
)

In [5]:
# Heatmaps for the L2-norm features.
plot_heatmaps(
    parent_folder_path="./results/L2-Norm/",
    model="L2-Norm",
)

---
## Plotting the tSNE Plot

In [None]:
# Load the Whisper L2 feature table and discard the "Unnamed: 0" column
# (presumably a saved index -- confirm against the export script).
df = pd.read_csv("./data/whisper-l2-feats.csv").drop(columns=['Unnamed: 0'])
# Keep only the rows marked as the test split; the marker column is then dropped.
df = df.loc[df['train_test'] == 'test'].drop(columns=['train_test'])

# Everything except the 'language' and 'abuse' columns forms the feature matrix.
features = df.drop(columns=['language', 'abuse'])

In [None]:
# Project the feature matrix to two dimensions with a fixed random seed.
tsne = TSNE(n_components=2, random_state=42)
tsne_result = tsne.fit_transform(features)

# Transposing yields one row per embedding axis, which unpacks directly
# into the two new coordinate columns.
df['tsne1'], df['tsne2'] = tsne_result.T

In [None]:
# Scatter the 2-D t-SNE coordinates, one colour per language.
fig = px.scatter(df, x='tsne1', y='tsne2', color='language',
                 title='')
fig.show()

# Create the output folder here so this cell does not rely on
# plot_heatmaps() having been run earlier in the session.
os.makedirs("./plots", exist_ok=True)
# The embedded features are read from whisper-l2-feats.csv, so the PDF is
# named after Whisper (it was previously mislabelled as wav2vec).
pio.write_image(fig, "./plots/whisper-l2-tSNE-feats.pdf")

---