In [15]:
# Mount Google Drive at /content/drive so later cells can work with files stored
# under it. Colab-only: this relies on the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
!pip install kaleido==0.2.1



In [17]:
import pandas as pd
import kaleido
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import plotly.express as px
import plotly.io as pio
%cd /content/drive/My Drive/GermanicPOS/Ablation/Plotting

/content/drive/My Drive/GermanicPOS/Ablation/Plotting


In [18]:
# Load order is pinned explicitly: glob lists files in arbitrary order, and a stray
# CSV in the working directory would otherwise shift every frame's position in dfs.
expected_files = [
    'resultsGOTH.csv',
    'resultsOS.csv',
    'resultsOI.csv',
    'resultsOHG.csv',
    'resultsOE.csv',
    'OHG_definitenesscontrast.csv',
]

# Fail early, with the offending names, if any input table is absent.
found_files = set(glob.glob('*.csv'))
missing_files = [name for name in expected_files if name not in found_files]
if missing_files:
    raise FileNotFoundError(f"Missing expected CSV files: {missing_files}")

csv_files = list(expected_files)
print(csv_files)

# dfs[i] is the table read from csv_files[i].
dfs = [pd.read_csv(f) for f in csv_files]


['resultsGOTH.csv', 'resultsOS.csv', 'resultsOI.csv', 'resultsOHG.csv', 'resultsOE.csv', 'OHG_definitenesscontrast.csv']


In [19]:
# Tag every loaded frame with its language code. The code is looked up by source
# file name (csv_files and dfs are parallel lists), so the labels stay correct
# whatever order the files were listed in.
lang_by_file = {
    'resultsGOTH.csv': 'GOTH',
    'resultsOS.csv': 'OS',
    'resultsOI.csv': 'OIce',
    'resultsOHG.csv': 'OHG',
    'resultsOE.csv': 'OE',
    'OHG_definitenesscontrast.csv': 'OHG',
}

# Every table listed above must have been loaded.
not_loaded = [name for name in lang_by_file if name not in csv_files]
if not_loaded:
    raise ValueError(f"Expected CSV files were not loaded: {not_loaded}")

for file_name, frame in zip(csv_files, dfs):
    # Files without a known language are left untagged.
    if file_name in lang_by_file:
        frame['lang'] = lang_by_file[file_name]

In [20]:
# Stack the five per-language ablation result tables into one frame. Tables are
# selected by source file name (csv_files and dfs are parallel lists), so the
# definiteness-contrast table is never mixed in and list order does not matter.
# csv_files.index raises ValueError if one of the tables was not loaded.
ablation_files = [
    'resultsGOTH.csv',
    'resultsOS.csv',
    'resultsOI.csv',
    'resultsOHG.csv',
    'resultsOE.csv',
]
df = pd.concat([dfs[csv_files.index(name)] for name in ablation_files])

In [21]:
# Switch the two plotted columns to their display names in a single rename.
column_labels = {'lang': 'Language', 'sample_size': 'Sample Size'}
df = df.rename(columns=column_labels)

In [22]:
# Adjective ablation curve: mean accuracy against sample size, one line per language.
eval_tag = 'ADJ'
languages = ['OIce', 'OE', 'OHG']
color_map = {'OIce': 'red', 'OS': 'green', 'OE': 'blue', 'OHG': 'purple'}

# Mean, standard deviation and standard error of accuracy for every
# (tag, sample size, language) combination. 'sem' is std / sqrt(n) with n taken
# from the data, so the error bars are right for any number of runs per group.
grouped = (
    df.groupby(['tag', 'Sample Size', 'Language'])['accuracy']
    .agg(['mean', 'std', 'sem'])
    .rename(columns={'sem': 'standard_error'})
    .reset_index()
)

# Keep only the tag and languages shown in this figure; drop=True avoids carrying
# the old row numbers along as extra columns.
keep = (grouped['tag'] == eval_tag) & grouped['Language'].isin(languages)
grouped = grouped[keep].reset_index(drop=True)

fig = px.line(grouped, x='Sample Size', y='mean', color='Language',
              error_y='standard_error', range_x=[0, 10000],
              title='Adjective - Ablation Study', color_discrete_map=color_map)

# Both axes share the same light-grey grid and zero line.
axis_style = dict(
    showgrid=True,
    gridcolor='#CCCCCC',
    gridwidth=1,
    zeroline=True,
    zerolinecolor='#CCCCCC',
    zerolinewidth=1,
)

fig.update_layout(
    xaxis_title='Sample Size',
    yaxis_title='Mean Accuracy',
    font=dict(color='#555555', family="Verdana"),
    plot_bgcolor='#FFFFFF',
    paper_bgcolor='#FFFFFF',
    xaxis=axis_style,
    yaxis=axis_style,
    width=800,
    height=500,
)

# Save a static PNG next to the input data, then render inline.
fig.write_image("adj_ablation.png")
fig.show()

In [23]:
# Determiner ablation curve: mean accuracy against sample size, one line per language.
eval_tag = 'D'
languages = ['OIce', 'OE', 'OHG']
color_map = {'OIce': 'red', 'OS': 'green', 'OE': 'blue', 'OHG': 'purple'}

# Mean, standard deviation and standard error of accuracy for every
# (tag, sample size, language) combination. 'sem' is std / sqrt(n) with n taken
# from the data, so the error bars are right for any number of runs per group.
grouped = (
    df.groupby(['tag', 'Sample Size', 'Language'])['accuracy']
    .agg(['mean', 'std', 'sem'])
    .rename(columns={'sem': 'standard_error'})
    .reset_index()
)

# Keep only the tag and languages shown in this figure; drop=True avoids carrying
# the old row numbers along as extra columns.
keep = (grouped['tag'] == eval_tag) & grouped['Language'].isin(languages)
grouped = grouped[keep].reset_index(drop=True)

fig = px.line(grouped, x='Sample Size', y='mean', color='Language',
              error_y='standard_error', range_x=[0, 10000],
              title='Determiner - Ablation Study', color_discrete_map=color_map)

# Both axes share the same light-grey grid and zero line.
axis_style = dict(
    showgrid=True,
    gridcolor='#CCCCCC',
    gridwidth=1,
    zeroline=True,
    zerolinecolor='#CCCCCC',
    zerolinewidth=1,
)

fig.update_layout(
    xaxis_title='Sample Size',
    yaxis_title='Mean Accuracy',
    font=dict(color='#555555', family="Verdana"),
    plot_bgcolor='#FFFFFF',
    paper_bgcolor='#FFFFFF',
    xaxis=axis_style,
    yaxis=axis_style,
    width=800,
    height=500,
)

# Save a static PNG next to the input data, then render inline.
fig.write_image("d_ablation.png")
fig.show()

In [24]:
# Noun ablation curve: mean accuracy against sample size, one line per language.
# No language filter is applied here: every language present in df is plotted.
eval_tag = 'N'
# A language without an entry in this map falls back to plotly's default colours.
color_map = {'OIce': 'red', 'OS': 'green', 'OE': 'blue', 'OHG': 'purple'}

# Mean, standard deviation and standard error of accuracy for every
# (tag, sample size, language) combination. 'sem' is std / sqrt(n) with n taken
# from the data, so the error bars are right for any number of runs per group.
grouped = (
    df.groupby(['tag', 'Sample Size', 'Language'])['accuracy']
    .agg(['mean', 'std', 'sem'])
    .rename(columns={'sem': 'standard_error'})
    .reset_index()
)

# Keep only the rows for this tag; drop=True avoids carrying the old row numbers
# along as an extra column.
grouped = grouped[grouped['tag'] == eval_tag].reset_index(drop=True)

fig = px.line(grouped, x='Sample Size', y='mean', color='Language',
              error_y='standard_error', range_x=[0, 5000],
              title='Noun - Ablation Study', color_discrete_map=color_map)

# Both axes share the same light-grey grid and zero line.
axis_style = dict(
    showgrid=True,
    gridcolor='#CCCCCC',
    gridwidth=1,
    zeroline=True,
    zerolinecolor='#CCCCCC',
    zerolinewidth=1,
)

fig.update_layout(
    xaxis_title='Sample Size',
    yaxis_title='Mean Accuracy',
    font=dict(color='#555555', family="Verdana"),
    plot_bgcolor='#FFFFFF',
    paper_bgcolor='#FFFFFF',
    xaxis=axis_style,
    yaxis=axis_style,
    width=400,
    height=500,
)

# Save a static PNG next to the input data, then render inline.
fig.write_image("n_ablation.png")
fig.show()

In [25]:
# Conjunction ablation curve: mean accuracy against sample size, one line per language.
# No language filter is applied here: every language present in df is plotted.
eval_tag = 'CONJ'
# A language without an entry in this map falls back to plotly's default colours.
color_map = {'OIce': 'red', 'OS': 'green', 'OE': 'blue', 'OHG': 'purple'}

# Mean, standard deviation and standard error of accuracy for every
# (tag, sample size, language) combination. 'sem' is std / sqrt(n) with n taken
# from the data, so the error bars are right for any number of runs per group.
grouped = (
    df.groupby(['tag', 'Sample Size', 'Language'])['accuracy']
    .agg(['mean', 'std', 'sem'])
    .rename(columns={'sem': 'standard_error'})
    .reset_index()
)

# Keep only the rows for this tag; drop=True avoids carrying the old row numbers
# along as an extra column.
grouped = grouped[grouped['tag'] == eval_tag].reset_index(drop=True)

fig = px.line(grouped, x='Sample Size', y='mean', color='Language',
              error_y='standard_error', range_x=[0, 5000],
              title='Conjunction - Ablation Study', color_discrete_map=color_map)

# Both axes share the same light-grey grid and zero line.
axis_style = dict(
    showgrid=True,
    gridcolor='#CCCCCC',
    gridwidth=1,
    zeroline=True,
    zerolinecolor='#CCCCCC',
    zerolinewidth=1,
)

fig.update_layout(
    xaxis_title='Sample Size',
    yaxis_title='Mean Accuracy',
    font=dict(color='#555555', family="Verdana"),
    plot_bgcolor='#FFFFFF',
    paper_bgcolor='#FFFFFF',
    xaxis=axis_style,
    yaxis=axis_style,
    width=400,
    height=500,
)

# Save a static PNG next to the input data, then render inline.
fig.write_image("conj_ablation.png")
fig.show()

In [26]:
# Complementizer ablation curve: mean accuracy against sample size, one line per language.
eval_tag = 'C'
languages = ['OIce', 'OE', 'OS']
color_map = {'OIce': 'red', 'OS': 'green', 'OE': 'blue', 'OHG': 'purple'}

# Mean, standard deviation and standard error of accuracy for every
# (tag, sample size, language) combination. 'sem' is std / sqrt(n) with n taken
# from the data, so the error bars are right for any number of runs per group.
grouped = (
    df.groupby(['tag', 'Sample Size', 'Language'])['accuracy']
    .agg(['mean', 'std', 'sem'])
    .rename(columns={'sem': 'standard_error'})
    .reset_index()
)

# Keep only the tag and languages shown in this figure; drop=True avoids carrying
# the old row numbers along as extra columns.
keep = (grouped['tag'] == eval_tag) & grouped['Language'].isin(languages)
grouped = grouped[keep].reset_index(drop=True)

fig = px.line(grouped, x='Sample Size', y='mean', color='Language',
              error_y='standard_error', range_x=[0, 10000],
              title='Complementizer - Ablation Study', color_discrete_map=color_map)

# Both axes share the same light-grey grid and zero line.
axis_style = dict(
    showgrid=True,
    gridcolor='#CCCCCC',
    gridwidth=1,
    zeroline=True,
    zerolinecolor='#CCCCCC',
    zerolinewidth=1,
)

fig.update_layout(
    xaxis_title='Sample Size',
    yaxis_title='Mean Accuracy',
    font=dict(color='#555555', family="Verdana"),
    plot_bgcolor='#FFFFFF',
    paper_bgcolor='#FFFFFF',
    xaxis=axis_style,
    yaxis=axis_style,
    width=800,
    height=500,
)

# Save a static PNG next to the input data, then render inline.
fig.write_image("c_ablation.png")
fig.show()

In [29]:
# Switch df to the OHG definiteness-contrast table. It is looked up by source file
# name (csv_files and dfs are parallel lists) rather than by list position.
# NOTE: this rebinds df, so the per-language plotting cells above need df rebuilt
# before they can be re-run.
df = dfs[csv_files.index('OHG_definitenesscontrast.csv')]

In [32]:
# Definite vs. indefinite determiner ablation curve: mean accuracy against sample
# size, one line per determiner type.
tag_labels = {'DD': 'Definite', 'DI': 'Indefinite'}

# Keys are the legend labels (the renamed tag values): color_discrete_map is matched
# against the values found in the colour column, which no longer holds 'DD'/'DI'
# by the time the figure is built.
color_map = {'Definite': 'orange', 'Indefinite': 'brown'}

# Mean, standard deviation and standard error of accuracy for every
# (tag, sample size, language) combination. 'sem' is std / sqrt(n) with n taken
# from the data, so the error bars are right for any number of runs per group.
grouped = (
    df.groupby(['tag', 'sample_size', 'lang'])['accuracy']
    .agg(['mean', 'std', 'sem'])
    .rename(columns={'sem': 'standard_error'})
    .reset_index()
)

# Keep only the two determiner tags; drop=True avoids carrying the old row numbers
# along as an extra column.
grouped = grouped[grouped['tag'].isin(list(tag_labels))].reset_index(drop=True)

# Rename tags for the plot legend
grouped['tag'] = grouped['tag'].replace(tag_labels)

fig = px.line(grouped, x='sample_size', y='mean', color='tag',
              error_y='standard_error', range_x=[0, 20000],
              title='Definite vs Indefinite Determiners - Ablation Study (OHG)',
              color_discrete_map=color_map)

# Both axes share the same light-grey grid and zero line.
axis_style = dict(
    showgrid=True,
    gridcolor='#CCCCCC',
    gridwidth=1,
    zeroline=True,
    zerolinecolor='#CCCCCC',
    zerolinewidth=1,
)

fig.update_layout(
    xaxis_title='Sample Size',
    yaxis_title='Mean Accuracy',
    font=dict(color='#555555', family="Verdana"),
    plot_bgcolor='#FFFFFF',
    paper_bgcolor='#FFFFFF',
    xaxis=axis_style,
    yaxis=axis_style,
    width=800,
    height=500,
)

# Save a static PNG next to the input data, then render inline.
fig.write_image("dd_di_ohg_ablation.png")
fig.show()