In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.cm as cm
import matplotlib.colors as mcolors

# Render all matplotlib figure text in Cambria.
# NOTE(review): Cambria is presumably not installed on every machine; confirm the
# font is available wherever this notebook is run, otherwise matplotlib will
# likely fall back to another font.
plt.rcParams['font.family'] = 'Cambria'

In [None]:
# Load the length, sentiment and word-count result tables, plus their "reg_"
# counterparts.
# FIXME: every pd.read_csv() call below is missing its path argument, so this
# cell raises TypeError as written. Supply the CSV locations before running.
length_results = pd.read_csv()
sentiment_results = pd.read_csv()
word_counts_results = pd.read_csv()
reg_length_results = pd.read_csv()
reg_sentiment_results = pd.read_csv()
reg_word_counts_results = pd.read_csv()

# Append each plain table to its "reg_" counterpart so the rest of the notebook
# works on one combined frame per metric; ignore_index gives a fresh 0..n-1 index.
reg_length_results = pd.concat([reg_length_results, length_results], ignore_index = True)
reg_sentiment_results = pd.concat([reg_sentiment_results, sentiment_results], ignore_index = True)
reg_word_counts_results = pd.concat([reg_word_counts_results, word_counts_results], ignore_index = True)

In [None]:
# Split each filename on "_" once; piece 0 is used as the framework name and
# piece 1 as the date.
filename_parts = reg_sentiment_results['Filename'].str.split('_')
new_order = ['Filename', 'Framework', 'Date', 'Positive', 'Negative', 'Neutral']
reg_sentiment_results = reg_sentiment_results.assign(
    Framework=filename_parts.str[0],
    Date=filename_parts.str[1],
)[new_order]

# Publisher tier for every framework:
#   0 = AI governance and standards bodies
#   1 = frontier AI firms
#   2 = AI-adopting / enterprise AI companies
frontier_mapping = {
    'Amazon': 2, 'Anthropic': 1, 'Cohere': 1, 'Deepmind': 1,
    'Deloitte': 2, 'G42': 2, 'Grammarly': 2, 'IBM': 2,
    'KPMG': 2, 'Magic': 2, 'META': 1, 'Microsoft': 1,
    'Naver': 1, 'NVDIA': 2, 'OpenAI': 1, 'PaloAlto': 2,
    'PwC': 2, 'xAI': 1, 'AIVerify': 0, 'ATI': 0,
    'CSA': 0, 'DoS': 0, 'HMG': 0, 'ISOIEC': 0,
    'NIST': 0, 'OECD': 0, 'UNESCO': 0, 'WEF': 0,
}

# A framework missing from the mapping becomes NaN, which makes astype(int) raise
# instead of silently producing an unlabelled row.
reg_sentiment_results['Frontier'] = (
    reg_sentiment_results['Framework'].map(frontier_mapping).astype(int)
)
reg_sentiment_results.head()

In [None]:
# Display label for each publisher tier code stored in the 'Frontier' column.
another_mapping_scheme_ik = {
    0: 'AI Governance and Standards Bodies',
    1: 'Frontier AI Firms',
    2: 'AI-Adopting/Enterprise AI Companies'
}
apply_percentage_sign = ['Positive', 'Negative', 'Neutral']

# Mean sentiment share per tier, rounded to two decimals. The plain DataFrame is
# kept under its own name so that rsr_table only ever refers to the styled table.
rsr_summary = (
    reg_sentiment_results
    .groupby('Frontier')[apply_percentage_sign]
    .mean()
    .round(2)
    .reset_index()
)
rsr_summary['Frontier'] = rsr_summary['Frontier'].map(another_mapping_scheme_ik)
rsr_summary = rsr_summary.rename(columns = {'Frontier' : 'Publishing Entity'})

# Format cell-by-cell through Series.map on each column. DataFrame.applymap is
# deprecated since pandas 2.1, while its replacement DataFrame.map does not exist
# on older versions; the per-column form behaves the same on both.
rsr_summary[apply_percentage_sign] = rsr_summary[apply_percentage_sign].apply(
    lambda column: column.map(lambda value: f"{value:.2f}%")
)

# Styler that renders header and body cells in 12px Cambria.
rsr_table = rsr_summary.style.set_table_styles(
    [
        {'selector': 'th', 'props': [('font-family', 'Cambria'), ('font-size', '12px')]}, # header
        {'selector': 'td', 'props': [('font-family', 'Cambria'), ('font-size', '12px')]}  # cells
    ]
)
rsr_table


In [None]:
# Rank all frameworks by their share of positive-coded language, highest first.
sentiment_results_sorted = reg_sentiment_results.sort_values('Positive', ascending=False)

# Normalising over [-30, 130] rather than [0, 100] means values in 0-100 only use
# roughly the 0.19-0.81 portion of the colormap, avoiding its extreme ends.
norm = mcolors.Normalize(vmin=-30, vmax=130)
colors = cm.Blues(norm(sentiment_results_sorted['Positive']))

fig, ax = plt.subplots(figsize=(10, 6), dpi=500)
ax.barh(
    sentiment_results_sorted['Framework'],
    sentiment_results_sorted['Positive'],
    color=colors,
)
ax.set_xlabel('Positive Sentiment (%)')
ax.set_ylabel('Framework')
ax.set_title('Frameworks by Magnitude of Positive-Coded Language')
ax.set_xlim(0, 100)
ax.invert_yaxis()  # first (largest) row is drawn at the top

# plt.savefig()

fig.tight_layout()
plt.show()

In [None]:
# Order by publisher tier first, then by positive share (highest first) within a tier.
sentiment_results_sorted = reg_sentiment_results.sort_values(
    by=['Frontier', 'Positive'],
    ascending=[True, False],
)

# Values in 0-100 land in roughly the 0.19-0.81 portion of the colormap.
norm = mcolors.Normalize(vmin=-30, vmax=130)
colors = cm.Blues(norm(sentiment_results_sorted['Positive']))

fig, ax = plt.subplots(figsize=(10, 6), dpi=500)
ax.barh(
    sentiment_results_sorted['Framework'],
    sentiment_results_sorted['Positive'],
    color=colors,
)
ax.set_xlabel('Positive Sentiment (%)')
ax.set_ylabel('Framework')
ax.set_title('Frameworks by Magnitude of Positive-Coded Language (Grouped by Frontier)')
ax.set_xlim(0, 100)
ax.invert_yaxis()

# Dashed separators between tiers. The positions are hard-coded for 10 tier-0 and
# 8 tier-1 bars, which assumes one row per framework -- TODO confirm.
for boundary in (9.5, 17.5):
    ax.axhline(y=boundary, color='black', linestyle='--', linewidth=0.8, alpha=0.5)

# plt.savefig()

fig.tight_layout()
plt.show()

In [None]:
# Rank all frameworks by their share of negative-coded language, highest first.
sentiment_results_sorted = reg_sentiment_results.sort_values('Negative', ascending=False)

# Values in 0-100 land in roughly the 0.19-0.81 portion of the colormap.
norm = mcolors.Normalize(vmin=-30, vmax=130)
colors = cm.Reds(norm(sentiment_results_sorted['Negative']))

fig, ax = plt.subplots(figsize=(10, 6), dpi=500)
ax.barh(
    sentiment_results_sorted['Framework'],
    sentiment_results_sorted['Negative'],
    color=colors,
)
ax.set_xlabel('Negative Sentiment (%)')
ax.set_ylabel('Framework')
ax.set_title('Frameworks by Magnitude of Negative-Coded Language')
ax.set_xlim(0, 100)
ax.invert_yaxis()  # first (largest) row is drawn at the top

# plt.savefig()

fig.tight_layout()
plt.show()

In [None]:
# Order by publisher tier first, then by negative share (highest first) within a tier.
sentiment_results_sorted = reg_sentiment_results.sort_values(
    by=['Frontier', 'Negative'],
    ascending=[True, False],
)

# Values in 0-100 land in roughly the 0.19-0.81 portion of the colormap.
norm = mcolors.Normalize(vmin=-30, vmax=130)
colors = cm.Reds(norm(sentiment_results_sorted['Negative']))

fig, ax = plt.subplots(figsize=(10, 6), dpi=500)
ax.barh(
    sentiment_results_sorted['Framework'],
    sentiment_results_sorted['Negative'],
    color=colors,
)
ax.set_xlabel('Negative Sentiment (%)')
ax.set_ylabel('Framework')
ax.set_title('Frameworks by Magnitude of Negative-Coded Language (Grouped by Frontier)')
ax.set_xlim(0, 100)
ax.invert_yaxis()

# Dashed separators between tiers. The positions are hard-coded for 10 tier-0 and
# 8 tier-1 bars, which assumes one row per framework -- TODO confirm.
for boundary in (9.5, 17.5):
    ax.axhline(y=boundary, color='black', linestyle='--', linewidth=0.8, alpha=0.5)

# plt.savefig()

fig.tight_layout()
plt.show()

In [None]:
# Rank all frameworks by their share of neutral language, highest first.
sentiment_results_sorted = reg_sentiment_results.sort_values(by = 'Neutral', ascending = False)

# Normalise the Neutral values for the colormap. The [-30, 130] range means values
# in 0-100 only use roughly the 0.19-0.81 portion of the colormap.
norm = mcolors.Normalize(vmin = -30, vmax = 130)
# 'Greys' is the canonical matplotlib colormap name; the 'Grays' spelling is only
# available as an alias in newer releases, so cm.Grays can raise AttributeError
# on older installs. Both names map to the same colours.
colors = cm.Greys(norm(sentiment_results_sorted['Neutral']))

plt.figure(figsize = (10, 6), dpi = 500)
plt.barh(sentiment_results_sorted['Framework'], sentiment_results_sorted['Neutral'], color = colors)
plt.xlabel('Neutral Sentiment (%)')
plt.ylabel('Framework')
plt.title('Frameworks by Magnitude of Neutral-Aggregated Language')
plt.xlim(0, 100)
plt.gca().invert_yaxis()  # first (largest) row is drawn at the top

# plt.savefig()

plt.tight_layout()
plt.show()

In [None]:
# Order by publisher tier first, then by neutral share (highest first) within a tier.
sentiment_results_sorted = reg_sentiment_results.sort_values(by = ['Frontier', 'Neutral'], ascending = [True, False])

# Values in 0-100 land in roughly the 0.19-0.81 portion of the colormap.
norm = mcolors.Normalize(vmin = -30, vmax = 130)
# 'Greys' is the canonical matplotlib colormap name; the 'Grays' spelling is only
# available as an alias in newer releases, so cm.Grays can raise AttributeError
# on older installs. Both names map to the same colours.
colors = cm.Greys(norm(sentiment_results_sorted['Neutral']))

plt.figure(figsize = (10, 6), dpi = 500)
plt.barh(sentiment_results_sorted['Framework'], sentiment_results_sorted['Neutral'], color = colors)
plt.xlabel('Neutral Sentiment (%)')
plt.ylabel('Framework')
plt.title('Frameworks by Magnitude of Neutral-Aggregated Language (Grouped)')
plt.xlim(0, 100)
plt.gca().invert_yaxis()

# Dashed separators between tiers. The positions are hard-coded for 10 tier-0 and
# 8 tier-1 bars, which assumes one row per framework -- TODO confirm.
plt.axhline(y = 9.5, color = 'black', linestyle = '--', linewidth = 0.8, alpha = 0.5)
plt.axhline(y = 17.5, color = 'black', linestyle = '--', linewidth = 0.8, alpha = 0.5)

# plt.savefig()

plt.tight_layout()
plt.show()

In [None]:
# "Intensity" is the combined share of positive and negative language.
# The column is added to reg_sentiment_results itself because the grouped
# intensity plot reads it from there.
reg_sentiment_results['Intensity'] = reg_sentiment_results['Positive'].add(
    reg_sentiment_results['Negative']
)

# Rank all frameworks by intensity, highest first.
sentiment_results_sorted = reg_sentiment_results.sort_values('Intensity', ascending=False)

# Values in 0-100 land in roughly the 0.19-0.81 portion of the colormap.
norm = mcolors.Normalize(vmin=-30, vmax=130)
colors = cm.Purples(norm(sentiment_results_sorted['Intensity']))

fig, ax = plt.subplots(figsize=(10, 6), dpi=500)
ax.barh(
    sentiment_results_sorted['Framework'],
    sentiment_results_sorted['Intensity'],
    color=colors,
)
ax.set_xlabel('Intensity Sentiment (%)')
ax.set_ylabel('Framework')
ax.set_title('Frameworks by Magnitude of Intense-Aggregated Language')
ax.set_xlim(0, 100)
ax.invert_yaxis()  # first (largest) row is drawn at the top

# plt.savefig()

fig.tight_layout()
plt.show()

In [None]:
# Order by publisher tier first, then by intensity (highest first) within a tier.
# Requires the 'Intensity' column to already exist on reg_sentiment_results.
sentiment_results_sorted = reg_sentiment_results.sort_values(
    by=['Frontier', 'Intensity'],
    ascending=[True, False],
)

# Values in 0-100 land in roughly the 0.19-0.81 portion of the colormap.
norm = mcolors.Normalize(vmin=-30, vmax=130)
colors = cm.Purples(norm(sentiment_results_sorted['Intensity']))

fig, ax = plt.subplots(figsize=(10, 6), dpi=500)
ax.barh(
    sentiment_results_sorted['Framework'],
    sentiment_results_sorted['Intensity'],
    color=colors,
)
ax.set_xlabel('Intense Sentiment (%)')
ax.set_ylabel('Framework')
ax.set_title('Frameworks by Magnitude of Intense-Aggregated Language (Grouped)')
ax.set_xlim(0, 100)
ax.invert_yaxis()

# Dashed separators between tiers. The positions are hard-coded for 10 tier-0 and
# 8 tier-1 bars, which assumes one row per framework -- TODO confirm.
for boundary in (9.5, 17.5):
    ax.axhline(y=boundary, color='black', linestyle='--', linewidth=0.8, alpha=0.5)

# plt.savefig()

fig.tight_layout()
plt.show()

In [None]:
# Identifier columns first, then one count column per tracked word stem.
new_order = [
    'Filename', 'Framework', 'Date',
    "risk", "safe", "bias", "security", "ethic",
    "accountab", "transparen", "explainab", "policy",
    "compliance", "governance", "protect", "sustainab",
    "fair", "catastroph", "responsib", "prepare",
]

# Split each filename on "_" once; piece 0 is used as the framework name and
# piece 1 as the date. After reordering, the length table is joined on
# 'Filename' (default inner join).
filename_parts = reg_word_counts_results['Filename'].str.split('_')
reg_word_counts_results = (
    reg_word_counts_results
    .assign(Framework=filename_parts.str[0], Date=filename_parts.str[1])
    [new_order]
    .merge(reg_length_results, on='Filename')
)

In [None]:
def _word_totals(group_results):
    """Return a two-column frame ('Word', 'Total') of column sums for one group.

    Sums every column from position 3 onward (everything after Filename,
    Framework and Date) and drops the 'Length' row, which is not a word count.
    """
    totals = group_results.iloc[:, 3:].sum(axis = 0).reset_index()
    totals.columns = ['Word', 'Total']
    return totals[totals['Word'] != 'Length']


# Framework names per publisher tier. These must match the 'Framework' values
# exactly: the first entry was previously spelled 'AiVerify', which disagrees with
# the 'AIVerify' key in frontier_mapping, so isin() silently left that framework
# out of group 0.
group_0_frameworks = ['AIVerify', 'ATI', 'CSA', 'DoS', 'HMG', 'ISOIEC', 'NIST', 'OECD', 'UNESCO', 'WEF']
group_1_frameworks = ['Anthropic', 'Cohere', 'Deepmind', 'OpenAI', 'Naver', 'META', 'xAI', 'Microsoft']
group_2_frameworks = ['Deloitte', 'G42', 'Grammarly', 'IBM', 'KPMG', 'Magic', 'NVDIA', 'PaloAlto', 'PwC', 'Amazon']

# Rows of the merged word-count table belonging to each tier.
group_0_results = reg_word_counts_results[reg_word_counts_results['Framework'].isin(group_0_frameworks)]
group_1_results = reg_word_counts_results[reg_word_counts_results['Framework'].isin(group_1_frameworks)]
group_2_results = reg_word_counts_results[reg_word_counts_results['Framework'].isin(group_2_frameworks)]

# Total occurrences of each word stem per tier, plus a copy sorted from most to
# least frequent for plotting.
word_totals_group_0 = _word_totals(group_0_results)
word_totals_group_0_sorted = word_totals_group_0.sort_values(by = 'Total', ascending = False)

word_totals_group_1 = _word_totals(group_1_results)
word_totals_group_1_sorted = word_totals_group_1.sort_values(by = 'Total', ascending = False)

word_totals_group_2 = _word_totals(group_2_results)
word_totals_group_2_sorted = word_totals_group_2.sort_values(by = 'Total', ascending = False)

In [None]:
# Display label for every tracked word stem.
word_mapping = {
    'risk': 'Risk',
    'security': 'Security',
    'safe': 'Safe/Safety',
    'responsib': 'Responsible/Responsibility',
    'ethic': 'Ethics/Ethical',
    'transparen': 'Transparent/Transparency',
    'governance': 'Governance',
    'policy': 'Policy/Policymaker',
    'bias': 'Bias',
    'protect': 'Protect/Protection',
    'compliance': 'Compliance',
    'catastroph': 'Catastrophe/Catastrophic',
    'fair': 'Fair/Fairness',
    'prepare': 'Prepare/Preparedness',
    'accountab': 'Accountable/Accountability',
    'explainab': 'Explainable/Explainability',
    'sustainab': 'Sustainable/Sustainability',
}

# Swap the stems for their display labels in all three sorted totals tables,
# so the group-1 and group-2 charts can use them as well.
for totals in (
    word_totals_group_0_sorted,
    word_totals_group_1_sorted,
    word_totals_group_2_sorted,
):
    totals['Word'] = totals['Word'].replace(word_mapping)

# Bar chart of total word usage for tier 0 (governance and standards bodies).
fig, ax = plt.subplots(figsize=(12, 6), dpi=500, constrained_layout=True)
ax.bar(
    word_totals_group_0_sorted['Word'],
    word_totals_group_0_sorted['Total'],
    color='mediumaquamarine',
    alpha=.8,
)
plt.xticks(rotation=45, ha='right')
ax.set_xlabel('Word')
ax.set_ylabel('Count')
ax.set_title('Word Usage (AI Governance and Standards Bodies)')

# plt.savefig()

plt.show()

In [None]:
# Bar chart of total word usage for tier 1 (frontier AI companies).
fig, ax = plt.subplots(figsize=(12, 6), dpi=500, constrained_layout=True)
ax.bar(
    word_totals_group_1_sorted['Word'],
    word_totals_group_1_sorted['Total'],
    color='mediumseagreen',
    alpha=.8,
)
plt.xticks(rotation=45, ha='right')
ax.set_xlabel('Word')
ax.set_ylabel('Count')
ax.set_title('Word Usage (Frontier AI Companies)')

# plt.savefig()

plt.show()

In [None]:
# Bar chart of total word usage for tier 2 (AI-adopting / enterprise AI companies).
fig, ax = plt.subplots(figsize=(12, 6), dpi=500, constrained_layout=True)
ax.bar(
    word_totals_group_2_sorted['Word'],
    word_totals_group_2_sorted['Total'],
    color='seagreen',
    alpha=.8,
)
plt.xticks(rotation=45, ha='right')
ax.set_xlabel('Word')
ax.set_ylabel('Count')
ax.set_title('Term Usage (AI-Adopting/Enterprise AI Companies)')

# plt.savefig()

plt.show()

In [None]:
# Reshape to long format: one row per (document, word) pair. 'Length' stays an
# identifier column so every count can be divided by its document's length.
melted_df = reg_word_counts_results.melt(
    id_vars=["Filename", "Framework", "Date", 'Length'],
    var_name="Word",
    value_name="Count",
).assign(
    # Stems -> display labels.
    Word=lambda long_df: long_df['Word'].replace(word_mapping),
    # Occurrences divided by the document's Length value.
    Normalised_Count=lambda long_df: long_df['Count'] / long_df['Length'],
)

In [None]:
# Publisher tier for every framework:
#   0 = AI governance and standards bodies
#   1 = frontier AI firms
#   2 = AI-adopting / enterprise AI companies
frontier_mapping = {
    'Amazon': 2, 'Anthropic': 1, 'Cohere': 1, 'Deepmind': 1,
    'Deloitte': 2, 'G42': 2, 'Grammarly': 2, 'IBM': 2,
    'KPMG': 2, 'Magic': 2, 'META': 1, 'Microsoft': 1,
    'Naver': 1, 'NVDIA': 2, 'OpenAI': 1, 'PaloAlto': 2,
    'PwC': 2, 'xAI': 1, 'AIVerify': 0, 'ATI': 0,
    'CSA': 0, 'DoS': 0, 'HMG': 0, 'ISOIEC': 0,
    'NIST': 0, 'OECD': 0, 'UNESCO': 0, 'WEF': 0,
}

melted_df['Frontier'] = melted_df['Framework'].map(frontier_mapping)

# Mean normalised count of each word within each tier, with the tier code cast
# to int after aggregation.
grouped_words = (
    melted_df
    .groupby(['Frontier', 'Word'])['Normalised_Count']
    .mean()
    .reset_index()
    .astype({'Frontier': int})
)

# The eight words with the highest mean normalised count in each tier.
top_words = grouped_words.groupby('Frontier', group_keys=False).apply(
    lambda tier_rows: tier_rows.nlargest(8, 'Normalised_Count')
)

In [None]:
# Words as rows, publisher tiers as columns; combinations with no value become 0.
heatmap_data = grouped_words.pivot(
    index='Word',
    columns='Frontier',
    values='Normalised_Count',
).fillna(0)

fig, ax = plt.subplots(figsize=(10, 8), dpi=500)
# vmax is 1.5x the largest cell, so even the largest value stays below the
# darkest end of the colormap.
# NOTE(review): the plotted values are Count / Length ratios that are not
# multiplied by 100 here -- confirm the '(%)' in the colour-bar label is intended.
sns.heatmap(
    heatmap_data,
    annot=True,
    fmt=".4f",
    cmap="Oranges",
    cbar_kws={'label': 'Normalised Count (%)'},
    vmin=0,
    vmax=heatmap_data.values.max() * 1.5,
    ax=ax,
    # linewidth = 0.3,
    # linecolor = "black"
)
ax.set_title('Term Frequency Heatmap Among Frameworks (Normalised)')
ax.set_ylabel('Term')
ax.set_xlabel('Framework Publisher')
# Cell centres sit at 0.5, 1.5, 2.5; the labels assume the columns are the tier
# codes 0, 1, 2 in that order.
ax.set_xticks([0.5, 1.5, 2.5])
ax.set_xticklabels([
    'AI Governance and Standards Bodies',
    'Frontier AI Companies',
    'AI-Adopting/Enterprise AI Companies',
])
fig.tight_layout()

# plt.savefig()

plt.show()