This is a notebook for computing the data statistics.

The results are written to `ai_comment_statistics_<type>.json` and are intended to populate the `global_explanations_data` dictionary in `xai_models/models/modules/explainers/ai_comment_statistics.py`.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors
from sklearn import preprocessing

import sys
sys.path.append("/data/hua/workspace/projects/convxai")
from convxai.xai_models.models.modules.explainers.example_explainer import ExampleExplainer
from convxai.writing_models.models.diversity_model import diversity_model_label_mapping, label_mapping


2022-11-28 18:54:01 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
2022-11-28 18:54:01,239 - Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.4.1.json:   0%|   …

2022-11-28 18:54:03 INFO: Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| depparse     | combined  |
| sentiment    | sstplus   |
| constituency | wsj       |
| ner          | ontonotes |

2022-11-28 18:54:03,246 - Loading these models for language: en (English):
| Processor    | Package   |
----------------------------
| tokenize     | combined  |
| pos          | combined  |
| lemma        | combined  |
| depparse     | combined  |
| sentiment    | sstplus   |
| constituency | wsj       |
| ner          | ontonotes |

2022-11-28 18:54:03 INFO: Use device: gpu
2022-11-28 18:54:03,269 - Use device: gpu
2022-11-28 18:54:03 INFO: Loading: tokenize
2022-11-28 18:54:03,271 - Loading: tokenize
2022-11-28 18:54:07 INFO: Loading: pos
2022-11-28 18:54:07,560 - Loading: pos
2022-11-28 18:54:08 INFO: Loading: lemma
2022-11-28 18:54:08,152 - Loading: 

Initializations

In [114]:
# Container for every statistic computed in this notebook, keyed by venue.
# The comprehension evaluates the inner literal once per venue, so each venue
# owns its own independent nested dicts.
global_explanations_data = {
    venue: {
        "paper_count": 0,
        "sentence_count": 0,
        "sentence_length": {},
        "sentence_score_range": {},
        "abstract_score_range": {},
        "Aspect_Patterns_dict": {},
    }
    for venue in ("ACL", "CHI", "ICLR")
}

Load Data

In [115]:
### Load data
# One CSV per venue, read from the notebook's working directory.
acl_df, chi_df, iclr_df = (
    pd.read_csv(f"./{venue}.csv") for venue in ("ACL", "CHI", "ICLR")
)

# One ExampleExplainer per venue, each constructed with the venue name.
acl_example_explainer, chi_example_explainer, iclr_example_explainer = (
    ExampleExplainer(venue) for venue in ("ACL", "CHI", "ICLR")
)

# Threshold scheme passed as `type=` to the statistics helpers in later cells.
# NOTE(review): this name shadows the built-in `type`; it is kept because
# other cells read it under this name.
type = 'percentage'

print(diversity_model_label_mapping)

{0: 'background', 1: 'purpose', 2: 'method', 3: 'finding', 4: 'other'}


In [116]:
def get_thresholds(score_list, label_list=None, label = None, type='percentage', threshold_level = 0):
    """Return a single threshold summarising the distribution of ``score_list``.

    NaN scores are replaced by 0 (``np.nan_to_num``) once, up front, so that
    every statistic (mean, std, percentile) is computed on the same values.
    When ``label`` is given, only the scores whose positionally aligned entry
    in ``label_list`` equals ``label`` are used.

    Args:
        score_list: 1-D sequence of numeric scores (list, ndarray or Series).
        label_list: Sequence of labels aligned with ``score_list``. Required
            when ``label`` is not ``None``.
        label: Label value to filter on, or ``None`` to use every score.
        type: ``'percentage'`` returns the percentile at
            ``[20, 40, 50, 60, 80][threshold_level]``; ``'deviation'`` returns
            ``mean + k * std`` with ``k = [-2, -1, 0, 1, 2][threshold_level]``
            (population std, i.e. NumPy's default ``ddof=0``).
        threshold_level: Index (0-4) into the list selected by ``type``.

    Returns:
        The threshold rounded to 4 decimal places.

    Raises:
        ValueError: If ``type`` is not one of the two supported schemes, or if
            ``label`` is given without ``label_list``.
    """
    thresholds = {
        'percentage': [20, 40, 50, 60, 80],
        'deviation': [-2, -1, 0, 1, 2]
    }
    if type not in thresholds:
        raise ValueError(f"Unknown threshold type {type!r}; expected one of {sorted(thresholds)}")

    # Clean once so mean/std/percentile all see identical data.
    values = np.nan_to_num(np.asarray(score_list))

    if label is not None:
        if label_list is None:
            raise ValueError("label_list is required when label is given")
        # Boolean mask => purely positional selection, works for lists,
        # arrays and Series regardless of the Series index.
        values = values[np.asarray(label_list) == label]

    level = thresholds[type][threshold_level]
    if type == 'deviation':
        result = np.mean(values) + level * np.std(values)
    else:
        result = np.percentile(values, level, axis=0)
    return round(result, 4)

def save_statistics_to_dict(score_list, label_list=None, label = None, type = 'percentage', threshold_levels=[0, 1, 2, 3, 4]):
    """Collect the threshold for each requested level into a list.

    Calls ``get_thresholds`` once per entry of ``threshold_levels`` (in order),
    forwarding ``score_list``, ``label_list``, ``label`` and ``type`` unchanged.

    Returns:
        A list with one threshold per entry of ``threshold_levels``.
    """
    collected = []
    for level in threshold_levels:
        collected.append(
            get_thresholds(
                score_list,
                label_list=label_list,
                label=label,
                type=type,
                threshold_level=level,
            )
        )
    return collected


In [117]:
def visualize_distribution(score_list, n_bins = 24):
    """Draw a histogram of ``score_list`` with ``n_bins`` bins on a new figure.

    A fresh single-axes figure is created with ``tight_layout`` enabled;
    nothing is returned.
    """
    figure, axis = plt.subplots(nrows=1, ncols=1, tight_layout=True)
    axis.hist(score_list, bins=n_bins)

### \#Paper Count | \#Sentence Count

In [118]:
# Per-sentence paper-id column of each venue (one row per sentence).
acl_id = acl_df['id']
chi_id = chi_df['id']
iclr_id = iclr_df['id']

# Venue name paired with its id column, in reporting order.
_venue_ids = (("ACL", acl_id), ("CHI", chi_id), ("ICLR", iclr_id))

# Paper count is taken as the largest id plus one.
# NOTE(review): assumes ids are 0-based and contiguous — TODO confirm.
for venue, ids in _venue_ids:
    print(f"======>>> {venue} Paper Count ={ids.max() + 1}")

print('==============================================================')
# Sentence count is simply the number of rows.
for venue, ids in _venue_ids:
    print(f"======>>> {venue} Sentence Count ={len(ids)}")

for venue, ids in _venue_ids:
    global_explanations_data[venue]["paper_count"] = ids.max() + 1
    global_explanations_data[venue]["sentence_count"] = len(ids)

print('==============================================================')
print("global_explanations_data:", global_explanations_data)

global_explanations_data: {'ACL': {'paper_count': 3221, 'sentence_count': 20744, 'sentence_length': {}, 'sentence_score_range': {}, 'abstract_score_range': {}, 'Aspect_Patterns_dict': {}}, 'CHI': {'paper_count': 3235, 'sentence_count': 21643, 'sentence_length': {}, 'sentence_score_range': {}, 'abstract_score_range': {}, 'Aspect_Patterns_dict': {}}, 'ICLR': {'paper_count': 3479, 'sentence_count': 25873, 'sentence_length': {}, 'sentence_score_range': {}, 'abstract_score_range': {}, 'Aspect_Patterns_dict': {}}}


### Sentence Length

In [119]:

# Aspect label of every sentence, as NumPy arrays (reused by later cells).
acl_label_list = np.array(acl_df['aspect'])
chi_label_list = np.array(chi_df['aspect'])
iclr_label_list = np.array(iclr_df['aspect'])

# Token count of every sentence.
acl_token_count_list = acl_df['token_count']
chi_token_count_list = chi_df['token_count']
iclr_token_count_list = iclr_df['token_count']

# Venue -> (token counts, aspect labels), in the order ACL, CHI, ICLR.
_length_inputs = {
    "ACL": (acl_token_count_list, acl_label_list),
    "CHI": (chi_token_count_list, chi_label_list),
    "ICLR": (iclr_token_count_list, iclr_label_list),
}

# Thresholds over all sentences of a venue.
for venue, (token_counts, _unused_labels) in _length_inputs.items():
    global_explanations_data[venue]["sentence_length"]["all"] = save_statistics_to_dict(token_counts, type=type)

# Thresholds restricted to the sentences of each aspect label.
for label_idx, label in diversity_model_label_mapping.items():
    print(f"label_idx={label_idx}, label={label}")
    for venue, (token_counts, aspect_labels) in _length_inputs.items():
        global_explanations_data[venue]["sentence_length"][label] = save_statistics_to_dict(
            token_counts, aspect_labels, label_idx, type=type
        )

global_explanations_data

label_idx=0, label=background
label_idx=1, label=purpose
label_idx=2, label=method
label_idx=3, label=finding
label_idx=4, label=other


{'ACL': {'paper_count': 3221,
  'sentence_count': 20744,
  'sentence_length': {'all': [17.0, 22.0, 24.0, 27.0, 33.0],
   'background': [16.0, 20.0, 22.0, 24.0, 30.0],
   'purpose': [19.0, 23.0, 26.0, 28.0, 34.0],
   'method': [18.0, 23.0, 25.0, 27.0, 35.0],
   'finding': [17.0, 22.0, 25.0, 27.0, 34.0],
   'other': [2.0, 3.0, 3.5, 4.0, 5.4]},
  'sentence_score_range': {},
  'abstract_score_range': {},
  'Aspect_Patterns_dict': {}},
 'CHI': {'paper_count': 3235,
  'sentence_count': 21643,
  'sentence_length': {'all': [17.0, 21.0, 24.0, 26.0, 33.0],
   'background': [15.0, 20.0, 22.0, 24.0, 29.0],
   'purpose': [19.0, 24.0, 26.0, 28.0, 35.0],
   'method': [18.0, 23.0, 25.0, 28.0, 35.0],
   'finding': [17.0, 22.0, 24.0, 27.0, 33.0],
   'other': [2.0, 3.8, 6.5, 7.2, 10.0]},
  'sentence_score_range': {},
  'abstract_score_range': {},
  'Aspect_Patterns_dict': {}},
 'ICLR': {'paper_count': 3479,
  'sentence_count': 25873,
  'sentence_length': {'all': [18.0, 23.0, 25.0, 28.0, 35.0],
   'backgr

### Sentence Score Range

In [120]:
# acl_ppl_range
# visualize_distribution(acl_paper_score)

In [121]:

# Per-sentence perplexity column of each venue, used as the sentence score.
acl_ppl_range = acl_df['perplexity']
chi_ppl_range = chi_df['perplexity']
iclr_ppl_range = iclr_df['perplexity']

# Venue -> (perplexities, aspect labels), in the order ACL, CHI, ICLR.
_score_inputs = {
    "ACL": (acl_ppl_range, acl_label_list),
    "CHI": (chi_ppl_range, chi_label_list),
    "ICLR": (iclr_ppl_range, iclr_label_list),
}

# Thresholds over all sentences of a venue.
for venue, (perplexities, _unused_labels) in _score_inputs.items():
    global_explanations_data[venue]["sentence_score_range"]["all"] = save_statistics_to_dict(perplexities, type=type)

# Thresholds restricted to the sentences of each aspect label.
for label_idx, label in diversity_model_label_mapping.items():
    print(f"label_idx={label_idx}, label={label}")
    for venue, (perplexities, aspect_labels) in _score_inputs.items():
        global_explanations_data[venue]["sentence_score_range"][label] = save_statistics_to_dict(
            perplexities, aspect_labels, label_idx, type=type
        )

global_explanations_data




label_idx=0, label=background
label_idx=1, label=purpose
label_idx=2, label=method
label_idx=3, label=finding
label_idx=4, label=other


{'ACL': {'paper_count': 3221,
  'sentence_count': 20744,
  'sentence_length': {'all': [17.0, 22.0, 24.0, 27.0, 33.0],
   'background': [16.0, 20.0, 22.0, 24.0, 30.0],
   'purpose': [19.0, 23.0, 26.0, 28.0, 34.0],
   'method': [18.0, 23.0, 25.0, 27.0, 35.0],
   'finding': [17.0, 22.0, 25.0, 27.0, 34.0],
   'other': [2.0, 3.0, 3.5, 4.0, 5.4]},
  'sentence_score_range': {'all': [27.6981,
    40.9609,
    48.5276,
    57.8652,
    85.9444],
   'background': [27.191, 39.669, 46.89, 56.4117, 83.9507],
   'purpose': [27.6238, 38.2317, 43.4746, 50.4653, 70.827],
   'method': [35.569, 49.9949, 57.9529, 66.9595, 98.9945],
   'finding': [23.6077, 37.394, 45.3252, 54.585, 84.4604],
   'other': [75.6189, 99.9501, 133.1788, 183.2107, 320.2562]},
  'abstract_score_range': {},
  'Aspect_Patterns_dict': {}},
 'CHI': {'paper_count': 3235,
  'sentence_count': 21643,
  'sentence_length': {'all': [17.0, 21.0, 24.0, 26.0, 33.0],
   'background': [15.0, 20.0, 22.0, 24.0, 29.0],
   'purpose': [19.0, 24.0, 26.

### Abstract Score Range

In [122]:
###### Abstract (paper-level) score: mean sentence perplexity per paper ######
def _mean_perplexity_per_paper(df, paper_ids):
    """Return the mean 'perplexity' of each paper id in ``0 .. paper_ids.max()``.

    A single group-by replaces one ``DataFrame.query`` scan per paper. The
    result is reindexed over the full id range so that position ``i`` of the
    returned list belongs to paper ``i``; ids with no rows yield NaN.
    """
    per_paper_mean = df.groupby('id')['perplexity'].mean()
    return per_paper_mean.reindex(range(paper_ids.max() + 1)).tolist()


acl_paper_score = _mean_perplexity_per_paper(acl_df, acl_id)
chi_paper_score = _mean_perplexity_per_paper(chi_df, chi_id)
iclr_paper_score = _mean_perplexity_per_paper(iclr_df, iclr_id)

# Disabled experiment: min / 99.9th-percentile normalisation of the paper scores.
# def normalize(data):
#     normalized_data = (data - np.min(data)) / (np.percentile(data, 99.9) - np.min(data))
#     return normalized_data
# acl_paper_score = normalize(acl_paper_score)
# chi_paper_score = normalize(chi_paper_score)
# iclr_paper_score = normalize(iclr_paper_score)


# Unlike the sentence-level entries, the abstract score range is stored as a
# flat list of thresholds (no per-aspect breakdown).
global_explanations_data["ACL"]["abstract_score_range"] = save_statistics_to_dict(acl_paper_score, type = type)
global_explanations_data["CHI"]["abstract_score_range"] = save_statistics_to_dict(chi_paper_score, type = type)
global_explanations_data["ICLR"]["abstract_score_range"] = save_statistics_to_dict(iclr_paper_score, type = type)
global_explanations_data


{'ACL': {'paper_count': 3221,
  'sentence_count': 20744,
  'sentence_length': {'all': [17.0, 22.0, 24.0, 27.0, 33.0],
   'background': [16.0, 20.0, 22.0, 24.0, 30.0],
   'purpose': [19.0, 23.0, 26.0, 28.0, 34.0],
   'method': [18.0, 23.0, 25.0, 27.0, 35.0],
   'finding': [17.0, 22.0, 25.0, 27.0, 34.0],
   'other': [2.0, 3.0, 3.5, 4.0, 5.4]},
  'sentence_score_range': {'all': [27.6981,
    40.9609,
    48.5276,
    57.8652,
    85.9444],
   'background': [27.191, 39.669, 46.89, 56.4117, 83.9507],
   'purpose': [27.6238, 38.2317, 43.4746, 50.4653, 70.827],
   'method': [35.569, 49.9949, 57.9529, 66.9595, 98.9945],
   'finding': [23.6077, 37.394, 45.3252, 54.585, 84.4604],
   'other': [75.6189, 99.9501, 133.1788, 183.2107, 320.2562]},
  'abstract_score_range': [40.7945, 51.096, 56.5085, 62.6393, 80.277],
  'Aspect_Patterns_dict': {}},
 'CHI': {'paper_count': 3235,
  'sentence_count': 21643,
  'sentence_length': {'all': [17.0, 21.0, 24.0, 26.0, 33.0],
   'background': [15.0, 20.0, 22.0, 24

### Aspect_Patterns_dict

In [123]:
# Hand-written aspect patterns per venue. Each key is a sequence of aspect
# label indices (one digit per sentence); each value is the display text for
# that sequence, giving the share of sentences in each consecutive aspect run.
# The arrows are written as the HTML entity "&gt;" in the original strings and
# are kept as-is.
global_explanations_data["ACL"]["Aspect_Patterns_dict"] = {
                    "00122233": "'background' (25%)   -&gt; 'purpose' (12.5%) -&gt; 'method'  (37.5%) -&gt; 'finding' (25%)",
                    "001233": "'background' (33.3%) -&gt; 'purpose' (16.7%) -&gt; 'method'  (16.7%) -&gt; 'finding' (33.3%)",
                    "0002233": "'background' (42.9%) -&gt; 'method'  (28.6%) -&gt; 'finding' (28.5%)",
                    "000133": "'background' (50%)   -&gt; 'purpose' (16.7%) -&gt; 'finding' (33.3%)",
                    "00323333": "'background' (25%)   -&gt; 'finding' (12.5%) -&gt; 'method'  (12.5%) -&gt; 'finding' (50%)",
                }

global_explanations_data["CHI"]["Aspect_Patterns_dict"] = {
                    "0001333": "'background' (42.9%) -&gt; 'purpose' (14.3%)  -&gt; 'finding' (42.9%)",
                    "001222333": "'background' (22.2%) -&gt; 'purpose' (11.2%) -&gt; 'method' (33.3%) -&gt; 'finding' (33.3%)",
                    "001233": "'background' (33.3%) -&gt; 'purpose' (16.7%) -&gt; 'method' (16.7%)  -&gt; 'finding' (33.3%)",
                    "002333": "'background' (33.3%) -&gt; 'method' (16.7%)  -&gt;  'finding' (50%)",
                    "000300100323333": "'background' (20%)   -&gt; 'finding' (6.7%)  -&gt;  'background' (13.3%) -&gt; 'purpose' (6.7%) -&gt; 'background' (13.3%) -&gt; 'finding' (6.7%) -&gt; 'method' (6.7%) -&gt; 'finding' (26.7%)"
                }

global_explanations_data["ICLR"]["Aspect_Patterns_dict"] = {
                    "001233": "'background' (33.3%) -&gt; 'purpose' (16.7%) -&gt; 'method' (16.7%) -&gt; 'finding' (33.3%)",
                    "23333": "'Method' (20%) -&gt; 'finding' (80%)",
                    # Fixed: the 'purpose' share was missing its percent sign.
                    "0001333": "'background' (42.9%) -&gt; 'purpose' (14.2%) -&gt; 'finding' (42.9%)",
                    "00000232333": "'background' (45.5%) -&gt; 'method' (9.1%) -&gt; 'finding' (9.1%) -&gt; 'method' (9.1%) -&gt; 'finding' (27.3%)",
                    "001222333": "'Background' (22.2%) -&gt; 'purpose' (11.1%) -&gt; 'method' (33.3%) -&gt; 'finding' (33.4%)",
                }
global_explanations_data

{'ACL': {'paper_count': 3221,
  'sentence_count': 20744,
  'sentence_length': {'all': [17.0, 22.0, 24.0, 27.0, 33.0],
   'background': [16.0, 20.0, 22.0, 24.0, 30.0],
   'purpose': [19.0, 23.0, 26.0, 28.0, 34.0],
   'method': [18.0, 23.0, 25.0, 27.0, 35.0],
   'finding': [17.0, 22.0, 25.0, 27.0, 34.0],
   'other': [2.0, 3.0, 3.5, 4.0, 5.4]},
  'sentence_score_range': {'all': [27.6981,
    40.9609,
    48.5276,
    57.8652,
    85.9444],
   'background': [27.191, 39.669, 46.89, 56.4117, 83.9507],
   'purpose': [27.6238, 38.2317, 43.4746, 50.4653, 70.827],
   'method': [35.569, 49.9949, 57.9529, 66.9595, 98.9945],
   'finding': [23.6077, 37.394, 45.3252, 54.585, 84.4604],
   'other': [75.6189, 99.9501, 133.1788, 183.2107, 320.2562]},
  'abstract_score_range': [40.7945, 51.096, 56.5085, 62.6393, 80.277],
  'Aspect_Patterns_dict': {'00122233': "'background' (25%)   -&gt; 'purpose' (12.5%) -&gt; 'method'  (37.5%) -&gt; 'finding' (25%)",
   '001233': "'background' (33.3%) -&gt; 'purpose' (16

Save to .json file

In [124]:
import json
def np_encoder(object):
    """``default`` hook for ``json.dump``: convert NumPy values to built-in types.

    Args:
        object: A value the JSON encoder could not serialise on its own.
            (The parameter name shadows the built-in ``object``; it is kept
            so the function's signature is unchanged.)

    Returns:
        The Python scalar for a NumPy scalar (``.item()``), or a nested list
        for a NumPy array (``.tolist()``).

    Raises:
        TypeError: For any other type, as the ``json`` module expects from a
            ``default`` hook. Returning ``None`` instead would make the
            encoder silently write ``null`` for unsupported values.
    """
    if isinstance(object, np.generic):
        return object.item()
    if isinstance(object, np.ndarray):
        return object.tolist()
    raise TypeError(f"Object of type {object.__class__.__name__} is not JSON serializable")

# Serialise first: if encoding fails, the output file has not been opened
# (and therefore not truncated) yet, so no partial JSON is left behind.
serialized_statistics = json.dumps(global_explanations_data, indent=2, default=np_encoder)

# The file name embeds the threshold scheme held in the notebook-level `type` variable.
with open(f'ai_comment_statistics_{type}.json', 'w') as fp:
    fp.write(serialized_statistics)

