In [1]:
import json
import pandas as pd
import numpy as np
import math
from statistics import NormalDist

import seaborn as sns; sns.set_theme()
import matplotlib.pyplot as plt

pd.options.display.float_format = '{:.2f}'.format

### SETUP 

In [2]:
def param_default():
    return {
        'dataset' : 'code_completion_random_cut_5k_30_512_tokens',
        #'dataset' : 'code_completion_docstring_random_cut_3.8k_30_150_tokens',
        #'dataset' : 'code_completion_docstring_signature_3.8k_30_150_tokens',
        #'dataset' : 'code_completion_docstring_5k_30_150_tokens',
        'global_ast_results': '/workspaces/code-rationales/data/global_ast_results/gpt',
        'global_taxonomy_results': '/workspaces/code-rationales/data/global_taxonomy_results/gpt',
        'num_experiments' : 30, 
        'bootstrapping': 30
    }
params = param_default()

### Taxonomy Classification

In [3]:
def global_groups() -> dict:
    return {
        'sc_semantic': ['exceptions', 'oop', 'asserts', 'types', 'conditionals', 'loops', 'bool', 'structural', 'statements', 'with'], 
        'sc_nl': ['identifier', 'comment', 'string'],
        'sc_not_semantic': ['punctuation', 'operators', 'indentation', 'functional', 'return', 'expression', 'unknown'], 
        'sc_errors' : ['errors'], 
        'nl_semantic': ['nl_verb', 'nl_noun', 'nl_pronoun', 'nl_adjetive'],
        'nl_not_semantic' : ['nl_adverb', 'nl_determier', 'nl_preposition', 'nl_particle', 'nl_modal', 'nl_conjunction', 'nl_cardinal', 'nl_list', 'nl_other']
    }

### Load Aggregation results

In [4]:
get_experiment_path =  lambda results_folder, dataset, exp: results_folder + '/' + dataset + '_exp_' + str(exp) +'.txt'


In [5]:
def get_experiments_results(results_folder):
    experiment_paths = [get_experiment_path(results_folder, params['dataset'], exp) for exp in range(params['num_experiments'])]
    experiment_global_results = []
    for experiment_path in experiment_paths:
        with open(experiment_path, 'r') as file:
            experiment_global_results.append(json.loads(file.read()))
    return experiment_global_results


### Dictionary Flatting

In [6]:
def flat_global_results(experiment_global_result: dict):
    flatten_results = { key: [] for key in experiment_global_result.keys() }
    for target_key, rationales in experiment_global_result.items():
        for rational_key, rational_values in rationales.items():
            flatten_results[rational_key] += rational_values
    return flatten_results

### Statistics

In [7]:
def bootstrapping( np_data, np_func, size ):
    """Create a bootstrap sample given data and a function
    For instance, a bootstrap sample of means, or mediands. 
    The bootstrap replicates are a long as the original size
    we can choose any observation more than once (resampling with replacement:np.random.choice)
    """
    
    #Cleaning NaNs
    #np_data_clean = np_data[ np.logical_not( np.isnan(np_data) ) ] 
    
    #The size of the bootstrap replicate is as big as size
    #Creating the boostrap replicates as long as the orignal data size
    #This strategy might work as imputation 
    bootstrap_repl = [ np_func( np.random.choice( np_data, size=len(np_data) ) ) for i in range( size ) ]
    
    #logging.info("Covariate: " + cov) #Empirical Mean
    #logging.info("Empirical Mean: " + str(np.mean(np_data_clean))) #Empirical Mean
    #logging.info("Bootstrapped Mean: " + str( np.mean(bootstrap_repl) ) ) #Bootstrapped Mean
    
    return np.array( bootstrap_repl )

In [8]:
def confidence_intervals_large_samples(data, confidence=0.95):
    """
    @confidence: confidence interval 
    @return: tuple (lowerbound, uperbound, h-value)
    """
    dist = NormalDist.from_samples( data )
    z = NormalDist().inv_cdf((1 + confidence) / 2.)
    h = dist.stdev * z / ((len(data) - 1) ** .5)
    return dist.mean - h, dist.mean + h, h

In [9]:
def boostrap_dictionary(dictionary: dict, size):
    boostrapped_dict = {}
    for key, values in dictionary.items():
        if values:
            boostrapped_dict[key] = bootstrapping(values, np.mean, size)
    return boostrapped_dict

### Data processing

In [10]:
def get_experiments_dfs(experients_global_results):
    experiment_dfs = []
    for experiment in experients_global_results:
        experiment = flat_global_results(experiment)
        experiment = boostrap_dictionary(experiment, params['bootstrapping'])
        experiment_dfs.append(pd.DataFrame(experiment))
    return experiment_dfs

### Analysis - Rational Concept - Countings

In [11]:
def create_frequencies_dataframe(experients_global_results: list):
    frequencies_df = pd.DataFrame(columns=experients_global_results[0].keys())
    for experiment_global_result in experients_global_results:
        rationales_results = flat_global_results(experiment_global_result)
        frequencies_df = frequencies_df.append({key: len(value) for key, value in rationales_results.items()}, ignore_index=True)
    frequencies_df = frequencies_df.fillna(0)   
    return frequencies_df


### Analysis - Rational Concept - Bootstrapping

In [12]:
def create_bootstapped_dataframe(experients_global_results:list):
    bootstrapped_df = pd.DataFrame(columns=experients_global_results[0].keys())
    for experiment_global_result in experients_global_results:
        rationales_results = flat_global_results(experiment_global_result)
        rationales_results = boostrap_dictionary(rationales_results, params['bootstrapping'])
        bootstrapped_df = bootstrapped_df.append({key: np.median(value) for key, value in rationales_results.items()}, ignore_index=True)
    bootstrapped_df = bootstrapped_df.fillna(0)
    return bootstrapped_df

### Calculate statistics and get results DF

In [13]:
### CREATE GROUP FREQUENCY RESULTS
def calculate_results(global_dataframe):
    results_df = pd.DataFrame(columns=['type', 'group', 'category', 'mean', 'median', 'std', 'ci'])
    for category in global_dataframe.columns:
        experiments_values = global_dataframe[category].tolist()
        try:
            group = [key for key, value in global_groups().items() if category in value][0]
        except Exception as e:
            print(e)
        results_df = results_df.append({
            'type': group.split('_')[0], 
            'group': group, 
            'category': category, 
            'median' : np.median(experiments_values),
            'mean' : np.mean(experiments_values), 
            'std' : np.std(experiments_values),
            'ci' : confidence_intervals_large_samples(experiments_values)
        }, ignore_index=True)
    return results_df.sort_values(by='median', ascending=False).reset_index(drop=True)

### Execute Analysis

In [14]:
experients_global_results = get_experiments_results(params['global_taxonomy_results'])
global_frequencies_dataframe = create_frequencies_dataframe(experients_global_results)
global_bootstrapped_dataframe = create_bootstapped_dataframe(experients_global_results)

In [15]:
calculate_results(global_frequencies_dataframe)

Unnamed: 0,type,group,category,mean,median,std,ci
0,sc,sc_errors,errors,179914.333333,179867.0,6623.76916,"(177462.35905934658, 182366.3076073201, 2451.9..."
1,sc,sc_not_semantic,unknown,165232.733333,165521.5,5660.635211,"(163137.29015337466, 167328.176513292, 2095.44..."
2,sc,sc_not_semantic,expression,117287.033333,114360.5,22630.593328,"(108909.6833228153, 125664.38334385138, 8377.3..."
3,sc,sc_nl,identifier,69317.2,69290.0,2107.56889,"(68537.02415537018, 70097.37584462982, 780.175..."
4,sc,sc_semantic,structural,69068.333333,68666.5,4086.335215,"(67555.6615328142, 70581.00513385245, 1512.671..."
5,nl,nl_semantic,nl_noun,68193.7,67616.5,2174.666812,"(67388.68597421749, 68998.71402578251, 805.014..."
6,sc,sc_not_semantic,punctuation,36566.266667,36409.0,1064.522144,"(36172.203883291535, 36960.329450041805, 394.0..."
7,sc,sc_semantic,statements,26452.033333,26832.5,4754.09339,"(24692.17208099689, 28211.894585669776, 1759.8..."
8,sc,sc_semantic,types,8419.033333,8428.0,1833.568824,"(7740.2863122416, 9097.780354425066, 678.74702..."
9,sc,sc_nl,string,6743.8,6815.0,833.601,"(6435.219166864138, 7052.380833135862, 308.580..."


In [16]:
calculate_results(global_bootstrapped_dataframe)

Unnamed: 0,type,group,category,mean,median,std,ci
0,sc,sc_semantic,oop,0.104211,0.103279,0.015051,"(0.09863951626339343, 0.10978250999320346, 0.0..."
1,sc,sc_semantic,bool,0.100055,0.096488,0.033093,"(0.08780436337654557, 0.11230492198344161, 0.0..."
2,sc,sc_semantic,loops,0.09218,0.091597,0.01539,"(0.08648305476856498, 0.09787727553686422, 0.0..."
3,sc,sc_not_semantic,functional,0.084218,0.079638,0.020536,"(0.0766157556641589, 0.09181940809301875, 0.00..."
4,sc,sc_semantic,conditionals,0.070443,0.072185,0.012948,"(0.06564963849333061, 0.07523570807851052, 0.0..."
5,sc,sc_semantic,exceptions,0.106895,0.071418,0.125494,"(0.06043955872886983, 0.15335009716699438, 0.0..."
6,sc,sc_not_semantic,operators,0.066394,0.065529,0.005335,"(0.06441923901584683, 0.068369042524367, 0.001..."
7,nl,nl_semantic,nl_noun,0.06529,0.065174,0.002122,"(0.06450502703911538, 0.06607575419553526, 0.0..."
8,sc,sc_nl,identifier,0.064774,0.064984,0.002166,"(0.06397181893584901, 0.06557570457411317, 0.0..."
9,sc,sc_not_semantic,unknown,0.064975,0.064935,0.002062,"(0.06421195366814596, 0.06573821398026312, 0.0..."


### Visualization 

In [None]:
def print_heatmap(
        np_rationales, 
        fig_size = (10,10), 
        font = 15
):
        plt.rcParams.update({'font.size': font})
        ax = plt.axes()
        sns.set(rc = {'figure.figsize':fig_size})
        sns.heatmap(np_rationales, cmap="YlGnBu", 
                yticklabels=yticklabels, 
                xticklabels=xticklabels,
                ax=ax)
        plt.show
        pass