# Experiments [1.1.1] [1.1.2] - Rationales Frequencies

In [7]:
def param_default():
    """Return the default configuration of the rationale-frequency experiments.

    The dictionary holds the model and modality names, the key of the active
    dataset (``dataset``, resolved through ``datasets`` to a file-name
    prefix), the number of experiments, the bootstrapping size and the
    input/output folders.
    """
    # File-name prefix associated with every supported dataset key.
    dataset_prefixes = {
        'SG_BD': 'code_completion_random_cut_5k_30_512_tokens',
        'DC_SG_BD': 'code_completion_docstring_random_cut_3.8k_30_150_tokens',
        'DC_SG': 'code_completion_docstring_signature_3.8k_30_150_tokens',
        'DC': 'code_completion_docstring_5k_30_150_tokens',
    }

    settings = {
        'model': 'codeparrot',
        'dataset': 'DC_SG_BD',  # change this key to analyse another dataset
        'modality': 'nl_sc',
        'datasets': dataset_prefixes,
        'num_experiments': 30,
        'bootstrapping_size': 500,
    }

    # Input folders
    settings['global_taxonomy_results'] = '/workspaces/code-rationales/data/global_taxonomy_results/gpt'
    settings['grouping_results'] = '/workspaces/code-rationales/data/experiments/grouping_results'

    # Output folder
    settings['quantitative_results'] = '/workspaces/code-rationales/data/experiments/quantitative_results'

    return settings


# Configuration shared by every cell below.
params = param_default()

## Source Code

In [8]:
import json
import pandas as pd
import numpy as np
import math
from statistics import NormalDist

import seaborn as sns; sns.set_theme()
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

pd.options.display.float_format = '{:.2f}'.format

In [9]:
from code_rationales.taxonomies import *
import os.path

### Frequency Dataframes

In [10]:
def flat_global_results(experiment_global_result: dict):
    """Merge the per-target rationale lists of one experiment into one list per category.

    ``experiment_global_result`` maps every target key to a dictionary of
    ``rationale category -> list of values``. The result has one entry per
    top-level key and holds, for that category, the concatenation of the
    lists recorded under it across all targets. A rationale category that is
    not also a top-level key raises ``KeyError``.
    """
    merged = {}
    for category in experiment_global_result:
        merged[category] = []

    # Only the nested dictionaries matter here; the target key itself is unused.
    for per_target in experiment_global_result.values():
        for category, values in per_target.items():
            merged[category].extend(values)
    return merged

In [11]:
def create_frequencies_dataframe(experients_global_results: list):
    """Build a table of rationale counts with one row per experiment.

    Every experiment is flattened with ``flat_global_results`` and each
    category is reduced to the number of rationale values collected for it.

    Parameters
    ----------
    experients_global_results : list
        Global results of every experiment, in the format accepted by
        ``flat_global_results``.

    Returns
    -------
    pandas.DataFrame
        One row per experiment and one column per category. A category that
        is missing from an experiment is counted as 0. An empty input yields
        an empty frame.
    """
    # Gather plain records and build the frame once: ``DataFrame.append`` was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = [
        {category: len(values) for category, values in flat_global_results(result).items()}
        for result in experients_global_results
    ]
    return pd.DataFrame(records).fillna(0)


### Groupings

In [12]:
def get_experiment_path(results_folder, dataset, exp):
    """Return the path of one experiment file: ``<results_folder>/<dataset>_exp_<exp>.txt``."""
    file_name = dataset + '_exp_' + str(exp) + '.txt'
    return results_folder + '/' + file_name

In [13]:
def store_groupings(global_results: list, path:str):
    """Serialise ``global_results`` as JSON into the file at ``path``, replacing any previous content."""
    with open(path, mode='w') as json_file:
        json.dump(global_results, fp=json_file)

In [14]:
def open_groupings(path:str):
    """Read the file at ``path`` and return its decoded JSON content."""
    with open(path) as json_file:
        return json.load(json_file)

In [15]:
def get_experiments_results(results_folder, name:str):
    """Load the global results of every experiment, reusing a cached JSON file when present.

    The cache file is
    ``<grouping_results>/<model>/<modality>/<dataset file prefix>_<name>.json``,
    with every component read from the module-level ``params``. When that
    file exists its content is returned as is. Otherwise the
    ``num_experiments`` per-experiment files located under ``results_folder``
    are parsed as JSON, the resulting list is written to the cache file and
    then returned.
    """
    dataset_prefix = params['datasets'][params['dataset']]
    cache_path = '/'.join([
        params['grouping_results'],
        params['model'],
        params['modality'],
        dataset_prefix + '_' + name + '.json',
    ])
    if os.path.isfile(cache_path):
        return open_groupings(cache_path)

    loaded_results = []
    for exp in range(params['num_experiments']):
        experiment_file_path = get_experiment_path(results_folder, dataset_prefix, exp)
        with open(experiment_file_path, 'r') as experiment_file:
            loaded_results.append(json.load(experiment_file))

    # Cache the combined list so later runs skip the per-experiment files.
    store_groupings(loaded_results, cache_path)
    return loaded_results

### Statistics 

In [16]:
def bootstrapping( np_data, np_func, size ):
    """Build ``size`` bootstrap replicates of a statistic.

    Every replicate draws ``len(np_data)`` observations from ``np_data``
    with replacement (``np.random.choice``), so an observation can be picked
    more than once, and reduces that resample with ``np_func`` (for instance
    ``np.mean`` or ``np.median``).

    NaN values are not removed before resampling.

    @return: numpy array holding the ``size`` replicates
    """
    sample_length = len( np_data )
    replicates = []
    for _ in range( size ):
        # One resample as long as the original data, then its statistic.
        resample = np.random.choice( np_data, size=sample_length )
        replicates.append( np_func( resample ) )
    return np.array( replicates )

In [17]:
def confidence_intervals_large_samples(data, confidence=0.95):
    """
    Normal-approximation confidence interval around the mean of ``data``.

    @data: sequence of numeric observations
    @confidence: confidence level of the interval (0.95 by default)
    @return: tuple (lower bound, upper bound, half-width h)
    """
    fitted = NormalDist.from_samples( data )
    # Two-sided critical value of the standard normal for the requested level.
    critical_value = NormalDist().inv_cdf((1 + confidence) / 2.)
    # NOTE(review): the fitted stdev is divided by sqrt(n - 1) rather than
    # sqrt(n) — confirm this is the intended standard-error formula.
    half_width = fitted.stdev * critical_value / ((len(data) - 1) ** .5)
    center = fitted.mean
    return center - half_width, center + half_width, half_width

In [18]:
def boostrap_dictionary(dictionary: dict, size):
    """Bootstrap the mean of every non-empty entry of ``dictionary``.

    Entries whose value is falsy (for example an empty list) are left out of
    the result; every other key maps to the array returned by
    ``bootstrapping(values, np.mean, size)``.
    """
    return {
        key: bootstrapping(values, np.mean, size)
        for key, values in dictionary.items()
        if values
    }

### Calculate statistics and get results DF

In [19]:
### CREATE GROUP FREQUENCY RESULTS
def calculate_results(global_dataframe):
    """Summarise the per-experiment frequencies of every category.

    Parameters
    ----------
    global_dataframe : pandas.DataFrame
        One row per experiment and one column per category.

    Returns
    -------
    pandas.DataFrame
        One row per category with the columns ``type``, ``group``,
        ``category``, ``mean``, ``median``, ``std`` and ``ci`` (the tuple
        returned by ``confidence_intervals_large_samples``), sorted by
        descending median with a fresh integer index. ``group`` is the first
        taxonomy group from ``global_groups()`` that contains the category and
        ``type`` is the part of the group name before the first underscore.
        A category found in no group is reported and labelled ``'unknown'``.
    """
    # Looked up once instead of once per category.
    # NOTE(review): assumes global_groups() returns the same mapping on every call — confirm.
    taxonomy_groups = global_groups()

    # Rows are collected as plain records and turned into a frame once:
    # ``DataFrame.append`` was removed in pandas 2.0.
    records = []
    for category in global_dataframe.columns:
        experiments_values = global_dataframe[category].tolist()
        group = next(
            (name for name, members in taxonomy_groups.items() if category in members),
            None,
        )
        if group is None:
            # Previously this case only printed the lookup error and then
            # reused the group of the preceding category (or failed with a
            # NameError on the first one); label the row explicitly instead.
            print(f"Category '{category}' does not belong to any taxonomy group")
            group = 'unknown'
        records.append({
            'type': group.split('_')[0],
            'group': group,
            'category': category,
            'median': np.median(experiments_values),
            'mean': np.mean(experiments_values),
            'std': np.std(experiments_values),
            'ci': confidence_intervals_large_samples(experiments_values),
        })

    results_df = pd.DataFrame(
        records,
        columns=['type', 'group', 'category', 'mean', 'median', 'std', 'ci'],
    )
    return results_df.sort_values(by='median', ascending=False).reset_index(drop=True)

In [20]:
# Load the global taxonomy results of all experiments; 'level_1' is the suffix
# of the cached grouping file that is reused when it already exists.
experiments_global_results = get_experiments_results(params['global_taxonomy_results'], 'level_1')

In [21]:
# Count, for every experiment (row), how many rationale values fall in each category (column).
global_frequencies_dataframe = create_frequencies_dataframe(experiments_global_results)
global_frequencies_dataframe

Unnamed: 0,punctuation,exceptions,oop,asserts,types,conditionals,loops,operators,indentation,bool,...,nl_pronoun,nl_adverb,nl_adjetive,nl_determiner,nl_preposition,nl_particle,nl_modal,nl_conjunction,nl_cardinal,nl_other
0,209452,7170,17089,1177,102664,5391,1879,35850,396367,1318,...,3801,11351,37943,50423,55014,131,3687,17943,3635,40246
1,206115,8280,16253,1212,95027,5004,2283,36222,378368,1872,...,3411,9100,38739,48037,52688,238,2341,17405,4309,39219
2,184980,9635,15828,943,79684,4176,2055,36601,360604,1676,...,2930,8604,37666,44851,50595,222,2995,16355,4674,36009
3,207147,7252,15944,1130,107351,4880,1993,55701,384061,2006,...,3540,10140,38649,46920,53059,109,2489,17487,2728,39296
4,194851,7963,15591,1023,80358,5453,1546,35526,379020,1714,...,3703,10971,37395,46356,53443,117,2963,16762,4006,37602
5,182024,9111,16365,846,86323,4658,1711,33074,338639,1764,...,2535,9637,34614,42837,47644,157,2597,15988,4138,37202
6,196255,4999,16342,1196,108108,6324,2022,38731,376384,2284,...,3034,9927,36926,47772,52782,169,3338,16498,4307,38501
7,195056,9975,16312,1026,106810,4958,2263,35877,373581,1420,...,3216,10264,37838,44176,50512,182,3011,16445,3288,40320
8,195438,9418,16326,706,97171,5323,1982,34220,391631,2101,...,4063,10749,39992,50387,54091,426,2936,18159,5094,40586
9,185253,9744,15119,1255,105581,5271,1387,33158,332217,2314,...,3480,10260,36881,41676,49381,189,2400,15763,3952,38291


In [22]:
# Summarise every category across experiments, then order the rows by group
# (ascending) and, within each group, by mean frequency (descending).
global_frequencies_dataframe_results = (
    calculate_results(global_frequencies_dataframe)
    .sort_values(by=['group', 'mean'], ascending=[True, False])
)
global_frequencies_dataframe_results

Unnamed: 0,type,group,category,mean,median,std,ci
1,excluded,excluded,excluded,685630.27,682215.0,29573.52,"(674682.7959218231, 696577.7374115103, 10947.4..."
12,nl,nl_not_semantic,nl_preposition,51498.53,51196.0,2345.96,"(50630.10947981685, 52366.95718684982, 868.423..."
14,nl,nl_not_semantic,nl_determiner,46083.07,45947.0,2351.46,"(45212.60871391496, 46953.52461941837, 870.457..."
20,nl,nl_not_semantic,nl_conjunction,16656.17,16570.0,621.34,"(16426.161991183068, 16886.171342150268, 230.0..."
22,nl,nl_not_semantic,nl_adverb,9909.77,9962.0,855.78,"(9592.976337425343, 10226.55699590799, 316.790..."
25,nl,nl_not_semantic,nl_cardinal,4070.53,4138.0,600.55,"(3848.2242076692723, 4292.842458997395, 222.30..."
28,nl,nl_not_semantic,nl_modal,2902.07,2947.0,336.35,"(2777.5569294813818, 3026.5764038519515, 124.5..."
33,nl,nl_not_semantic,nl_particle,202.07,188.5,87.16,"(169.8005350843069, 234.33279824902644, 32.266..."
3,nl,nl_semantic,nl_noun,403578.7,402929.0,15210.07,"(397948.26374435826, 409209.13625564176, 5630...."
13,nl,nl_semantic,nl_verb,49245.57,48836.0,2225.11,"(48421.87964418512, 50069.25368914821, 823.687..."


In [None]:
# Write the frequency summary to
# <quantitative_results>/<model>/<modality>/<dataset file prefix>_frequencies.csv
global_frequencies_dataframe_results.to_csv(
    '/'.join([
        params['quantitative_results'],
        params['model'],
        params['modality'],
        params['datasets'][params['dataset']] + '_frequencies' + '.csv',
    ]),
    index=False,
)