In [1]:
import json
import pandas as pd
import numpy as np
import math
from statistics import NormalDist

import seaborn as sns; sns.set_theme()
import matplotlib.pyplot as plt

pd.options.display.float_format = '{:.2f}'.format

### SETUP 

In [19]:
def param_default():
    return {
        'dataset' : 'code_completion_random_cut_5k_30_512_tokens',
        #'dataset' : 'code_completion_docstring_random_cut_3.8k_30_150_tokens',
        #'dataset' : 'code_completion_docstring_signature_3.8k_30_150_tokens',
        #'dataset' : 'code_completion_docstring_5k_30_150_tokens',
        'global_ast_results': '/workspaces/code-rationales/data/global_ast_results/gpt',
        'global_taxonomy_results': '/workspaces/code-rationales/data/global_taxonomy_results/gpt',
        'num_experiments' : 30, 
        'bootstrapping_size': 500 ## question.. should be 500, change name
    }
params = param_default()

### Taxonomy Classification

In [3]:
def global_groups() -> dict:
    return {
        'sc_semantic': ['exceptions', 'oop', 'asserts', 'types', 'conditionals', 'loops', 'bool', 'structural', 'statements', 'with'], 
        'sc_nl': ['identifier', 'comment', 'string'],
        'sc_not_semantic': ['punctuation', 'operators', 'indentation', 'functional', 'return', 'expression', 'unknown'], 
        'sc_errors' : ['errors'], 
        'nl_semantic': ['nl_verb', 'nl_noun', 'nl_pronoun', 'nl_adjetive'],
        'nl_not_semantic' : ['nl_adverb', 'nl_determier', 'nl_preposition', 'nl_particle', 'nl_modal', 'nl_conjunction', 'nl_cardinal', 'nl_list', 'nl_other']
    }

### Load Aggregation results

In [4]:
get_experiment_path =  lambda results_folder, dataset, exp: results_folder + '/' + dataset + '_exp_' + str(exp) +'.txt'


In [5]:
def get_experiments_results(results_folder):
    experiment_paths = [get_experiment_path(results_folder, params['dataset'], exp) for exp in range(params['num_experiments'])]
    experiment_global_results = []
    for experiment_path in experiment_paths:
        with open(experiment_path, 'r') as file:
            experiment_global_results.append(json.loads(file.read()))
    return experiment_global_results


### Dictionary Flatting

In [6]:
def flat_global_results(experiment_global_result: dict):
    flatten_results = { key: [] for key in experiment_global_result.keys() } ## There are 31 fixed categories
    for target_key, rationales in experiment_global_result.items():
        for rational_key, rational_values in rationales.items():
            flatten_results[rational_key] += rational_values
    return flatten_results

### Statistics

In [7]:
def bootstrapping( np_data, np_func, size ):
    """Create a bootstrap sample given data and a function
    For instance, a bootstrap sample of means, or mediands. 
    The bootstrap replicates are a long as the original size
    we can choose any observation more than once (resampling with replacement:np.random.choice)
    """
    
    #Cleaning NaNs
    #np_data_clean = np_data[ np.logical_not( np.isnan(np_data) ) ] 
    
    #The size of the bootstrap replicate is as big as size
    #Creating the boostrap replicates as long as the orignal data size
    #This strategy might work as imputation 
    bootstrap_repl = [ np_func( np.random.choice( np_data, size=len(np_data) ) ) for i in range( size ) ]
    
    #logging.info("Covariate: " + cov) #Empirical Mean
    #logging.info("Empirical Mean: " + str(np.mean(np_data_clean))) #Empirical Mean
    #logging.info("Bootstrapped Mean: " + str( np.mean(bootstrap_repl) ) ) #Bootstrapped Mean
    
    return np.array( bootstrap_repl )

In [8]:
def confidence_intervals_large_samples(data, confidence=0.95):
    """
    @confidence: confidence interval 
    @return: tuple (lowerbound, uperbound, h-value)
    """
    dist = NormalDist.from_samples( data )
    z = NormalDist().inv_cdf((1 + confidence) / 2.)
    h = dist.stdev * z / ((len(data) - 1) ** .5)
    return dist.mean - h, dist.mean + h, h

In [9]:
def boostrap_dictionary(dictionary: dict, size):
    boostrapped_dict = {}
    for key, values in dictionary.items():
        if values:
            boostrapped_dict[key] = bootstrapping(values, np.mean, size)
    return boostrapped_dict

### Data processing

In [10]:
def get_experiments_dfs(experients_global_results):
    experiment_dfs = []
    for experiment in experients_global_results:
        experiment = flat_global_results(experiment)
        experiment = boostrap_dictionary(dictionary=experiment, size=params['bootstrapping_size'])
        experiment_dfs.append(pd.DataFrame(experiment))
    return experiment_dfs

### Analysis - Rational Concept - Countings

In [11]:
def create_frequencies_dataframe(experients_global_results: list):
    frequencies_df = pd.DataFrame(columns=experients_global_results[0].keys())
    for experiment_global_result in experients_global_results:
        rationales_results = flat_global_results(experiment_global_result)
        frequencies_df = frequencies_df.append({key: len(value) for key, value in rationales_results.items()}, ignore_index=True)
    frequencies_df = frequencies_df.fillna(0)   
    return frequencies_df


### Analysis - Rational Concept - Bootstrapping probability

In [66]:
# Function to fill NaN values with random chose from a specified array
def fill_nan_with_random_col(col):
    if col.isna().sum() == 0:
        return col
    random_values = np.random.choice(col[col.notna()].to_list(), col.isna().sum())
    col[col.isna()] = random_values
    return col

In [40]:
def create_bootstapped_dataframe(experients_global_results:list):
    bootstrapped_df = pd.DataFrame(columns=experients_global_results[0].keys())
    for experiment_global_result in experients_global_results:
        rationales_results = flat_global_results(experiment_global_result)
        rationales_results = boostrap_dictionary(dictionary=rationales_results, size=params['bootstrapping_size'])
        bootstrapped_df = bootstrapped_df.append({key: np.median(value) for key, value in rationales_results.items()}, ignore_index=True)
    #bootstrapped_df = bootstrapped_df.fillna() ## <- ??
    return bootstrapped_df

### Calculate statistics and get results DF

In [13]:
### CREATE GROUP FREQUENCY RESULTS
def calculate_results(global_dataframe):
    results_df = pd.DataFrame(columns=['type', 'group', 'category', 'mean', 'median', 'std', 'ci'])
    for category in global_dataframe.columns:
        experiments_values = global_dataframe[category].tolist()
        try:
            group = [key for key, value in global_groups().items() if category in value][0] ## Mapping from taxonomy groups
        except Exception as e:
            print(e)
        results_df = results_df.append({
            'type': group.split('_')[0], 
            'group': group, 
            'category': category, 
            'median' : np.median(experiments_values),
            'mean' : np.mean(experiments_values), 
            'std' : np.std(experiments_values),
            'ci' : confidence_intervals_large_samples(experiments_values)
        }, ignore_index=True)
    return results_df.sort_values(by='median', ascending=False).reset_index(drop=True)

### Execute Analysis

In [20]:
experients_global_results = get_experiments_results(params['global_taxonomy_results'])
global_frequencies_dataframe = create_frequencies_dataframe(experients_global_results)
global_bootstrapped_dataframe = create_bootstapped_dataframe(experients_global_results)


In [57]:
global_bootstrapped_dataframe_nans= global_bootstrapped_dataframe.apply(fill_nan_with_random_col, axis = 0) ## Imputation values, picking a random value from non nan

In [62]:
global_bootstrapped_dataframe_nans

Unnamed: 0,punctuation,oop,asserts,types,conditionals,loops,operators,bool,functional,with,...,nl_adverb,nl_adjetive,nl_determier,nl_preposition,nl_particle,nl_modal,nl_conjunction,nl_cardinal,nl_other,exceptions
0,0.06,0.11,0.03,0.06,0.08,0.09,0.08,0.05,0.14,0.06,...,0.05,0.05,0.04,0.05,0.02,0.02,0.01,0.15,0.07,0.2
1,0.06,0.08,0.05,0.07,0.05,0.1,0.06,0.09,0.1,0.07,...,0.06,0.07,0.05,0.05,0.01,0.05,0.01,0.08,0.06,0.12
2,0.06,0.1,0.04,0.06,0.08,0.07,0.06,0.13,0.09,0.07,...,0.04,0.03,0.03,0.04,0.0,0.03,0.04,0.06,0.05,0.2
3,0.06,0.14,0.04,0.07,0.08,0.06,0.06,0.13,0.08,0.07,...,0.06,0.05,0.04,0.04,0.01,0.15,0.03,0.03,0.03,0.23
4,0.06,0.11,0.03,0.06,0.06,0.09,0.08,0.12,0.05,0.07,...,0.07,0.07,0.04,0.08,0.0,0.1,0.03,0.08,0.04,0.06
5,0.06,0.1,0.02,0.06,0.06,0.09,0.06,0.09,0.07,0.06,...,0.06,0.06,0.03,0.03,0.02,0.04,0.0,0.04,0.03,0.07
6,0.06,0.12,0.03,0.05,0.08,0.11,0.07,0.12,0.1,0.07,...,0.06,0.05,0.04,0.05,0.02,0.09,0.06,0.05,0.04,0.12
7,0.06,0.11,0.02,0.06,0.06,0.07,0.06,0.25,0.11,0.08,...,0.05,0.04,0.04,0.08,0.01,0.07,0.03,0.04,0.04,0.46
8,0.06,0.08,0.04,0.06,0.06,0.08,0.06,0.08,0.08,0.07,...,0.07,0.08,0.05,0.04,0.01,0.15,0.04,0.04,0.05,0.12
9,0.06,0.08,0.04,0.06,0.09,0.1,0.06,0.08,0.11,0.06,...,0.06,0.07,0.04,0.04,0.01,0.04,0.02,0.14,0.02,0.34


In [67]:
calculate_results(global_frequencies_dataframe).sort_values(by=['group','mean'],ascending=[True, True])

Unnamed: 0,type,group,category,mean,median,std,ci
31,nl,nl_not_semantic,nl_particle,3.4,0.0,8.16,"(0.381137649402735, 6.418862350597265, 3.01886..."
29,nl,nl_not_semantic,nl_conjunction,36.7,31.5,34.56,"(23.905486012621466, 49.49451398737854, 12.794..."
27,nl,nl_not_semantic,nl_modal,51.93,45.0,51.95,"(32.70094326321086, 71.1657234034558, 19.23239..."
24,nl,nl_not_semantic,nl_cardinal,201.63,158.0,167.63,"(139.57955202399071, 263.6871146426759, 62.053..."
20,nl,nl_not_semantic,nl_other,378.67,374.5,113.99,"(336.4690333998361, 420.86429993349725, 42.197..."
19,nl,nl_not_semantic,nl_preposition,566.17,545.0,160.73,"(506.66885521518424, 625.6644781181491, 59.497..."
18,nl,nl_not_semantic,nl_determier,626.27,628.0,163.94,"(565.5798137287385, 686.9535196045948, 60.6868..."
17,nl,nl_not_semantic,nl_adverb,714.1,681.5,190.3,"(643.6569233928365, 784.5430766071636, 70.4430..."
28,nl,nl_semantic,nl_pronoun,65.7,43.0,65.41,"(41.48775821852452, 89.91224178147549, 24.2122..."
14,nl,nl_semantic,nl_adjetive,1658.4,1665.5,436.46,"(1496.8319928010674, 1819.9680071989328, 161.5..."


In [65]:
calculate_results(global_bootstrapped_dataframe_nans)

Unnamed: 0,type,group,category,mean,median,std,ci
0,sc,sc_semantic,exceptions,0.19,0.15,0.11,"(0.14556163239030417, 0.22811923047077537, 0.0..."
1,sc,sc_semantic,oop,0.1,0.1,0.02,"(0.09819585733133857, 0.10963642384422986, 0.0..."
2,sc,sc_semantic,bool,0.1,0.1,0.03,"(0.08849174210526146, 0.1135040064021579, 0.01..."
3,sc,sc_semantic,loops,0.09,0.09,0.02,"(0.08671983060698049, 0.09800596888513796, 0.0..."
4,sc,sc_not_semantic,functional,0.08,0.08,0.02,"(0.0765222203872661, 0.09138638733888298, 0.00..."
5,sc,sc_semantic,conditionals,0.07,0.07,0.01,"(0.06570224390291121, 0.07515004436126743, 0.0..."
6,sc,sc_not_semantic,operators,0.07,0.07,0.01,"(0.06431469501271955, 0.0683512942836321, 0.00..."
7,nl,nl_semantic,nl_noun,0.07,0.07,0.0,"(0.06453069785069293, 0.06606923828120481, 0.0..."
8,sc,sc_nl,identifier,0.06,0.07,0.0,"(0.06399343892005285, 0.06559697272581183, 0.0..."
9,sc,sc_not_semantic,unknown,0.06,0.06,0.0,"(0.06421600826572708, 0.06572096731198375, 0.0..."


### Visualization 

In [None]:
def print_heatmap(
        np_rationales, 
        fig_size = (10,10), 
        font = 15
):
        plt.rcParams.update({'font.size': font})
        ax = plt.axes()
        sns.set(rc = {'figure.figsize':fig_size})
        sns.heatmap(np_rationales, cmap="YlGnBu", 
                yticklabels=yticklabels, 
                xticklabels=xticklabels,
                ax=ax)
        plt.show
        pass

In [None]:
calculate_results(global_frequencies_dataframe).groupby('group')