# The code in this Jupyter notebook
* reads the models and the datasets
* calculates the accuracy of each model on its own dataset (the personal dataset of the peer within the shard), and then on the entire shard's dataset
* implements the ensemble method and calculates its accuracy when the model pool consists of the entire pool of models (the models from all trained shards)


### Reading datasets and models from the trained shards

In [None]:
import pandas as pd
import os
from collections import defaultdict
from transformers import T5ForConditionalGeneration, T5Tokenizer, AdamW
import pickle
import torch
def defaultdict_to_dict(d):
    """Recursively convert defaultdicts into plain dicts.

    Any dict-like level (defaultdict or plain dict) is rebuilt as a regular
    ``dict`` and its values are converted in turn, so a defaultdict nested
    inside a plain dict is converted as well. This matters for pickling:
    a defaultdict whose default factory is a lambda cannot be pickled.

    Args:
        d: The object to convert. Non-dict values are returned unchanged.

    Returns:
        A new plain ``dict`` mirroring ``d`` when ``d`` is a dict (of any
        kind), otherwise ``d`` itself.
    """
    if isinstance(d, dict):
        return {key: defaultdict_to_dict(value) for key, value in d.items()}
    return d



# Reading data and models
# Index numbers of the trained shards; each one is read from aggregated_results/group<index>.
groups = list(map(str, range(3)))
# Ports under which the peers' models were trained in a decentralized manner.
peers = list(map(str, range(8090, 8100)))
# Epoch checkpoint that identifies which saved model of each peer to load.
# Generally the largest number of epochs trained by any peer
# (peers may train at different speeds when using 'main.py').
epoch_cutoff = 6000

In [None]:
def find_first_files_with_str(directory, str_contain, epoch_cutoff):
    """Build the checkpoint name ``'<str_contain>_<epoch_cutoff>'``.

    Despite its name, this function does not search the file system.

    Args:
        directory: Unused; kept so existing callers keep working.
        str_contain: String prefix of the checkpoint name (must be a ``str``).
        epoch_cutoff: Epoch number appended after an underscore.

    Returns:
        The relative checkpoint name, to be joined onto a folder by the caller.
    """
    checkpoint_name = '_'.join((str_contain, str(epoch_cutoff)))
    return checkpoint_name

In [None]:
# Notebook-level T5 tokenizer ('t5-small' vocabulary); the generation helpers
# defined in later cells read this global to encode queries and decode outputs.
tokenizer = T5Tokenizer.from_pretrained('t5-small')


#reading entire dataset for all groups: train_df_group1


# reading individual peer datasets & group datasets: train_df_group1, train_df_group1_peer1
# The frames are published as notebook-level variables because later cells look
# them up by name:
#   train_df_group<g>_peer<p> / test_df_group<g>_peer<p>  -> one peer's data
#   train_df_group<g>         / test_df_group<g>          -> the whole shard's data
for group in groups:
    datasets_folder = os.path.join('aggregated_results', f'group{group}', 'datasets')

    # creating train_df's
    peer_train_dfs = {}
    for peer in peers:
        peer_idx = int(peer) - 8089  # port 8090 -> peer 1, 8091 -> peer 2, ...
        peer_train_dfs[peer_idx] = pd.read_csv(os.path.join(datasets_folder, f'{peer}_df.csv'))
        globals()[f'train_df_group{group}_peer{peer_idx}'] = peer_train_dfs[peer_idx]

    # The shard-wide training set is the de-duplicated union of the peers' sets.
    if peer_train_dfs:
        group_train_df = pd.concat(list(peer_train_dfs.values())).drop_duplicates()
    else:
        group_train_df = pd.DataFrame()
    globals()[f'train_df_group{group}'] = group_train_df

    # creating test_df's: test_df_group1, test_df_group1_peer1
    # Only test rows whose doc_id was seen in the corresponding training data are kept.
    group_test_df = pd.read_csv(os.path.join(datasets_folder, 'test_df.csv'))
    group_test_df = group_test_df[group_test_df['doc_id'].isin(group_train_df['doc_id'].unique())]
    globals()[f'test_df_group{group}'] = group_test_df
    for peer_idx, peer_train_df in peer_train_dfs.items():
        globals()[f'test_df_group{group}_peer{peer_idx}'] = group_test_df[
            group_test_df['doc_id'].isin(peer_train_df['doc_id'].unique())]
    
    
    

#reading models: model_group1_peer1
# Each model is published as a notebook-level variable model_group<g>_peer<p>,
# which later cells look up by name.
for group in groups:
    model_folder = os.path.join('aggregated_results', f'group{group}', 'models')
    for peer in peers:
        # Checkpoint name '<port>_<epoch_cutoff>' inside the group's models folder.
        model_file = find_first_files_with_str(model_folder, peer, epoch_cutoff)
        print (group, peer, model_file)
        globals()[f'model_group{group}_peer{int(peer) - 8089}'] = T5ForConditionalGeneration.from_pretrained(
            os.path.join(model_folder, model_file))

In [None]:
# Display one of the loaded models (group 1, peer 1).
model_group1_peer1

In [None]:
# doc_ids from group 0's training data that also appear in group 1's training data.
train_df_group0[train_df_group0['doc_id'].isin(train_df_group1['doc_id'].unique())]['doc_id'].unique()

In [None]:
# Number of distinct doc_ids in group 0's training data.
train_df_group0['doc_id'].nunique()

### Evaluating the top-1 accuracy of individual models on their local and shard-wide datasets

In [None]:
def _exact_match_accuracy(df, predicted_col):
    """Return the fraction of rows where ``doc_id`` equals ``df[predicted_col]``.

    An empty frame yields NaN instead of raising ZeroDivisionError.
    """
    if df.shape[0] == 0:
        return float('nan')
    return int((df['doc_id'] == df[predicted_col]).sum()) / df.shape[0]


def read_model_and_evaluate(group, peer, mode = 'global'):
    """Evaluate one peer's model with top-1 exact-match accuracy.

    The model and data frames are looked up among the notebook-level variables
    ``model_group<group>_peer<peer>``, ``train_df_group<group>[_peer<peer>]``
    and ``test_df_group<group>[_peer<peer>]``. Nothing is written back to the
    global namespace.

    Two predictions are computed per query: one with ``generate_text`` and one
    with ``generate_text_through_logits``. The accuracies of the latter are
    only printed, not returned.

    Args:
        group: Shard index used in the variable names.
        peer: Peer number used in the variable names.
        mode: ``'global'`` to evaluate on the whole shard's train/test data,
            ``'local'`` to evaluate on the peer's own train/test data.

    Returns:
        Tuple ``(acc_train, acc_test, df_tot, df_tst)``: the ``generate_text``
        accuracies on the train and test frames, followed by copies of those
        frames extended with the ``generated_doc_id`` and
        ``generated_doc_id_log`` columns. An accuracy is NaN when its frame is
        empty.

    Raises:
        ValueError: If ``mode`` is neither ``'global'`` nor ``'local'``.
        KeyError: If a required notebook-level variable does not exist.
    """
    if mode not in ('global', 'local'):
        raise ValueError(f"mode must be 'global' or 'local', got {mode!r}")

    print ('group ', group, 'peer ', peer)
    scope = globals()
    model = scope[f'model_group{group}_peer{peer}']
    # Shard-wide frames have no peer suffix; per-peer frames do.
    frame_suffix = '' if mode == 'global' else f'_peer{peer}'
    # Work on copies so the shared frames are not modified by the new columns.
    df_tot = scope[f'train_df_group{group}{frame_suffix}'].copy()
    df_tst = scope[f'test_df_group{group}{frame_suffix}'].copy()
    print (df_tot.shape, df_tst.shape)

    df_tot['generated_doc_id'] = df_tot['query'].apply(lambda x: generate_text(x, model))
    df_tst['generated_doc_id'] = df_tst['query'].apply(lambda x: generate_text(x, model))
    acc_train = _exact_match_accuracy(df_tot, 'generated_doc_id')
    acc_test = _exact_match_accuracy(df_tst, 'generated_doc_id')

    print (f'{mode} training set accuracy: ', acc_train)
    print (f'{mode} test set accuracy: ', acc_test)

    df_tot['generated_doc_id_log'] = df_tot['query'].apply(lambda x: generate_text_through_logits(x, model, df_tot))
    df_tst['generated_doc_id_log'] = df_tst['query'].apply(lambda x: generate_text_through_logits(x, model, df_tst))

    acc_train_log = _exact_match_accuracy(df_tot, 'generated_doc_id_log')
    acc_test_log = _exact_match_accuracy(df_tst, 'generated_doc_id_log')

    print (f'{mode} training set accuracy log: ', acc_train_log)
    print (f'{mode} test set accuracy log: ', acc_test_log)
    return acc_train, acc_test, df_tot, df_tst

In [None]:
# Function to generate text
def generate_text(query, model):
    """Generate a single output string for ``query`` with ``model``.

    The query is encoded with the notebook-level ``tokenizer``, the model
    generates with ``max_length=20``, and the first returned sequence is
    decoded with special tokens stripped.

    Args:
        query: Input text.
        model: Model exposing ``generate``.

    Returns:
        The decoded text of the first generated sequence.
    """
    encoded_query = tokenizer.encode(query, return_tensors='pt')
    generated = model.generate(encoded_query, max_length = 20)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)


In [None]:
def generate_text_through_logits(query,model, df_tst):
    """Predict the doc_id for ``query`` from the logits of one forward pass.

    The true ``doc_id`` of the query is looked up in ``df_tst`` (first row
    whose ``query`` column matches) and passed to the model as ``labels``; the
    prediction is the arg-max token at every position of the resulting logits.

    NOTE(review): because the ground-truth labels are fed to the model, this
    appears to be a teacher-forced prediction rather than free-running
    generation -- confirm this is the intended metric.

    Args:
        query: Input text; must occur in ``df_tst['query']``.
        model: Model called as ``model(input_ids=..., labels=...)``.
        df_tst: Frame with ``query`` and ``doc_id`` columns.

    Returns:
        The decoded arg-max token sequence, special tokens stripped.

    Raises:
        IndexError: If ``query`` does not occur in ``df_tst``.
    """
    doc_id = df_tst[df_tst['query'] == query]['doc_id'].iloc[0]
    inputs = tokenizer(query, padding=False, return_tensors="pt", truncation=True).input_ids
    labels = tokenizer(doc_id, padding=True, return_tensors="pt", truncation=True).input_ids

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(input_ids=inputs, labels = labels)

    # Most likely token at each position of the (single) sequence in the batch.
    predicted_token_ids = torch.argmax(outputs.logits, dim=-1)[0]
    return tokenizer.decode(predicted_token_ids, skip_special_tokens=True)

In [None]:
# Top-1 accuracy of every peer's model on its own data ('local') and on its
# whole shard's data ('global').
# Layout: global_accuracies[group][peer]['train' | 'test']['local' | 'global'],
# where peer is the port string from `peers`.
global_accuracies = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float))))
for group in groups:
    for peer in peers:
        # Port -> peer number used in the variable names (8090 -> 1, ...).
        p = int(peer) - 8089

        acc_train_local, acc_test_local, df_tot_l, df_tst_l = read_model_and_evaluate(group, p, 'local')
        global_accuracies[group][peer]['train']['local'] = acc_train_local
        global_accuracies[group][peer]['test']['local'] = acc_test_local

        acc_train_global, acc_test_global, df_tot_g, df_tst_g = read_model_and_evaluate(group, p, 'global')
        global_accuracies[group][peer]['train']['global'] = acc_train_global
        global_accuracies[group][peer]['test']['global'] = acc_test_global

In [None]:
# Convert the nested defaultdict of accuracies into plain dicts before pickling
# (a defaultdict whose default factory is a lambda cannot be pickled).
regular_dict = defaultdict_to_dict(global_accuracies)

# Serialize and save to a file
with open('localnglobal_accuracies_allgroups_allpeers.pkl', 'wb') as file:
    pickle.dump(regular_dict, file)

# Read the file back; the loaded object is a plain nested dict,
# not a defaultdict.
with open('localnglobal_accuracies_allgroups_allpeers.pkl', 'rb') as file:
    loaded_dict = pickle.load(file)
    # Converting back to a defaultdict would require rebuilding the nested
    # default factories; this is not done here.


In [None]:
# Reload the saved local/global accuracies into global_accuracies.
# The loaded object is a plain nested dict, not a defaultdict.
with open('localnglobal_accuracies_allgroups_allpeers.pkl', 'rb') as file:
    global_accuracies = pickle.load(file)
    # Converting back to a defaultdict would require rebuilding the nested
    # default factories; this is not done here.

### Calculating top-5 accuracy for each peer under the above conditions

In [None]:
import threading

# Global dictionary that receives, per (group, peer) key, the test DataFrame
# with its generated candidate lists (filled in by ModelEvaluator).
global_objects = {}
# Nested accuracy store: global_accuracies[group][peer]['train' | 'test'][mode].
# NOTE: this rebinds the name global_accuracies used by the earlier cells.
global_accuracies = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float))))
class ModelEvaluator:
    """Evaluate one peer's model with 5-beam search and record its top-5 accuracy.

    The model and data frames are looked up among the notebook-level variables
    ``model_group<group>_peer<peer>``, ``train_df_group<group>[_peer<peer>]``
    and ``test_df_group<group>[_peer<peer>]``, always using the group and peer
    this evaluator was created for. Results are written to the notebook-level
    ``global_objects`` and ``global_accuracies`` containers.
    """

    # Serialises writes into the shared nested defaultdict: two threads that
    # both miss the same key could otherwise each create a fresh inner
    # mapping, and one of the results would be lost.
    _results_lock = threading.Lock()

    def __init__(self, group, peer):
        """Remember which model to evaluate.

        Args:
            group: Shard index used in the variable names.
            peer: Peer number used in the variable names.
        """
        self.group = group
        self.peer = peer
        # Number of queries processed so far (used for progress messages).
        self.counter = 0

    def read_model_and_evaluate(self, mode='global'):
        """Compute the top-5 test accuracy of this evaluator's model.

        A test row counts as correct when its ``doc_id`` is among the five
        sequences returned by ``generate_text_beams``. The test frame,
        extended with a ``generated_doc_id`` column, is kept in
        ``self.df_tst`` and stored in ``global_objects[(group, peer)]``.

        Args:
            mode: ``'global'`` for the shard-wide frames, ``'local'`` for the
                peer's own frames.

        Returns:
            Tuple ``(acc_train, acc_test)``. ``acc_train`` is always ``-1.0``
            because training accuracy is not computed here. ``acc_test`` is
            NaN when the test frame is empty.

        Raises:
            ValueError: If ``mode`` is neither ``'global'`` nor ``'local'``.
            KeyError: If a required notebook-level variable does not exist.
        """
        if mode not in ('global', 'local'):
            raise ValueError(f"mode must be 'global' or 'local', got {mode!r}")

        acc_train = -1.0  # placeholder: the training set is not evaluated
        print('group', self.group, 'peer', self.peer, 'mode', mode)

        scope = globals()
        model = scope[f'model_group{self.group}_peer{self.peer}']
        # Shard-wide frames have no peer suffix; per-peer frames do.
        frame_suffix = '' if mode == 'global' else f'_peer{self.peer}'
        train_df = scope[f'train_df_group{self.group}{frame_suffix}']
        self.df_tst = scope[f'test_df_group{self.group}{frame_suffix}'].copy()

        print(train_df.shape, self.df_tst.shape)

        self.df_tst['generated_doc_id'] = self.df_tst['query'].apply(lambda x: self.generate_text_beams(x, model))
        if self.df_tst.shape[0] == 0:
            acc_test = float('nan')
        else:
            acc_test = self.df_tst.apply(lambda row: row['doc_id'] in row['generated_doc_id'], axis=1).sum() / self.df_tst.shape[0]

        global_objects[(self.group, self.peer)] = self.df_tst

        print(f'{mode} training set accuracy: ', acc_train)
        print(f'{mode} test set accuracy: ', acc_test)
        return acc_train, acc_test

    def generate_text_beams(self, query, model):
        """Return the five beam-search outputs of ``model`` for ``query``.

        Args:
            query: Input text, encoded with the notebook-level ``tokenizer``.
            model: Model exposing ``generate``.

        Returns:
            List of five decoded strings, special tokens stripped.
        """
        self.counter += 1
        if self.counter % 1000 == 0:
            print(f"Processed {self.counter} queries")
        input_ids = tokenizer.encode(query, return_tensors='pt')
        output = model.generate(input_ids, do_sample=False, max_length=20,
                                num_beams=5, num_return_sequences=5)
        return [tokenizer.decode(i, skip_special_tokens=True) for i in output]

    def thread_function(self):
        """Run the shard-wide ('global') evaluation and store its accuracies.

        Writes ``global_accuracies[group][peer]['train' | 'test']['global']``.
        """
        acc_train_global, acc_test_global = self.read_model_and_evaluate('global')
        with self._results_lock:
            global_accuracies[self.group][self.peer]['train']['global'] = acc_train_global
            global_accuracies[self.group][self.peer]['test']['global'] = acc_test_global
        print(f'finished global work for group {self.group} and peer {self.peer}, acc test global :{acc_test_global}')

def evaluate_in_thread(group, peer):
    """Thread target: evaluate one model and never let an exception escape.

    Any exception raised during the evaluation is reported on the console
    together with its traceback, so a failing thread can be diagnosed while
    the other threads keep running.

    Args:
        group: Shard index passed to ``ModelEvaluator``.
        peer: Peer number passed to ``ModelEvaluator``.
    """
    import traceback  # only needed on the failure path

    try:
        evaluator = ModelEvaluator(group, peer)
        evaluator.thread_function()
    except Exception as e:
        print(f"Error in thread for group {group} and peer {peer}: {e}")
        traceback.print_exc()

    
# Start threads directly in the main script body
threads = []
for group in groups:
    for peer in peers:
        # Port -> peer number used in the variable names (8090 -> 1, ...).
        p = int(peer) - 8089
        thread = threading.Thread(target=evaluate_in_thread, args=(group, p,))
        thread.start()
        threads.append(thread)

    # Wait for all threads started so far to complete. This join sits inside
    # the group loop, so one group's threads finish before the next group's
    # threads are started; at most len(peers) evaluations run at a time.
    for thread in threads:
        thread.join()


In [None]:
# Display the accuracies recorded by the evaluation threads.
global_accuracies

In [None]:
# Convert the nested defaultdict of accuracies into plain dicts before pickling
# (a defaultdict whose default factory is a lambda cannot be pickled).
regular_dict = defaultdict_to_dict(global_accuracies)

# Serialize and save to a file
with open('global_accuracies_5beams.pkl', 'wb') as file:
    pickle.dump(regular_dict, file)

In [None]:
# Read the 5-beam accuracies back and display them.
with open('global_accuracies_5beams.pkl', 'rb') as file:
    loaded_dict = pickle.load(file)
    # The loaded object is a plain nested dict; it is not converted back to
    # a defaultdict.
    
loaded_dict

### Implementing the ensemble method under inter-group conditions and saving its results

In [None]:
# Model pool for the ensemble: 'group<index>' -> list of that shard's peer models,
# in the order of `peers`.
three_groups_list = defaultdict(list)
for group in groups:
    print (f'group {group}')
    for peer in peers:
        peer_nbr = int(peer) - 8089  # port 8090 -> peer 1, 8091 -> peer 2, ...
        print (f'peer {peer_nbr}')
        three_groups_list[f'group{group}'].append(globals()[f'model_group{group}_peer{peer_nbr}'])
            
            

In [None]:
import random
from collections import defaultdict
import threading
from torch.nn.functional import softmax

class ModelManager:
    """Ensemble evaluator that pools beam-search outputs of randomly sampled models.

    For every query, ``models_per_group`` models are drawn at random from each
    pool named in ``groups_to_sample``. Each sampled model contributes its five
    beam-search outputs, weighted by the softmax of its beam scores, and the
    five outputs with the highest summed weight form the ensemble's answer.

    NOTE(review): the defaults reproduce the original hard-coded choice of the
    'group1' and 'group2' pools. With a pool keyed 'group0'..'group2' this
    leaves 'group0' out of the ensemble -- confirm whether that is intended,
    and pass ``groups_to_sample`` explicitly if not.
    """

    def __init__(self, model_list, test_df, tokenizer,
                 groups_to_sample=('group1', 'group2'), models_per_group=3):
        """Store the model pool, a private copy of the test set and the tokenizer.

        Args:
            model_list: Mapping from pool name (e.g. ``'group1'``) to a list
                of models.
            test_df: Frame with ``query`` and ``doc_id`` columns; it is copied.
            tokenizer: Tokenizer used to encode queries and decode outputs.
            groups_to_sample: Names of the pools to sample models from.
            models_per_group: Number of models drawn from each pool per query.
        """
        self.model_list = model_list
        self.test_df = test_df.copy()
        print ('test set size:', self.test_df.shape[0])

        self.tokenizer = tokenizer
        # Number of queries processed so far (used for progress messages).
        self.counter = 0
        self.groups_to_sample = tuple(groups_to_sample)
        self.models_per_group = models_per_group

    def generate_text_beams(self, query):
        """Return the ensemble's five best outputs for ``query``.

        Args:
            query: Input text.

        Returns:
            List of at most five decoded strings, best first.

        Raises:
            ValueError: If a pool holds fewer than ``models_per_group`` models
                (raised by ``random.sample``).
        """
        self.counter += 1
        if self.counter % 500 == 0:
            print(f"Processed {self.counter} queries")
        results = defaultdict(float)

        sampled_models = []
        for group_key in self.groups_to_sample:
            sampled_models.extend(random.sample(self.model_list[group_key], self.models_per_group))

        # The encoded query is the same for every model, so encode it once.
        input_ids = self.tokenizer.encode(query, return_tensors='pt')
        for model in sampled_models:
            output = model.generate(input_ids, do_sample=False, return_dict_in_generate=True, output_scores=True,
                                    num_beams=5, num_return_sequences=5, max_length = 20)

            # Turn this model's beam scores into weights that sum to 1.
            probabilities = softmax(output.sequences_scores, dim=0).tolist()
            model_res = [self.tokenizer.decode(output_id, skip_special_tokens=True) for output_id in output.sequences]

            # Identical texts proposed by different models accumulate weight.
            for res, prob in zip(model_res, probabilities):
                results[res] += prob

        return self.top_5_generated_texts(results)

    def top_5_generated_texts(self, input_dict):
        """Return the (up to) five keys of ``input_dict`` with the largest values.

        Args:
            input_dict: Mapping from generated text to its accumulated weight.

        Returns:
            List of keys ordered by descending value; ties keep insertion order.
        """
        sorted_responses = sorted(input_dict.items(), key=lambda x: x[1], reverse=True)
        return [response[0] for response in sorted_responses[:5]]

    def evaluate_accuracy(self):
        """Generate ensemble answers for the whole test set and score them.

        Adds a ``generated_doc_id`` column to ``self.test_df``.

        Returns:
            The top-5 accuracy on the test set (see ``calculate_accuracy``).
        """
        self.test_df['generated_doc_id'] = self.test_df['query'].apply(self.generate_text_beams)
        acc_test = self.calculate_accuracy(self.test_df)

        return acc_test

    def calculate_accuracy(self, df):
        """Return the fraction of rows whose ``doc_id`` is in ``generated_doc_id``.

        Args:
            df: Frame with a ``doc_id`` column and a ``generated_doc_id``
                column holding a collection of candidates per row.

        Returns:
            The hit rate, or NaN for an empty frame.
        """
        if df.shape[0] == 0:
            return float('nan')
        return df.apply(lambda row: row['doc_id'] in row['generated_doc_id'], axis=1).sum() / df.shape[0]

# Assuming you have predefined dictionaries/lists for models and datasets, such as:
# two_groups_list = [...]
# three_groups_list = [...]
# train_df_group1 = ...
# test_df_group1 = ...
# ... and so on for other groups
# And a tokenizer instance

def run_evaluation(group_nbr, tokenizer):
    """Evaluate the ensemble on one group's test set and record the outcome.

    Reads the notebook-level ``three_groups_list`` model pool and the
    ``test_df_group<group_nbr>`` frame, and writes to the notebook-level
    ``model_managers`` and ``global_accuracies`` dicts under ``group_nbr``.

    Args:
        group_nbr: Shard index whose test set is evaluated.
        tokenizer: Tokenizer handed to the ``ModelManager``.
    """
    model_pool = three_groups_list
    group_test_df = globals()[f'test_df_group{group_nbr}']

    manager = ModelManager(model_pool, group_test_df, tokenizer)
    # Registered before the evaluation runs, so the manager (and its test
    # frame) can be reached through model_managers from then on.
    model_managers[group_nbr] = manager

    acc_test = manager.evaluate_accuracy()
    global_accuracies[group_nbr] = {'acc_test': acc_test}

    print(f"Group: {group_nbr}, Test Acc: {acc_test}")

    
# Registries filled in by run_evaluation, both keyed by group:
# the ModelManager instances and their {'acc_test': ...} results.
model_managers = {}
global_accuracies = {}

# One evaluation thread per group, all started before any is joined.
threads = []
for group_nbr in groups:
    thread = threading.Thread(target=run_evaluation, args=(group_nbr, tokenizer))
    thread.start()
    threads.append(thread)

# Block until every group's evaluation has finished.
for thread in threads:
    thread.join()


In [None]:
# Top-1 accuracy of the ensemble per group, derived from the stored ranked
# candidate lists by keeping only the first candidate of every row.
for group in model_managers:
    df_temp = model_managers[group].test_df.copy()
    df_temp['generated_doc_id_1beam'] = df_temp['generated_doc_id'].apply(lambda ranked: [ranked[0]])
    is_top1_hit = df_temp.apply(
        lambda row: row['doc_id'] in row['generated_doc_id_1beam'], axis=1)
    print (group, is_top1_hit.sum() / df_temp.shape[0])

In [None]:
# Normalise the ensemble accuracies to plain dicts before pickling
# (global_accuracies maps each group to {'acc_test': accuracy} here).
regular_dict = defaultdict_to_dict(global_accuracies)

# Serialize and save to a file
with open('inter_group_accs_5beams.pkl', 'wb') as file:
    pickle.dump(regular_dict, file)

In [None]:
# Read the inter-group 5-beam ensemble accuracies back and display them.
with open('inter_group_accs_5beams.pkl', 'rb') as file:
    loaded_dict = pickle.load(file)
    # The loaded object is a plain dict; no conversion is applied.
    
loaded_dict

In [None]:
# Top-1 accuracy of the ensemble per group, derived from the stored ranked
# candidate lists by keeping only the first candidate of every row.
regular_dict2 = {}
for group in model_managers:
    df_temp = model_managers[group].test_df.copy()
    df_temp['generated_doc_id_1beam'] = df_temp['generated_doc_id'].apply(lambda x: [x[0]])
    # Computed once and reused for both the stored result and the printout.
    regular_dict2[group] = df_temp.apply(lambda row: row['doc_id'] in row['generated_doc_id_1beam'],
                                         axis=1).sum() / df_temp.shape[0]
    print (group, regular_dict2[group])
    
    
# display(df_temp)

In [None]:
# Save the per-group top-1 ensemble accuracies (group -> accuracy).
with open('inter_group_accs_1beams.pkl', 'wb') as file:
    pickle.dump(regular_dict2, file)

In [None]:
# Read the inter-group top-1 ensemble accuracies back and display them.
with open('inter_group_accs_1beams.pkl', 'rb') as file:
    loaded_dict = pickle.load(file)
    # The loaded object is a plain dict; no conversion is applied.
    
loaded_dict