# Multiplexer Data Generation
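This notebook generates n-bit multiplexer benchmark datasets for experimental evaluation: class-balanced random samples for any valid multiplexer size, and the complete (exhaustive) dataset for sizes small enough to enumerate.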

***
## Imports:

In [1]:
import os
import random
from pprint import pformat as pf #for pickle debugging
import numpy as np
import pandas as pd

# If pip installed
#from skheros.heros import HEROS
# If locally run
from src.skheros.heros import HEROS

# Show the directory the kernel is running in.
# NOTE(review): presumably a sanity check that the notebook is run from the repository
# root so the local `src.skheros` import above resolves — confirm.
current_working_directory = os.getcwd()
print(current_working_directory)

c:\Users\ryanu\Documents\GitHub\scikit-heros


***
## Set Up Local Run Parameters


In [2]:
# Output configuration for the dataset-generation cells.
local_save = False  # True -> write into './output'; False -> write into folder_path
folder_path = 'C:/Users/ryanu/Desktop/MultiplexerData'  # NOTE: machine-specific absolute path; edit before running elsewhere
output_name = 'demo'

if local_save:
    output_folder = './output'
else:
    output_folder = folder_path

# Create whichever folder was selected (not just folder_path), so that
# local_save=True also has an existing directory to write into.
os.makedirs(output_folder, exist_ok=True)

In [3]:
def solve_equation(num_bits):
    """Return the number of address bits k such that k + 2**k == num_bits.

    Candidates k = 0..999 are tried in increasing order, so only sizes of the
    form k + 2**k (1, 3, 6, 11, 20, 37, ...) have a solution.
    Returns None when no candidate matches.
    """
    matches = (k for k in range(1000) if k + 2**k == num_bits)
    return next(matches, None)


def generate_multiplexer_instance(num_bits):
    """Draw one random multiplexer instance of the given size.

    The first `solve_equation(num_bits)` bits are the address bits; their
    binary value selects which of the remaining (register) bits is the outcome.

    Returns a tuple (condition, outcome, group):
        condition -- list of num_bits strings, each '0' or '1'
        outcome   -- the selected register bit, as a string
        group     -- the address value, as an int
    If num_bits is not a valid multiplexer size, an error is printed and
    None is returned instead of a tuple.
    """
    address_size = solve_equation(num_bits)
    if address_size is None:
        print("Problem_Multiplexer: ERROR - The multiplexer takes # of bits as 3,6,11,20,37,70,135,264")
        return None

    # One random draw per bit, in positional order.
    bit_string = [str(random.randint(0, 1)) for _ in range(num_bits)]

    # Interpret the leading address bits as a binary number.
    address_value = int("".join(bit_string[:address_size]), 2)

    # The register bits start right after the address bits.
    selected_bit = bit_string[address_size + address_value]
    return bit_string, selected_bit, address_value
    

def generate_mulitplexer_data(outpath, unique_name, num_bits, instances, show_group=False,show_instanceID=False):
    """ Generate a class-balanced n-bit multiplexer dataset and save it as a tab-delimited text file.

    Parameters
    ----------
    outpath : str
        Directory the file is written into (must already exist).
    unique_name : str
        Label appended to the file name (e.g. a replicate identifier).
    num_bits : int
        Total number of feature bits; must be a valid multiplexer size
        (3, 6, 11, 20, 37, 70, 135, 264).
    instances : int
        Number of rows to generate. Each class receives at most ceil(instances / 2)
        rows, so an odd count produces classes whose sizes differ by one.
    show_group : bool
        If True, add a 'Group' column holding the decimal value of the address bits.
    show_instanceID : bool
        If True, add an 'InstanceID' column numbering the rows from 1.

    Returns
    -------
    None. The data is written to
    '<outpath>/multiplexer_<num_bits>_bit_<instances>_inst_<unique_name>.txt'.
    If num_bits is not a valid multiplexer size, an error message is printed
    and no file is written.
    """
    first=solve_equation(num_bits)
    if first is None:
        print("Problem_Multiplexer: ERROR - The multiplexer takes # of bits as 3,6,11,20,37,70,135,264")
        # Stop here: there is no header or row data to build a DataFrame from.
        return

    #Make dataset header
    columns = ['A_'+str(i) for i in range(first)] #Address Bits
    columns += ['R_'+str(i) for i in range(num_bits-first)] #Register Bits
    columns.append("Class")
    if show_group:
        columns.append("Group")
    if show_instanceID:
        columns.append('InstanceID')

    #Make instances
    rows_list = [] #temporarily stores all instances as a list of lists.
    class_counts = {0: 0, 1: 0}
    # Round the per-class quota up so an odd (or single) instance count can still be
    # filled; rounding down would leave the rejection loop below with no class that
    # has room and it would never terminate.
    target_class_count = (instances + 1) // 2
    for i in range(instances):
        # Rejection sampling: redraw until the instance's class still has room.
        while True:
            condition,outcome,group = generate_multiplexer_instance(num_bits)
            label = int(outcome)
            if class_counts[label] < target_class_count:
                break
        class_counts[label] += 1

        #Format instance
        row_list = condition + [outcome] #feature bits followed by the outcome
        if show_group:
            row_list.append(group) #add group
        if show_instanceID:
            row_list.append(str(i+1))
        rows_list.append(row_list)

    df = pd.DataFrame(rows_list, columns=columns)
    file_name = 'multiplexer_'+str(num_bits)+'_bit_'+str(instances)+'_inst_'+str(unique_name)+'.txt'
    df.to_csv(os.path.join(outpath, file_name), sep='\t', index=False)


In [4]:
# Sampled datasets: position k of `bits` is paired with position k of `instances`.
bits = [6,11,20,37,70,135]
instances = [500,5000,10000,10000,20000,20000]
replicates = 1  # number of independently sampled files per multiplexer size

for i, num_bits in enumerate(bits):
    num_instances = instances[i]
    for replicate in range(replicates):
        # The replicate index becomes the unique part of the file name.
        filename = '{}_rep'.format(replicate)
        generate_mulitplexer_data(
            output_folder,
            filename,
            num_bits,
            num_instances,
            show_group=True,
            show_instanceID=True,
        )


In [5]:
def generate_complete_multiplexer_data(outpath, unique_name, num_bits, show_group=False,show_instanceID=False):
    """ Attempts to generate a complete non-redundant multiplexer dataset (every possible
    bit string exactly once) and save it as a tab-delimited text file.

    The dataset has 2**num_bits rows, so the ability to generate it is computationally limited.
    We had success generating up to the complete 20-multiplexer dataset.

    Parameters
    ----------
    outpath : str
        Directory the file is written into (must already exist).
    unique_name : str
        Accepted for signature parity with generate_mulitplexer_data; it is not
        used in the output file name.
    num_bits : int
        Total number of feature bits; must be a valid multiplexer size
        (3, 6, 11, 20, 37, 70, 135, 264).
    show_group : bool
        If True, add a 'Group' column holding the decimal value of the address bits.
    show_instanceID : bool
        If True, add an 'InstanceID' column numbering the rows from 1.

    Returns
    -------
    None. The data is written to '<outpath>/multiplexer_<num_bits>_bit_complete.txt'.
    If num_bits is not a valid multiplexer size, or the rows cannot be held in memory,
    an error message is printed and no file is written. Other failures (for example a
    missing output directory) are raised to the caller instead of being reported as a
    computational limitation.
    """
    print("Problem_Multiplexer: Attempting to generate multiplexer dataset")
    first=solve_equation(num_bits)

    if first is None:
        print("Problem_Multiplexer: ERROR - The multiplexer takes # of bits as 3,6,11,20,37,70,135,264")
        return

    #Make dataset header
    columns = ['A_'+str(i) for i in range(first)] #Address Bits
    columns += ['R_'+str(i) for i in range(num_bits-first)] #Register Bits
    columns.append("Class")
    if show_group:
        columns.append("Group")
    if show_instanceID:
        columns.append('InstanceID')

    try:
        #Make instances
        rows_list = [] #temporarily stores all instances as a list of lists.
        for i in range(2**num_bits):
            # Binary representation of i, left-padded with zeros to num_bits characters.
            binary = format(i, 'b').zfill(num_bits)

            # The leading address bits select which register bit is the outcome.
            gates_decimal = int(binary[:first], 2)
            outcome = binary[first+gates_decimal]

            #Format instance
            row_list = list(binary)
            row_list.append(outcome) #add outcome
            if show_group:
                row_list.append(gates_decimal) #add group
            if show_instanceID:
                row_list.append(str(i+1))
            rows_list.append(row_list)

        df = pd.DataFrame(rows_list, columns=columns)
    except (MemoryError, OverflowError):
        # Only resource exhaustion is treated as the expected "too large" outcome.
        print("Problem_Multiplexer: ERROR - Cannot generate all data instances for specified multiplexer due to computational limitations")
        return

    df.to_csv(os.path.join(outpath, 'multiplexer_'+str(num_bits)+'_bit_complete.txt'), sep='\t', index=False)
            
            

In [6]:
# Complete (exhaustive) datasets, limited to sizes small enough to enumerate.
bits = [6,11,20]
folder_path = 'C:/Users/ryanu/Desktop/MultiplexerCompleteData'  # NOTE: machine-specific absolute path; edit before running elsewhere

# Make sure the target folder exists before any file is written into it.
os.makedirs(folder_path, exist_ok=True)

for num_bits in bits:
    # The complete-dataset file name does not include unique_name, so pass a fixed
    # label instead of relying on `filename` left over from the replicate loop in
    # an earlier cell.
    generate_complete_multiplexer_data(folder_path, 'complete', num_bits, show_group=True, show_instanceID=True)

Problem_Multiplexer: Attempting to generate multiplexer dataset
Problem_Multiplexer: Attempting to generate multiplexer dataset
Problem_Multiplexer: Attempting to generate multiplexer dataset
