## GB and MLP model notebook

**This notebook loads the training data, applies the preprocessing methods from the project modules, and runs the resulting features through gradient boosting (GB) and multilayer perceptron (MLP) classifiers.**

In [54]:
#imports

from sklearn.model_selection import train_test_split
from modules.classification_model_skeletons import *
from modules.data_preprocessing import *


In [55]:
# Load the raw samples. Column names are supplied here, so the first line of
# the file is read as data rather than as a header.
# `binary_number` is pinned to `str`: without an explicit dtype pandas infers
# the type of an all-digit column and may parse it as an integer, which would
# strip the leading zeros from a bit string.
# Alternative loader from the project modules, kept for reference:
#df = load_data_into_df('data/IBM_Superposition_QRNG_100qubit_large.txt', ['quantum', 'QPU'])
df = pd.read_csv(
    'data/IBM_Superposition_QRNG_100qubit_large.txt',
    names=['binary_number', 'quantum', 'QPU'],
    sep=' ',
    dtype={'binary_number': str},
)

In [56]:
# Run this cell when classifying by QPU; otherwise comment it out.
# Keep only the rows labelled 'quantum' and renumber them from 0. `drop=True`
# discards the old index directly instead of materialising it as an 'index'
# column (via an in-place reset) and dropping that column afterwards.
df_QPU = df.loc[df['quantum'] == 'quantum'].reset_index(drop=True)

# filter_by_label is a project helper (see the modules directory for its
# documentation); it receives the bit-string column and the QPU label column.
df_testing = filter_by_label(df_QPU, ['binary_number', 'QPU'])

In [57]:
# Experiment with different classifier input lengths (a single sample has
# base length 100).
# NOTE(review): the second argument of concatenate_data is presumably the
# number of samples joined per row — confirm against the modules directory.
concat_size = 4

df_testing_concatenated = concatenate_data(df_testing, concat_size)
df_testing_concatenated

Unnamed: 0,Concatenated_Data,QPU
0,1011001111011110101101000100010011001010110000...,ibm_brisbane
1,0101111101001110011100100101110001101101001011...,ibm_brisbane
2,0010111100110110011011101101010010101010111111...,ibm_brisbane
3,1101011110100100001110111111110011001000000001...,ibm_brisbane
4,1100101110000100111010110100101100001101101001...,ibm_brisbane
...,...,...
99995,0000111001101101110101010011100101001110100010...,ibm_kyiv
99996,0100100010011110100010001011111010110000011001...,ibm_kyiv
99997,0111110000000100001101111000111110100000111001...,ibm_kyiv
99998,0000111000100010111010011011110010000110111001...,ibm_kyiv


In [58]:
# Feature-extraction tests to apply during preprocessing.
# Full set that can be enabled instead:
#   tests = ['counts', 'runs', 'unq_subsq', 'longest_run']
tests = ['counts']

# apply_individual_qubit_functions is documented in the modules directory.
# NOTE(review): the third argument is presumably the base bit-string length /
# number of qubits (the output columns run from *_qb_0 to *_qb_99) — confirm.
num_qubits = 100
df_testing_features = apply_individual_qubit_functions(
    df_testing_concatenated, tests, num_qubits
)
df_testing_features

  df[f'{function}_qb_{qb}'] = qubitRes
  df[f'{function}_qb_{qb}'] = qubitRes


Unnamed: 0,Concatenated_Data,QPU,counts_qb_0,counts_qb_1,counts_qb_2,counts_qb_3,counts_qb_4,counts_qb_5,counts_qb_6,counts_qb_7,...,counts_qb_90,counts_qb_91,counts_qb_92,counts_qb_93,counts_qb_94,counts_qb_95,counts_qb_96,counts_qb_97,counts_qb_98,counts_qb_99
0,1011001111011110101101000100010011001010110000...,ibm_brisbane,2,3,1,1,2,2,1,1,...,2,2,4,3,2,1,3,2,3,1
1,0101111101001110011100100101110001101101001011...,ibm_brisbane,4,1,3,2,1,1,1,1,...,3,3,3,2,1,2,1,0,2,2
2,0010111100110110011011101101010010101010111111...,ibm_brisbane,2,3,1,2,1,0,2,2,...,3,4,1,1,1,3,2,0,3,2
3,1101011110100100001110111111110011001000000001...,ibm_brisbane,2,2,2,2,4,1,1,3,...,0,1,0,3,2,3,1,3,0,2
4,1100101110000100111010110100101100001101101001...,ibm_brisbane,1,1,3,3,2,3,3,0,...,3,2,1,1,3,2,3,4,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,0000111001101101110101010011100101001110100010...,ibm_kyiv,4,1,4,3,1,0,2,2,...,2,3,2,3,3,4,2,3,2,3
99996,0100100010011110100010001011111010110000011001...,ibm_kyiv,3,0,3,2,0,4,4,2,...,2,2,3,1,3,1,3,1,2,1
99997,0111110000000100001101111000111110100000111001...,ibm_kyiv,2,2,2,1,1,1,2,4,...,3,2,2,4,2,1,2,3,0,1
99998,0000111000100010111010011011110010000110111001...,ibm_kyiv,2,1,3,3,0,2,1,3,...,2,3,4,2,0,0,3,1,2,2


In [59]:
# Alternative: use the individual bits as features. Leave this disabled when
# frequency is the only feature, to avoid redundancy:
#
#   df_testing_features = make_bit_features(df_testing_concatenated)

# Drop the raw bit-string column so that only the label and the extracted
# features remain. errors='ignore' keeps this cell re-runnable: the cell
# reassigns df_testing_features from itself, so on a second run the column is
# already gone and a plain drop would raise KeyError.
df_testing_features = df_testing_features.drop(
    columns='Concatenated_Data', errors='ignore'
)
df_testing_features

Unnamed: 0,QPU,counts_qb_0,counts_qb_1,counts_qb_2,counts_qb_3,counts_qb_4,counts_qb_5,counts_qb_6,counts_qb_7,counts_qb_8,...,counts_qb_90,counts_qb_91,counts_qb_92,counts_qb_93,counts_qb_94,counts_qb_95,counts_qb_96,counts_qb_97,counts_qb_98,counts_qb_99
0,ibm_brisbane,2,3,1,1,2,2,1,1,2,...,2,2,4,3,2,1,3,2,3,1
1,ibm_brisbane,4,1,3,2,1,1,1,1,3,...,3,3,3,2,1,2,1,0,2,2
2,ibm_brisbane,2,3,1,2,1,0,2,2,4,...,3,4,1,1,1,3,2,0,3,2
3,ibm_brisbane,2,2,2,2,4,1,1,3,2,...,0,1,0,3,2,3,1,3,0,2
4,ibm_brisbane,1,1,3,3,2,3,3,0,1,...,3,2,1,1,3,2,3,4,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,ibm_kyiv,4,1,4,3,1,0,2,2,3,...,2,3,2,3,3,4,2,3,2,3
99996,ibm_kyiv,3,0,3,2,0,4,4,2,2,...,2,2,3,1,3,1,3,1,2,1
99997,ibm_kyiv,2,2,2,1,1,1,2,4,1,...,3,2,2,4,2,1,2,3,0,1
99998,ibm_kyiv,2,1,3,3,0,2,1,3,1,...,2,3,4,2,0,0,3,1,2,2


In [60]:
# Split into training and testing data: 80% / 20%.
# The first column of the feature frame is the class label; every remaining
# column is a feature.
label_column = df_testing_features.columns[0]
y = df_testing_features[label_column].values
X = df_testing_features.drop(columns=label_column).values

# Fixed random_state so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

### GB model

In [61]:
# Run the project's gradient-boosting helper on the train/test split and
# report the accuracy value it returns.
accuracy = sk_learn_gradient_boosting(
    X_train, y_train,
    X_test, y_test,
)
print('GB accuracy: ', accuracy)

fitting model
model fitted, now making predictions on test data
GB accuracy:  0.5348


### Basic MLP model 
**nodes in hidden layer = size of input layer (for consistency)**

In [138]:
# Hidden-layer width is set equal to the input-layer width. The width is read
# from the training matrix itself so it always matches the data handed to the
# model, even if df_testing_features has been modified since the split was made.
# NOTE(review): the fifth argument of sk_learn_MLP is presumably the hidden-layer
# size — confirm against the modules directory.
num_features = X_train.shape[1]
accuracy = sk_learn_MLP(X_train, y_train, X_test, y_test, num_features)
print('MLP accuracy: ', accuracy)

fitting model
model fitted, now making predictions on test data
MLP accuracy:  0.6516
