### After t-SNE visualization of the 144 datasets collected (4 AL learning runs included),
### we found that the data are not structured or grouped with regard to outcome classes.
### So, I decided to reproduce 24 reactions randomly picked from the 144 reactions.

In [None]:
import numpy as np
from numpy.random import random
from numpy import vstack, hstack
import pandas as pd
from Data.datasets import save_obj, load_obj, data_preprocess
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the concentration table of the 144 tested reactions (state after the 4th AL round).
df_tested = pd.read_csv('Data/004.morph phase mapping.csv')
# Re-key the rows by the values of the 'index' column; pop() removes that column
# from the frame at the same time, and the resulting index carries no name.
df_tested.index = df_tested.pop('index').tolist()

# Load the volume state space through the project helper load_obj.
df_pool_vol = load_obj('8R homogeneous volume statespace (Pb2, morph, H2O and FAH constrained)')

In [None]:
# Draw 24 row positions in [0, len(df_tested) - 1] with a fixed seed so the
# selection is repeatable, then take those rows of the tested reactions.
# NOTE(review): np.random.choice samples WITH replacement by default, so the 24
# picks may contain repeated rows -- confirm this is intended. The call is left
# unchanged so the seeded selection stays identical to the one already used.
np.random.seed(42)
rdm_num = np.random.choice(range(len(df_tested)), size=24)

# Keep only the non-score columns for the repeat experiments and persist them
# through the project helper save_obj.
query = df_tested.iloc[rdm_num].drop(columns=['score'])
save_obj(query, "8 reagent concentration_repeat24_after4AL")

# Labels of the selected reactions, used to look up the matching volume rows.
query_idx = query.index

### Generate experiments

In [None]:
from Models.expgen import robot_file_gen_R8

# Select the volume rows whose labels match the chosen reactions (label-based
# lookup, so a label missing from df_pool_vol raises KeyError).
df_pool_query_vol = df_pool_vol.loc[query_idx]

# Hand the selected volumes to the project helper; presumably it writes the
# robot input file under the given name -- verify against Models.expgen.
robot_file_gen_R8(
    data=df_pool_query_vol,
    filename='8R_repeat24_after4AL_robotinput',
)

### Check reproducibility

In [None]:
# Original scores: reload the saved selection, keep the rows of df_tested whose
# labels appear in it, and reduce the result to the 'score' column.
df_orig = load_obj("8 reagent concentration_repeat24_after4AL")
df_orig_score = (
    df_tested
    .filter(items=list(df_orig.index), axis=0)
    .filter(items=['score'], axis=1)
)

# Reproduced scores: read the repeat-experiment results and re-key the rows by
# the values of the 'Index' column (the column itself is removed).
df_rep_score = pd.read_csv('Data/rep_score.csv')
df_rep_score.index = df_rep_score.pop('Index').tolist()

# Index.equals compares labels position by position and returns False when the
# two indexes differ in length; an element-wise `==` between indexes of
# different lengths raises ValueError instead of reporting the mismatch.
print("original exp index match reproducibility exp index?", df_orig_score.index.equals(df_rep_score.index))

In [None]:
# Compare original and reproduced scores position by position and report the
# percentage that agree. The 'score' column is selected explicitly so each
# element is a scalar: calling int() on a one-element Series row is deprecated
# in recent pandas and fails outright if a frame has more than one column.
orig_scores = df_orig_score['score']
rep_scores = df_rep_score['score']
n_total = len(orig_scores)  # number of compared reactions (expected to be 24)

score = 0
for pos, (label, orig_raw) in enumerate(orig_scores.items()):
    orig_val = int(orig_raw)
    # Positional lookup: the two frames are compared row-for-row, not by label.
    rep_val = int(rep_scores.iloc[pos])
    if rep_val == orig_val:
        score += 1
    else:
        # Report every reaction whose repeat landed in a different class.
        print('index:', label)
        print('Original exp score:', orig_val, '|', 'Reproducility exp score:', rep_val)
        print('*'*20)
print('Reproducibility is ', score/n_total*100, '%')

In [None]:
# Display the original scores as a flat 1-D array.
df_orig_score.to_numpy().ravel()

In [None]:
# Display the reproduced scores as a flat 1-D array.
df_rep_score.to_numpy().ravel()

In [None]:
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

%matplotlib notebook
confusion_matrix = pd.crosstab(df_orig_score['score'], df_rep_score['score'], rownames=['Original'], colnames=['Reproduced'])

sn.heatmap(confusion_matrix, cmap="YlGnBu", annot=True)
plt.show()
plt.savefig('Graphs/Confusion matrix of reproducibility.svg', format = "svg", transparent=True)