This notebook is a part of the work 'Nucleation patterns of polymer crystals analyzed by machine learning models' and is written by Atmika Bhardwaj (bhardwaj@ipfdd.de).

The following machine learning (ML) models are employed: AutoEncoders (AE), Hierarchical Clustering (HC), Gaussian Mixture Models (GMM), and Multilayer Perceptron (MLP), to harness the capabilities of ML algorithms to unfold the details associated with the different phases that emerge during polymer crystallization. We start by reading a coordinate file (dump file from LAMMPS) and analyze it to study the local environmental information of every coarse-grained bead. Then, we train an AE to compress that information and train a GMM on the compressed data to classify each coarse-grained bead as amorphous or crystalline depending on its environmental fingerprint.

This notebook imports all the functions from another file called all_functions.py


In [None]:
import glob
import random
import pickle
import keras
import numpy                  as     np
from   numpy.random           import seed
import matplotlib.pyplot      as     plt
import matplotlib.cm          as     cm
import h5py                   as     h5py
from   mpl_toolkits.mplot3d   import Axes3D
from   matplotlib.offsetbox   import TextArea, DrawingArea, OffsetImage, AnnotationBbox
import matplotlib.image       as mpimg
from   natsort                import natsorted
from   sklearn.decomposition  import KernelPCA
import sys
from   all_functions          import *
import matplotlib.ticker      as mticker
import our_colors
import control_parameters

# --- Global configuration shared by the cells below ---
param           = control_parameters.get_defaults()  # indexed by string keys below, e.g. param["t_pre"]
ocol            = our_colors.generate_colors()       # colour lookup, e.g. ocol["SL"]
olab            = our_colors.generate_labels()       # legend-label lookup, e.g. olab["sSL"]
pstyle          = our_colors.PlotStyle()
skip_processing = True
seed(1)                                              # numpy.random.seed: fixes numpy's global RNG
# Matplotlib defaults applied to every figure unless a cell overrides them.
plt.rcParams["figure.figsize"]  = (6,5)
plt.rcParams["figure.dpi"]      = 300
plt.rcParams['xtick.labelsize'] = 16
plt.rcParams['ytick.labelsize'] = 16

# Scatter point sizes per class: crystalline points are class_scale times larger.
ps_am       = 1.0               # amorphous point size
class_scale = 3.0
ps_cr       = class_scale*ps_am # crystalline point size


In [None]:
import matplotlib
import sklearn
import sys
# Record the versions of the main libraries used to produce the figures.
print("numpy:",np.__version__)
print("h5py:",h5py.__version__)
print("matplotlib:",matplotlib.__version__)
print("sklearn:",sklearn.__version__)
print("python:",sys.version)

In [None]:
# Collect the dump files in natural (numeric) order and open the fingerprint archive read-only.
g                = natsorted(glob.glob('../../01_raw_data/dump_files/dump.*'))
h5_file          = h5py.File('../all_fingerprints.h5', 'r')  # NOTE(review): not closed in the visible cells
h5_keys          = natsorted(list(h5_file.keys()))
file_num         = -1                                        # index -1: last dump file in sorted order
obj              = ReadCoordinates(g[file_num], param["chainlength"])  # presumably provided by the all_functions star import


In [None]:
# Table 1: for every characteristic time print its label, the time in units of
# 10^6 MD steps, the elapsed time in microseconds, and the LJ / real temperatures.
print ("Table 1: ")
tau    = (1.6+3.5)*.5e-12   # = 2.55e-12 s, same value as used for the figure annotations
MDstep = 0.01*tau           # duration of one MD step in seconds
for tlabel in ("t_pre","t_tr","t_min","t_end"):
    t      = param[tlabel]
    TLJ    = control_parameters.calcLJTempAtTime(t)
    Treal  = control_parameters.calcRealTemp(TLJ)
    # t counts 10^6 MD steps (see the time-axis labels), so MDstep*t*1e6 is the
    # elapsed time in seconds; the second factor 1e6 converts it to microseconds,
    # matching the annotation formula tau*tlabel*1e6*0.01*1e6 used for Fig. 1.
    mus    = MDstep*t*1e6*1e6
    print (tlabel,t,mus,TLJ,Treal )
                

In [None]:
# Figure 1a (inset): zoom of the specific volume in the time window t = 40..56.
# The commented lines below show how Fig01a.csv was written from the raw
# simulation output (kept for provenance).
#filename = '../../01_raw_data/dump_files/simulation_files/filtered.dat'
#data     = np.loadtxt(filename)
#temp     = data[:, 1]
#vol      = data[:, 6]
#np.savetxt('../../03_figures/Fig01/Fig01a.csv', [temp, vol], delimiter=",")

filename = '../../03_figures/Fig01/Fig01a.csv'
data     = np.loadtxt(filename, delimiter=",")
temp     = data[0]   # first CSV row: temperature
vol      = data[1]   # second CSV row: volume

fig  = plt.figure(figsize=(9,6))
ax   = fig.add_subplot()
ax2  = ax.twiny()    # ax: temperature (bottom axis), ax2: time (top axis)
ax2.xaxis.tick_top()
tmin_i = 40
tmax_i = 56
# Temperature limits are derived from the time window; the time axis runs from tmax (left) to tmin (right).
Tmin_i = control_parameters.calcLJTempAtTime(tmax_i)
Tmax_i = control_parameters.calcLJTempAtTime(tmin_i)
ax2.set_xlim  (tmax_i, tmin_i)
ax.set_xlim  (Tmin_i,Tmax_i)

# Coarse-grain the volume per bead into time bins of width dt.
dt        = 1.0
times     = np.arange(len(temp))/(1.*len(temp))*len(g)   # sample index mapped linearly onto [0, len(g))
tidx      = (times/dt).astype(int)                        # bin index of every sample
n_result  = tidx[-1]+1
v1        = np.zeros(n_result)   # accumulates sum of v     -> mean after division
v2        = np.zeros(n_result)   # accumulates sum of v**2  -> mean square after division
count     = np.zeros(n_result)   # number of samples per bin

for i in np.arange(len(tidx)):
    j         = tidx[i]
    count[j] += 1
    v1[j]    += vol[i]/obj.totalAtoms
    v2[j]    += (vol[i]/obj.totalAtoms)**2

v1    = v1/count
v2    = v2/count
v_err = 2.*np.sqrt((v2-v1**2))   # error bar: twice the per-bin standard deviation
# NOTE(review): bin j collects times in [j*dt, (j+1)*dt), whose centre is (j+.5)*dt;
# the markers are placed at (j-.5)*dt here — confirm that this offset is intended.
t_cg  = ((np.arange(n_result))-.5)*dt
MS    = 12                       # marker size; also read by the Fig. 1b cells further down

ax.scatter(temp, vol/obj.totalAtoms, alpha = 0.9, s=0.4, c="grey")
ax2.errorbar(t_cg, v1, yerr=v_err, alpha = 0.9, ms = MS, marker = "o", ls="",capsize = 10)

# Vertical markers at the four characteristic times.
ax2.axvline(param["t_pre"], color = ocol["t_pre"],  ls = '--', lw = 2, label = '')
ax2.axvline(param["t_tr"],  color = ocol["t_tr"],   ls = '--', lw = 2, label = '')
ax2.axvline(param["t_min"], color = ocol["t_min"],  ls = '--', lw = 2, label = '')
ax2.axvline(param["t_end"], color = ocol["t_end"],  ls = '--', lw = 2, label = '')

ypos = 0.459
xadd = -0.5
ax2.text(param["t_pre"]+xadd, ypos,'$t_{pre}$', color = ocol["t_pre"], fontsize=40)
ax2.text(param["t_tr"]+xadd,  ypos,'$t_{tr}$' , color = ocol["t_tr"],  fontsize=40)

ax2.tick_params(axis='x', labelsize=27)
ax.tick_params (axis='x', labelsize=27)
ax.tick_params (axis='y', labelsize=27)

yl1, yl2 = 0.4555, 0.4595
ax.set_yticks(np.arange(0.456, yl2, 0.002))
ax.set_ylim(yl1, yl2)
# NOTE(review): this arrow spans x = 20..40 on the time axis, i.e. outside the 40..56
# window set above — it looks like a leftover from the full Fig. 1a cell.
ax2.arrow(20, 0.4635, 20, -0.0028, width=.0003, head_length=3, length_includes_head=True)
plt.savefig('../../03_figures/Fig01/Fig01a_inset.png',bbox_inches="tight")


In [None]:
# Figure 1b (inset): zoom of the heat capacity Cp in the time window t = 40..56,
# using two data files (interval 40: dashed grey line, interval 100: markers).
filename = '../Cp_t_interval40.dat'
data     = np.loadtxt(filename, delimiter=",")
print(data.shape)
temp     = data[:,-1]   # last column: temperature
cp       = data[:,1]    # second column: Cp
time     = data[:,0]    # first column: time (not used in this cell)

filename   = '../Cp_t_interval100.dat'
data_cg    = np.loadtxt(filename, delimiter=",")
print(data_cg.shape)
temp_cg    = data_cg[:,-1]
cp_cg      = data_cg[:,1]
time_cg    = data_cg[:,0]

fig  = plt.figure(figsize=(9,6))
ax   = fig.add_subplot()
ax2  = ax.twiny()       # ax: temperature (bottom axis), ax2: time (top axis)
ax2.xaxis.tick_top()
tmin_i = 40
tmax_i = 56
Tmin_i = control_parameters.calcLJTempAtTime(tmax_i)
Tmax_i = control_parameters.calcLJTempAtTime(tmin_i)
ax2.set_xlim  (tmax_i, tmin_i)
ax.set_xlim  (Tmin_i,Tmax_i)

# NOTE(review): ms has no visible effect on the first line because no marker is set.
ax.plot(temp, cp, alpha = 0.9, ms=0.4, c="grey", ls="--")
# MS is not defined in this cell; it is the marker size set in the Fig. 1a inset cell.
ax.plot(temp_cg, cp_cg, alpha = 0.9, ms = MS, marker = "o", ls="-")

# Vertical markers at the four characteristic times.
ax2.axvline(param["t_pre"], color = ocol["t_pre"],  ls = '--', lw = 2, label = '')
ax2.axvline(param["t_tr"],  color = ocol["t_tr"],   ls = '--', lw = 2, label = '')
ax2.axvline(param["t_min"], color = ocol["t_min"],  ls = '--', lw = 2, label = '')
ax2.axvline(param["t_end"], color = ocol["t_end"],  ls = '--', lw = 2, label = '')

ypos  = 25
xadd  = -0.5
ax2.text(param["t_pre"]+xadd, ypos,'$t_{pre}$', color = ocol["t_pre"], fontsize=40)
ax2.text(param["t_tr"]+xadd,  ypos,'$t_{tr}$',  color = ocol["t_tr"],  fontsize=40)

ax2.tick_params(axis='x', labelsize=27)
ax.tick_params(axis='x',  labelsize=27)
ax.tick_params(axis='y',  labelsize=27)

ax.set_ylim(3, 30)
# NOTE(review): arrow coordinates (y about 0.46, x = 20..40) are those of the volume plot and lie
# outside both axis ranges set above — apparently a copy-paste leftover; confirm before removing.
ax2.arrow(20, 0.4635, 20, -0.0028, width=.0003, head_length=3, length_includes_head=True)
plt.savefig('../../03_figures/Fig01/Fig01b_inset.png',bbox_inches="tight")


In [None]:
# Figure 1a: specific volume versus temperature (bottom axis) and simulation time
# (top axis), with the characteristic times marked and the zoomed inset pasted in.
filename = '../../03_figures/Fig01/Fig01a.csv'
data     = np.loadtxt(filename, delimiter=",")
temp     = data[0]   # first CSV row: temperature
vol      = data[1]   # second CSV row: volume

fig      = plt.figure(figsize=(10,7))
ax       = fig.add_subplot()
ax2      = ax.twiny()   # ax: temperature, ax2: time

tmin_i = 0
tmax_i = 76
# Temperature limits follow from the time window; the time axis runs from tmax (left) to tmin (right).
Tmin_i = control_parameters.calcLJTempAtTime(tmax_i)
Tmax_i = control_parameters.calcLJTempAtTime(tmin_i)
ax2.set_xlim  (tmax_i, tmin_i)
ax.set_xlim  (Tmin_i,Tmax_i)

ax.plot(temp, vol/obj.totalAtoms, alpha = 0.9)
ax.set_xlabel('Temperature (LJ-units)', fontsize = 24, labelpad = 11)
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
ax.set_ylabel(r'Specific volume ($\sigma^3$)',  fontsize = 24, labelpad = 10)

ax2.xaxis.tick_top()
ax2.set_xlabel('Time, $t$ ($10^6$ MD steps)', fontsize = 24, labelpad = 11)
ax2.xaxis.set_label_position('top')

# Vertical markers at the characteristic times (t_end shifted slightly to stay inside the frame).
ax2.axvline(param["t_pre"],    color = ocol["t_pre"],  ls = '--', lw = 2, label = '')
ax2.axvline(param["t_tr"],     color = ocol["t_tr"],   ls = '--', lw = 2, label = '')
ax2.axvline(param["t_min"],    color = ocol["t_min"],  ls = '--', lw = 2, label = '')
ax2.axvline(param["t_end"]-.2, color = ocol["t_end"],  ls = '--', lw = 2, label = '')

# Text labels for the markers, each on a semi-transparent white box.
ypos    = 0.4262
xadd    = 1.5
yincr   = 0.003
thandle = ax2.text(param["t_pre"]+xadd,  ypos+2*yincr, '$t_{pre}$',color = ocol["t_pre"],fontsize=22)
thandle.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='None'))
thandle = ax2.text(param["t_tr"]+xadd,   ypos+yincr,   '$t_{tr}$' ,color = ocol["t_tr"], fontsize=22)
thandle.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='None'))
thandle = ax2.text(param["t_min"]+xadd,  ypos,         '$t_{min}$',color = ocol["t_min"],fontsize=22)
thandle.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='None'))
thandle = ax2.text(param["t_end"]-xadd,  ypos,         '$t_{end}$',color = ocol["t_end"],fontsize=22)
thandle.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='None'))

ax2.text(36, 0.463, 'Cooling', fontsize=24, rotation=8)

# Real-time annotation for t = tlabel (in 10^6 MD steps): steps * 0.01 tau, converted to microseconds.
# tau and tlabel are also read by the Fig. 1b cell below.
tlabel  = 70
tau     = 2.55e-12
text    = "$["+str(round(tau*tlabel*1e6*0.01*1e6,1))+r"\mathrm{\mu s}]$"
ax2.text(tlabel+3, 0.4675, text, fontsize=20)

# Temperature ticks: LJ value on the first line, real temperature in K on the second.
Tlabels = np.arange(0.75, 0.9, 0.05)

label_texts = list()
for T in Tlabels:
    Treal= control_parameters.calcRealTemp(T)
    text = "$"+str(round(T,2))+"$\n $["+str(int(round(Treal+1e-8,0)))+r"\mathrm{K}$]"
    label_texts.append(text)
ax.set_xticks(Tlabels, label_texts)

ax.set_ylim(0.425, 0.47)

Ylabels  = np.arange(0.43, 0.47, 0.01)
ax.set_yticks(Ylabels)

# Specific volume in nm^3 next to two of the y ticks (sigma = 0.53 nm in this conversion).
Ylabels  = [0.44, 0.46]
for Y in Ylabels:
    text = "[$"+str(round(Y*.53**3,3))+r"\mathrm{nm^3}$]"
    ax.text(0.75+0.002, Y-0.0008, text, fontsize=20)

ax2.tick_params(axis='x', labelsize=21)
ax.tick_params (axis='x', labelsize=21)
ax.tick_params (axis='y', labelsize=21)

# Paste the previously saved inset image behind the curve (zorder=-1).
arr_inset = mpimg.imread('../../03_figures/Fig01/Fig01a_inset.png')
imagebox  = OffsetImage(arr_inset, zoom=0.133)
ab        = AnnotationBbox(imagebox, (0.85, 0.444),frameon = False,zorder=-1)
ax.add_artist(ab)

# Arrow accompanying the 'Cooling' text.
ax2.arrow(20, 0.4635, 20, -0.0028, width=.0003, head_length=3, length_includes_head=True)
plt.savefig('../../03_figures/Fig01/Fig01a.png',bbox_inches="tight")


In [None]:
# Figure 1b: heat capacity Cp versus temperature (bottom axis) and simulation time
# (top axis), with the characteristic times marked and the zoomed inset pasted in.
filename   = '../Cp_t_interval100.dat'
data_cg    = np.loadtxt(filename, delimiter=",")
print(data_cg.shape)
temp_cg    = data_cg[:,-1]   # last column: temperature
cp_cg      = data_cg[:,1]    # second column: Cp
time_cg    = data_cg[:,0]    # first column: time

# Source data of the figure.
np.savetxt('../../03_figures/Fig01/Fig01b.csv', np.c_[temp_cg, cp_cg, time_cg], delimiter=",")

fig  = plt.figure(figsize=(10,7))
ax   = fig.add_subplot()
ax2  = ax.twiny()            # ax: temperature, ax2: time

tmin_i = 0
tmax_i = 76
Tmin_i = control_parameters.calcLJTempAtTime(tmax_i)
Tmax_i = control_parameters.calcLJTempAtTime(tmin_i)
ax2.set_xlim  (tmax_i, tmin_i)
ax.set_xlim  (Tmin_i,Tmax_i)


# MS is the marker size defined in the Fig. 1a inset cell.
ax.plot(temp_cg, cp_cg, alpha = 0.9,ms = 0.5*MS, marker = "o", ls="-")
ax.set_xlabel('Temperature (LJ-units)',  fontsize = 24, labelpad = 11)
ax.set_ylabel('Cp (LJ-units)', fontsize = 24, labelpad = 10)

ax2.xaxis.tick_top()
ax2.set_xlabel('Time, $t$ ($10^6$ MD steps)', fontsize = 24, labelpad = 11)
ax2.xaxis.set_label_position('top')

# Vertical markers at the characteristic times (t_end shifted slightly to stay inside the frame).
ax2.axvline(param["t_pre"],    color = ocol["t_pre"],  ls = '--', lw = 2, label = '')
ax2.axvline(param["t_tr"],     color = ocol["t_tr"],   ls = '--', lw = 2, label = '')
ax2.axvline(param["t_min"],    color = ocol["t_min"],  ls = '--', lw = 2, label = '')
ax2.axvline(param["t_end"]-.2, color = ocol["t_end"],  ls = '--', lw = 2, label = '')

# Text labels for the markers, each on a semi-transparent white box.
ypos    = 12
xadd    = 2
yincr   = 30
thandle = ax2.text(param["t_pre"]+xadd,  ypos+3*yincr,   '$t_{pre}$', color = ocol["t_pre"], fontsize=22)
thandle.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='None'))
thandle = ax2.text(param["t_tr"]+xadd,   ypos+2.5*yincr, '$t_{tr}$' , color = ocol["t_tr"],  fontsize=22)
thandle.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='None'))
thandle = ax2.text(param["t_min"]+xadd,  ypos,           '$t_{min}$', color = ocol["t_min"], fontsize=22)
thandle.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='None'))
thandle = ax2.text(param["t_end"]-xadd,  ypos,           '$t_{end}$', color = ocol["t_end"], fontsize=22)
thandle.set_bbox(dict(facecolor='white', alpha=0.8, edgecolor='None'))

# Paste the previously saved inset image behind the curve (zorder=-1).
arr_inset = mpimg.imread('../../03_figures/Fig01/Fig01b_inset.png')
imagebox  = OffsetImage(arr_inset, zoom=0.133)
ab        = AnnotationBbox(imagebox, (0.855, 60), frameon = False, zorder = -1)
ax.add_artist(ab)

# Temperature ticks: LJ value on the first line, real temperature in K on the second.
Tlabels     = np.arange(0.75, 0.9, 0.05)
label_texts = list()

for T in Tlabels:
    Treal= control_parameters.calcRealTemp(T)
    text = "$"+str(round(T,2))+"$\n $["+str(int(round(Treal+1e-8,0)))+r"\mathrm{K}$]"
    label_texts.append(text)

ax.set_xticks  (Tlabels,label_texts)
ax2.tick_params(axis='x', labelsize=21)
ax.tick_params (axis='x', labelsize=21)
ax.tick_params (axis='y', labelsize=21)

# Real-time annotation for t = tlabel (in 10^6 MD steps). tlabel and tau are set here
# explicitly (same values as in the Fig. 1a cell) because `tlabel` is also used as a
# string loop variable in the Table 1 cell, which made the implicit reuse fragile.
tlabel = 70
tau    = 2.55e-12
text   = "$["+str(round(tau*tlabel*1e6*0.01*1e6, 1))+r"\mathrm{\mu s}]$"
ax2.text(tlabel+3, 103, text, fontsize=20)

plt.savefig('../../03_figures/Fig01/Fig01b.png', bbox_inches="tight")


# Stem length (SL) labels for whole time series

In [None]:
def stemlen(arr):
    """Replace every entry of ``arr`` by the length of the stem it belongs to.

    A stem is a maximal run of consecutive entries that are <= 30 (the angle
    threshold). Every entry of such a run is overwritten with the run length;
    every other entry (including NaN) is set to 0.

    Parameters
    ----------
    arr : numpy.ndarray
        Sequence of angles, 1-D or of shape (n, 1). It is modified IN PLACE.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.
    """
    angle = 30
    n     = len(arr)
    index = 0
    while index < n:
        if arr[index] <= angle:
            # Find the end of the run that starts at `index`.
            end = index
            while end < n and arr[end] <= angle:
                end += 1
            arr[index:end] = end - index
            index = end
        else:
            # A plain `else` also covers NaN; with the former
            # `elif arr[index] > angle` a NaN matched neither branch and the
            # loop never advanced.
            arr[index] = 0
            index += 1
    return arr

def stemlenOnCosines(arr, angle=30):
    """Replace every cosine in ``arr`` by the length of the stem it belongs to.

    A stem is a maximal run of consecutive entries whose value is
    >= cos(angle), i.e. whose angle is at most ``angle`` degrees. Every entry
    of such a run is overwritten with the run length; every other entry
    (including NaN) is set to 0.

    Parameters
    ----------
    arr : numpy.ndarray
        Sequence of cosines, 1-D or of shape (n, 1). It is modified IN PLACE.
    angle : float, optional
        Maximum angle in degrees that still counts as part of a stem
        (default 30, the value that used to be hard-coded).

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.
    """
    # np.deg2rad replaces the hand-typed, truncated value of pi.
    threshold = np.cos(np.deg2rad(angle))
    n         = len(arr)
    index     = 0
    while index < n:
        if arr[index] >= threshold:
            # Find the end of the run that starts at `index`.
            end = index
            while end < n and arr[end] >= threshold:
                end += 1
            arr[index:end] = end - index
            index = end
        else:
            # A plain `else` also covers NaN; with the former
            # `elif arr[index] < threshold` a NaN matched neither branch and
            # the loop never advanced.
            arr[index] = 0
            index += 1
    return arr

def getSL(mol, bonds=None):
    """Compute the stem length of every bead, chain by chain.

    For each chain the dot products of consecutive bond vectors are formed;
    the first value is duplicated so that the chain yields one value per bond
    vector. The values are then converted to stem lengths with
    ``stemlenOnCosines``.

    Parameters
    ----------
    mol : numpy.ndarray
        Molecule (chain) ID of every bead.
    bonds : numpy.ndarray, optional
        Bond vectors, indexable with a boolean mask over ``mol``. Defaults to
        the notebook-global ``obj.bonds``, which is what the function always
        used before this argument existed.

    Returns
    -------
    numpy.ndarray
        Column vector of shape (n, 1) with the stem lengths, chains
        concatenated in ascending order of their ID.
    """
    if bonds is None:
        bonds = obj.bonds

    stems = []
    # np.unique yields the chain IDs in ascending order. Iterating over
    # set(mol) gave no ordering guarantee, so the concatenated result could
    # fall out of step with the per-bead arrays it is compared against.
    for k in np.unique(mol):
        chain_bonds = bonds[mol == k]      # bond vectors of one chain
        # NOTE(review): the dot products are cosines only if the bond vectors
        # have unit length — confirm that ReadCoordinates normalises them.
        dots    = np.sum(chain_bonds[:-1]*chain_bonds[1:], axis=1)
        cosines = np.concatenate([dots[:1], dots]).reshape(-1,1)
        stems.append(stemlenOnCosines(cosines))
    return np.concatenate(stems)

# Stem-length (SL) class labels for every dump file: 1 where the stem length
# exceeds the boundary, 0 otherwise, for both boundaries. Skipped by default.
skip_SL = True
if not skip_SL:

    all_labels   = []
    all_labels2  = []
    for i in g:

        # getSL reads the notebook-global `obj`, so it is rebound for every file.
        obj         = ReadCoordinates(i, param["chainlength"])
        mol         = obj.molID
        bonds       = obj.bonds
        stems       = getSL(mol)

        # Threshold and flatten the (n, 1) column into a 1-D label vector.
        label  = (stems > param["boundary_SL"]).astype(stems.dtype).ravel()
        label2 = (stems > param["boundary_SL2"]).astype(stems.dtype).ravel()

        all_labels.append (label)
        all_labels2.append(label2)

        # Progress line: file name and class counts for both boundaries.
        label0    = np.sum(label  == 0)
        label1    = np.sum(label  == 1)
        label20   = np.sum(label2 == 0)
        label21   = np.sum(label2 == 1)
        print(i, str('  '), label0, label1, label20, label21)

    # Context managers make sure the pickle files are flushed and closed.
    filename   = '../../02_processed_data/Labels/SL_labels.sav'
    with open(filename, 'wb') as fh:
        pickle.dump(all_labels, fh, protocol=pickle.HIGHEST_PROTOCOL)

    filename   = '../../02_processed_data/Labels/SL_labels2.sav'
    with open(filename, 'wb') as fh:
        pickle.dump(all_labels2, fh, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
# Stem lengths for the currently loaded frame (`obj`), saved as source data of Fig. 5a.
stems  = []          # placeholder; overwritten by the getSL call below
mol    = obj.molID
bonds  = obj.bonds   # not passed on: getSL reads obj.bonds itself
stems  = getSL(mol)

np.savetxt('../../03_figures/Fig05/Fig05a.csv', stems, delimiter=",")


In [None]:
# Figure 5a: distribution of stem lengths, split at param["boundary_SL"] into an
# amorphous and a crystalline part; counts are normalised by the total number of beads.
filename = '../../03_figures/Fig05/Fig05a.csv'
stems    = np.loadtxt(filename, delimiter=",")
binsize  = 1
fig, ax  = plt.subplots(figsize=(9,7))

# Part below the boundary (stem length 0 excluded); bin edges sit at half-integers.
hist, bins      = np.histogram(stems[np.logical_and(stems <= param["boundary_SL"],stems > 0)], bins=.5*binsize+np.arange(0,param["boundary_SL"]+binsize,binsize))
hist_normalized = hist / obj.totalAtoms
# Re-draw the precomputed histogram: one sample per bin (its left edge), weighted by the normalised count.
plt.hist(bins[:-1], bins, weights=hist_normalized, color = ocol["amorph"], alpha = 0.8, label = olab["sSL"] + olab["am"])

# Part above the boundary.
hist, bins      = np.histogram(stems[stems > param["boundary_SL"]], bins=.5*binsize+np.arange(param["boundary_SL"],np.max(stems)+binsize,binsize))
hist_normalized = hist / obj.totalAtoms
plt.hist(bins[:-1], bins, weights=hist_normalized, color = ocol["SL"], alpha = 0.8, label = olab["sSL"] + olab["cr"])

# Thick grey line: main boundary; thin dashed line with '*': second boundary.
ax.axvline(x = param["boundary_SL"], color = 'k', linestyle = '-', lw = 5, alpha = 0.2)
ax.axvline(x = param["boundary_SL2"], ymax = 0.3, color = 'black', linestyle = '--', lw = 1, alpha = 1)
ax.text(param["boundary_SL2"]+.1, 0.008,'$*$',color = "black",fontsize=25)

ax.set_xlabel('Stem length, $d_{tt}$', fontsize = 25)
ax.set_ylabel('Fraction of monomers',  fontsize = 25)

ax.tick_params(axis='both', labelsize=18)
lgnd         = ax.legend(fontsize=25) 
plt.savefig('../../03_figures/Fig05/Fig05a.png')


In [None]:
# Load the frame at t_pre (file index param["t_pre"]-1), compute its stem lengths
# and save them as source data of Fig. 10c1.
g          = natsorted(glob.glob('../../01_raw_data/dump_files/dump.*'))
h5_file    = h5py.File('../all_fingerprints.h5', 'r')  # NOTE(review): reopened here; the earlier handle is not closed
h5_keys    = natsorted(list(h5_file.keys()))
file_num   = param["t_pre"]-1
obj        = ReadCoordinates(g[file_num], param["chainlength"])
mol        = obj.molID
bonds      = obj.bonds   # not passed on: getSL reads obj.bonds itself
stems      = getSL(mol)
np.savetxt('../../03_figures/Fig10/Fig10c1.csv', stems, delimiter=",")


In [None]:
# Diagnostic output: indices of beads whose stem length exceeds boundary_SL2,
# and the gaps between consecutive selected indices.
idx         = stems>param["boundary_SL2"]
print ( idx, idx.shape ) 
# idx[:,0] requires the 2-D (n, 1) stems returned by getSL, not the 1-D array re-read from CSV.
stem_select = np.arange(len(stems))[idx[:,0]]

print (stem_select)

print ( stem_select[1:]-stem_select[:len(stem_select)-1])


In [None]:
# Figure 10c1: stem-length distribution at t_pre, split at param["boundary_SL2"],
# normalised by the total number of beads and drawn on a logarithmic y axis.
filename   = '../../03_figures/Fig10/Fig10c1.csv'
stems      = np.loadtxt(filename, delimiter=",")
binsize    = 1
fig, ax    = plt.subplots(figsize=(9,5))

# Part below the second boundary (stem length 0 excluded); bin edges sit at half-integers.
hist, bins      = np.histogram(stems[np.logical_and(stems <= param["boundary_SL2"],stems > 0)], bins=.5*binsize+np.arange(0,param["boundary_SL2"]+binsize, binsize))
hist_normalized = hist / obj.totalAtoms
plt.hist(bins[:-1], bins, weights=hist_normalized, color = ocol["amorph"], alpha = 0.8, label = olab["sSLII"] + olab["am"])

# Part above the second boundary.
hist, bins      = np.histogram(stems[stems > param["boundary_SL2"]], bins=.5*binsize+np.arange(param["boundary_SL2"], np.max(stems)+binsize, binsize))
hist_normalized = hist / obj.totalAtoms
plt.hist(bins[:-1], bins, weights=hist_normalized, color = ocol["SL"], alpha = 0.8, label = olab["sSLII"] + olab["cr"])

ax.axvline(x = param["boundary_SL2"], ymax = 0.3, color = 'black', linestyle = '--', lw = 1, alpha = 1)
ax.text(param["boundary_SL2"]+.3, 1e-4, '*', color = "black", fontsize=25)

plt.text(15, 2e-3, '$t=t_{pre}$', color=ocol["t_pre"], fontsize=28)

ax.set_xlabel('Stem length, $d_{tt}$',     fontsize = 27)
ax.set_ylabel('Fraction of monomers', fontsize = 27)

ax.tick_params(axis='both', labelsize=18)
lgnd        = ax.legend(fontsize=25) 
plt.tight_layout()
plt.yscale('log')   # switched after tight_layout; the saved file uses bbox_inches="tight" anyway
plt.savefig('../../03_figures/Fig10/Fig10c1.png', bbox_inches="tight")


# Average orientation (AO) for whole time series

In [None]:
# Figure 5b: distribution of the rescaled average-orientation (AO) parameter for the
# last frame, split at param["boundary_AO"] into an amorphous and a crystalline part.
file_num       = -1
h5_file_avgor  = h5py.File('../../02_processed_data/Labels/AO_P2.h5', 'r')
h5_keys_avg    = natsorted(list(h5_file_avgor.keys()))
target_lab     = np.array(h5_file_avgor.get(h5_keys_avg[file_num])).copy()
# Rescale x -> (2x+1)/3; NOTE(review): presumably converts P2 to <cos^2>, as the axis label suggests — confirm.
target_resc    = (2*target_lab + 1) / 3
np.savetxt('../../03_figures/Fig05/Fig05b.csv', target_resc, delimiter=",")

# Re-read the data just written, so the figure is drawn from its published source file.
filename       = '../../03_figures/Fig05/Fig05b.csv'
target_resc    = np.loadtxt(filename, delimiter=",")
binsize        = 0.04
fig, ax        = plt.subplots(figsize=(9,7))

# Normalisation uses obj.totalAtoms of whichever frame is currently loaded in `obj`.
hist, bins     = np.histogram(target_resc[target_resc <= param["boundary_AO"]], bins=np.arange(0,param["boundary_AO"]+.5*binsize,binsize))
hist_normalized= hist / obj.totalAtoms
plt.hist(bins[:-1], bins, weights=hist_normalized, color = ocol["amorph"], alpha = 0.8, label = olab["sAO"] + olab["am"])

hist, bins     = np.histogram(target_resc[target_resc > param["boundary_AO"]], bins=np.arange(param["boundary_AO"],1.0+.5*binsize,binsize))
hist_normalized= hist / obj.totalAtoms
plt.hist(bins[:-1], bins, weights=hist_normalized, color = ocol["AO"], alpha = 0.8, label = olab["sAO"] + olab["cr"])

# Thick grey line: main boundary; thin dashed line with '*': second boundary.
ax.axvline(x = param["boundary_AO"], color = 'k', linestyle = '-', lw = 5, alpha = 0.2)
ax.axvline(x = param["boundary_AO2"], ymax = 0.22, color = 'black', linestyle = '--', lw = 1, alpha = 1)
ax.text(param["boundary_AO2"]+0.01,0.01,'*',color = "black",fontsize=25)

plt.xlabel(r'$S_{AO};\langle cos^2(\Theta)\rangle$', fontsize = 25)
plt.ylabel('Fraction of monomers',  fontsize = 25)

ax.tick_params(axis='both', labelsize=18)
lgnd         = ax.legend(fontsize=25)
plt.savefig('../../03_figures/Fig05/Fig05b.png')


In [None]:
# Figure 10c2: distribution of the rescaled average-orientation (AO) parameter at
# t_pre, split at param["boundary_AO2"], normalised by the total number of beads.
file_num       = param["t_pre"]-1
h5_file_avgor  = h5py.File('../../02_processed_data/Labels/AO_P2.h5', 'r')
h5_keys_avg    = natsorted(list(h5_file_avgor.keys()))
target_lab     = np.array(h5_file_avgor.get(h5_keys_avg[file_num])).copy()
target_resc    = (2*target_lab + 1) / 3

# Write the source data of THIS figure and read it back. The cell used to
# round-trip through Fig05/Fig05b.csv, which overwrote the last-frame data of
# Fig. 5b with the t_pre data.
filename       = '../../03_figures/Fig10/Fig10c2.csv'
np.savetxt(filename, target_resc, delimiter=",")
target_resc    = np.loadtxt(filename, delimiter=",")
binsize        = 0.005

fig, ax        = plt.subplots(figsize=(9,4.65))
# Part below / above the second boundary; each histogram is re-drawn from its
# precomputed, normalised counts.
hist, bins     = np.histogram(target_resc[target_resc <= param["boundary_AO2"]], bins=np.arange(0,param["boundary_AO2"]+.5*binsize,binsize))
hist_normalized= hist / obj.totalAtoms
plt.hist(bins[:-1], bins, weights=hist_normalized, color = ocol["amorph"], alpha = 0.8, label = olab["sAOII"] + olab["am"])
hist, bins     = np.histogram(target_resc[target_resc > param["boundary_AO2"]], bins=np.arange(param["boundary_AO2"],1.0+.5*binsize,binsize))
hist_normalized= hist / obj.totalAtoms
plt.hist(bins[:-1], bins, weights=hist_normalized, color = ocol["AO"], alpha = 0.8, label = olab["sAOII"] + olab["cr"])

ax.axvline(x = param["boundary_AO2"], ymax = 0.22, ymin=0.05, color = 'black', linestyle = '--', lw = 1, alpha = 1)
ax.text(param["boundary_AO2"]+0.01,0.0001,'*',color = "black",fontsize=25)

plt.text(0.55,0.0003,'$t=t_{pre}$',color=ocol["t_pre"], fontsize=28)

ax.tick_params(axis='both', labelsize=18)
lgnd          = plt.legend(fontsize=25)
plt.xlabel(r'$S_{AO};\langle cos^2(\Theta)\rangle$', fontsize = 27)
plt.ylabel('Fraction of monomers',  fontsize = 26)
plt.ylim([0,0.0006])
plt.xlim([0.18,0.68])

plt.savefig('../../03_figures/Fig10/Fig10c2.png', bbox_inches="tight")


In [None]:
# Average-orientation (AO) class labels for every dump file: 1 where the rescaled
# order parameter (rounded to two decimals) exceeds the boundary, 0 otherwise.
h5_file_avgor  = h5py.File('../../02_processed_data/Labels/AO_P2.h5', 'r')
h5_keys_avg    = natsorted(list(h5_file_avgor.keys()))

# One row per dump file, one column per bead; rows start as all zeros (amorphous).
all_ao_labels  = np.zeros([len(g), obj.totalAtoms])
all_ao_labels2 = np.zeros([len(g), obj.totalAtoms])

for time in np.arange(len(g)):
    print(time)
    target_lab     = np.array(h5_file_avgor.get(h5_keys_avg[time])).copy()
    target_resc    = np.round(np.array((2*target_lab + 1)/3), 2)

    # Only the crystalline entries need writing; the rest are already zero.
    crystalline    = np.where(target_resc >  param["boundary_AO"])[0]
    all_ao_labels[time][crystalline] = 1

    crystalline2   = np.where(target_resc >  param["boundary_AO2"])[0]
    all_ao_labels2[time][crystalline2] = 1

# Context managers make sure the pickle files are flushed and closed.
filename = '../../02_processed_data/Labels/AO_labels.sav'
with open(filename, 'wb') as fh:
    pickle.dump(all_ao_labels, fh, protocol=pickle.HIGHEST_PROTOCOL)
filename = '../../02_processed_data/Labels/AO_labels2.sav'
with open(filename, 'wb') as fh:
    pickle.dump(all_ao_labels2, fh, protocol=pickle.HIGHEST_PROTOCOL)

# ROC Analysis

In [None]:
def scanOrderParameterBoundaries(orderParameter, boundaries):
    """Classify an order parameter against a series of candidate boundaries.

    Parameters
    ----------
    orderParameter : numpy.ndarray
        One value per bead, 1-D or of shape (n, 1).
    boundaries : sequence of float
        Candidate thresholds.

    Returns
    -------
    numpy.ndarray
        Float array of shape (len(boundaries), len(orderParameter)); entry
        [i, k] is 1 if bead k lies strictly above boundaries[i], else 0.
    """
    output = np.zeros([len(boundaries), len(orderParameter)])

    for row, boundary in zip(output, boundaries):
        # Rows start at zero, so only the entries above the boundary are written.
        above      = np.where(orderParameter > boundary)[0]
        row[above] = 1

    return output

def ROC_analysis(testlabels, refPositives):
    """Compare rows of test labels with a set of reference-positive indices.

    Parameters
    ----------
    testlabels : sequence of numpy.ndarray
        One label vector per candidate boundary; entries equal to 1 are positives.
    refPositives : numpy.ndarray
        Indices of the reference positives.

    Returns
    -------
    tuple
        (list with the number of positives per row,
         array of [extra, hit] pairs per row,
         index of the row whose number of positives is closest to the
         number of reference positives).
        ``hit`` is the fraction of reference positives that the row also
        flags. ``extra`` is the number of flagged entries that are NOT
        reference positives, divided by the number of reference positives —
        i.e. it counts false positives but is not a conventional false
        positive rate.
    """
    reference   = refPositives.copy()
    n_reference = len(reference)
    print ( "number of reference counts = ", n_reference)

    rates             = []
    positives_per_row = []

    for row in testlabels:
        flagged    = np.where(row == 1)[0]
        hit_frac   = len(np.intersect1d(flagged, reference))/len(reference)
        extra_frac = len(set(flagged) - set(reference))/len(reference)
        print(extra_frac, hit_frac)
        rates.append([extra_frac, hit_frac])
        positives_per_row.append(len(flagged))

    # Absolute difference between each row's positive count and the reference count.
    count_gap = np.sqrt((np.array(positives_per_row)-n_reference)**2)

    return positives_per_row ,np.array(rates), np.argmin(count_gap)


def ROC_analysis2(testlabels, refPositives):
    """Compute ROC points for rows of test labels against reference positives.

    Parameters
    ----------
    testlabels : numpy.ndarray
        2-D array, one row of labels per candidate boundary; entries equal
        to 1 are positives.
    refPositives : numpy.ndarray
        Indices (columns of ``testlabels``) of the reference positives.

    Returns
    -------
    tuple
        (array with the number of positives per row,
         array of shape (rows, 2) holding (FPR, TPR) per row,
         index of the row whose number of positives is closest to the
         number of reference positives,
         index of the row whose ROC point lies farthest from the diagonal
         TPR = FPR).
    """
    number = testlabels.shape[1]
    refP   = np.zeros(number, dtype=bool)
    refP[refPositives] = True
    refN   = ~refP

    numRefP = np.sum(refP)

    filed     = []
    numLabels = []
    for row in testlabels:

        P = (row == 1)
        N = ~P

        numLabels.append(np.sum(P))

        TP = np.sum(P & refP)
        FP = np.sum(P & refN)
        TN = np.sum(N & refN)
        FN = np.sum(N & refP)

        TPR = TP/(TP+FN)
        FPR = FP/(FP+TN)

        filed.append( (FPR, TPR) )

    numLabels = np.array(numLabels)
    filed     = np.array(filed)

    # Absolute difference between each row's positive count and the reference count.
    d2        = np.abs(numLabels - numRefP)

    # Perpendicular distance of (FPR, TPR) from the diagonal: |TPR - FPR| / sqrt(2).
    # The previous code subtracted the scalar projection (FPR+TPR)/sqrt(2) from
    # both coordinates instead of the projected point ((FPR+TPR)/2, (FPR+TPR)/2),
    # which gave a non-zero "distance" even for points on the diagonal.
    d2_diag   = np.abs(filed[:,1] - filed[:,0])/np.sqrt(2.)

    return numLabels, filed, np.argmin(d2), np.argmax(d2_diag)


In [None]:
def putTextLabels(filedata, bins, di, lx, ly, color="black", fs=15):
    """Annotate every ``di``-th point of a curve with its bin value.

    Parameters
    ----------
    filedata : numpy.ndarray
        Points of the curve; column 0 is x, column 1 is y.
    bins : sequence of float
        Value to print next to each point (rounded to two decimals).
    di : int
        Stride between labelled points.
    lx, ly : float
        Offset of the text relative to the point.
    color, fs
        Text colour and font size, passed on to ``plt.text``.

    The last point of ``filedata`` is never labelled, because the index range
    stops before ``len(filedata)-1``.
    """
    stop = len(filedata)-1
    for k in np.arange(0, stop, di):
        label = np.round(bins[k],decimals=2)
        plt.text(filedata[k,0]+lx, filedata[k,1]+ly, label, color = color, fontsize=fs)

def niceArrow(dataset, length, offset, angle, color="grey", w=0.008, hl=0.006):
    """Draw an arrow that points at a data point from a given direction.

    Parameters
    ----------
    dataset : sequence
        The target point; element 0 is x, element 1 is y.
    length : float
        Length of the arrow.
    offset : float
        Gap between the arrow tip and the target point.
    angle : float
        Direction, in degrees, in which the arrow tail lies as seen from the point.
    color, w, hl
        Colour, shaft width and head length, passed on to ``plt.arrow``.
    """
    x, y    = dataset[0], dataset[1]
    # np.deg2rad replaces the hand-typed, truncated value of pi.
    rad     = np.deg2rad(angle)
    fcos    = np.cos(rad)
    fsin    = np.sin(rad)
    # Tail of the arrow: offset + length away from the point ...
    to_x    = x + fcos*(offset+length)
    to_y    = y + fsin*(offset+length)
    # ... and the arrow runs back towards the point, stopping `offset` short of it.
    lx      = -length*fcos
    ly      = -length*fsin
    print ("arrow for x, y ", x, y, to_x, to_y)
    plt.arrow(to_x, to_y, lx, ly, width=w, head_length=hl, length_includes_head=True, color=color )

In [None]:
# ROC input at t_end (last frame): stem lengths and AO parameter scanned over a
# range of candidate boundaries, plus the autoencoder (AE) reference labels.
file_num= -1
print ("reading trajectory data for time step ...", file_num)
g       = natsorted(glob.glob('../../01_raw_data/dump_files/dump.*'))
h5_file = h5py.File('../all_fingerprints.h5', 'r')
h5_keys = natsorted(list(h5_file.keys()))
obj     = ReadCoordinates(g[file_num], param["chainlength"])
mol     = obj.molID
bonds   = obj.bonds
print ("calc stems...")

stems   = getSL(mol)
binsize = 1
# Only the bin edges are used: they serve as the candidate SL boundaries.
num, slbins    = np.histogram(stems, bins = np.arange(2, 30, binsize).astype(int))
print ( "sl bins = ", slbins)
print ("retrieve P2 data...")

h5_file_avgor  = h5py.File('../../02_processed_data/Labels/AO_P2.h5', 'r')
h5_keys_avg    = natsorted(list(h5_file_avgor.keys()))
target_lab     = np.array(h5_file_avgor.get(h5_keys_avg[file_num])).copy()
target_resc    = (2*target_lab + 1) / 3
binsize        = 0.02
# Only the bin edges are used: they serve as the candidate AO boundaries.
num, aobins    = np.histogram(target_resc, bins = np.arange(0.4, 0.9, binsize))
print ( "ao bins = ",aobins)

# AE labels of the selected frame; indices with label 1 are the reference positives.
# The file is a pickle: only load files produced by this project.
filename = '../../02_processed_data/Labels/AE_labels.sav'
with open(filename, 'rb') as fh:
    aelab = np.array(pickle.load(fh))[file_num]
ae1      = np.where(aelab == 1)[0]
print ("scan for SL boundaries")
sllab    = scanOrderParameterBoundaries(stems, slbins)
print ("scan for AO boundaries")
aolab    = scanOrderParameterBoundaries(target_resc, aobins)


In [None]:
# Figure S8a: ROC curves of the SL and AO classifications at t_end, with the AE
# labels as reference. Arrows mark the boundary used in the paper (coloured) and
# the point farthest from the diagonal as returned by ROC_analysis2 (grey).
plt.figure(figsize=(6,7))
numberSL, filedata, minIdx, minIdx2 = ROC_analysis2(sllab, ae1)
minIdx        = np.argmin(np.sqrt((slbins-param["boundary_SL"])**2)) # switching back to the idx where SL=15
numberSL_tend = numberSL[minIdx]   # number of SL positives at the chosen boundary

plt.scatter(filedata[:,0], filedata[:,1], label = olab["sSL"], marker="v", color=ocol["SL"], s = 70, alpha=0.6, zorder=-1)
putTextLabels(filedata, slbins, 3, 0.005, -0.015, ocol["SL"]*0.3, 8)
niceArrow(filedata[minIdx],  0.05, 0.02, -45, 0.8*ocol["SL"])
niceArrow(filedata[minIdx2], 0.05, 0.02, -45, [0.8, 0.8, 0.8])
plt.text(0.25, 0.75, '$t=t_{end}$', color=ocol["t_end"], fontsize=28)

# Same for AO; filedata, minIdx and minIdx2 are overwritten by the AO results.
numberAO, filedata, minIdx, minIdx2 = ROC_analysis2(aolab, ae1)
minIdx = np.argmin(np.sqrt((aobins - param["boundary_AO"])**2))   # index of the AO bin edge closest to boundary_AO

plt.scatter(filedata[:,0], filedata[:,1], label = olab["sAO"], color=ocol["AO"], s = 70, alpha=0.6)
putTextLabels(filedata, aobins, 2, -0.02, +0.01, ocol["AO"]*0.3, 8)
niceArrow(filedata[minIdx],  0.05, 0.035, -45+180, 0.8*ocol["AO"])
niceArrow(filedata[minIdx2], 0.05, 0.035, -45+180, [0.8, 0.8, 0.8])

plt.ylabel('True Positive Rate',  fontsize = 20)
plt.xlabel('False Positive Rate', fontsize = 20)
plt.legend(loc="lower right")
plt.grid(alpha=0.3)

# NOTE(review): filedata holds only the AO curve at this point, so the CSV does not
# contain the SL curve shown in the same figure — confirm whether that is intended.
np.savetxt('../../03_figures/FigS8/FigS8a.csv', filedata, delimiter=",")
plt.savefig('../../03_figures/FigS8/FigS8a.png', bbox_inches="tight")

# Figure S8c: number of positives (in millions) versus boundary for AO (bottom axis)
# and SL (top axis); dashed lines mark the AE count and the SL count at its chosen boundary.
numberAE = len(ae1)
fig      = plt.figure(figsize=(6,7))
ax       = fig.add_subplot()

ax.plot(aobins, numberAO/1e6, ms=2*ps_cr, color = ocol["AO"], marker = "o", label = olab["sAO"])

ax.axhline(numberAE/1e6,      color = ocol["AE"], ls = '--', lw = 2, label = '', zorder=-1)
ax.axhline(numberSL_tend/1e6, color = ocol["SL"], ls = '--', lw = 2, label = '', zorder=-1)
ax.grid()

ax.set_xlabel("AO boundary", fontsize = 20)
ax.set_ylabel("Crystallinity at $t_{end}$", fontsize = 20)
ax2  = ax.twiny()
ax2.xaxis.tick_top()

ax2.set_xlabel('SL boundary, $d_{tt}$', fontsize = 20, labelpad = 11)
ax2.xaxis.set_label_position('top')

ax2.plot(slbins, numberSL/1e6, ms=2*ps_cr, color = ocol["SL"], marker = "v", label = olab["sSL"])

plt.savefig('../../03_figures/FigS8/FigS8c.png', bbox_inches="tight")


In [None]:
# ROC input at t_pre: stem lengths and AO parameter scanned over a range of
# candidate boundaries, plus the autoencoder (AE) reference labels.
file_num= param["t_pre"] - 1
print ("reading trajectory data for time step ...", file_num)
g       = natsorted(glob.glob('../../01_raw_data/dump_files/dump.*'))
h5_file = h5py.File('../all_fingerprints.h5', 'r')
h5_keys = natsorted(list(h5_file.keys()))
obj     = ReadCoordinates(g[file_num], param["chainlength"])
mol     = obj.molID
bonds   = obj.bonds
print ("calc stems...")

stems   = getSL(mol)
binsize = 1
# Only the bin edges are used: they serve as the candidate SL boundaries.
num, slbins    = np.histogram(stems, bins = np.arange(2, 30, binsize))
print ( "sl bins = ",slbins)
print ("retrieve P2 data...")
h5_file_avgor  = h5py.File('../../02_processed_data/Labels/AO_P2.h5', 'r')
h5_keys_avg    = natsorted(list(h5_file_avgor.keys()))
target_lab     = np.array(h5_file_avgor.get(h5_keys_avg[file_num])).copy()
target_resc    = (2*target_lab + 1) / 3
print (np.sum(target_resc>0.7))

# Candidate AO boundaries: step `binsize` overall, refined to half that step
# between 0.55 and 0.65. The uniform histogram edges that used to be computed
# (and printed) first were overwritten by this grid before any use, so the
# printout now shows the grid that is actually scanned.
binsize        = 0.01
aobins   = np.concatenate((np.arange(.4, .55, binsize),np.arange(.55, .65, .5*binsize),np.arange(.65, .9, binsize)))
print ( "ao bins = ",aobins)

# AE labels of the selected frame; indices with label 1 are the reference positives.
# The file is a pickle: only load files produced by this project.
filename = '../../02_processed_data/Labels/AE_labels.sav'
with open(filename, 'rb') as fh:
    aelab = np.array(pickle.load(fh))[file_num]
ae1      = np.where(aelab == 1)[0]
print ("num AE labels = ", len(ae1))
print ("scan for SL boundaries")
sllab    = scanOrderParameterBoundaries(stems, slbins)
print ("scan for AO boundaries")
aolab    = scanOrderParameterBoundaries(target_resc, aobins)


In [None]:
# Figure S8b: ROC curves of the SL and AO classifications at t_pre on log10 axes,
# with the AE labels as reference.
fig = plt.figure(figsize=(6,7))
ax  = fig.add_subplot()

# Here minIdx is the row whose positive count is closest to the AE count (from ROC_analysis2).
numberSL, filedata, minIdx, minIdx2 = ROC_analysis2(sllab, ae1)
for i in np.arange(len(slbins)):
    print ( i, slbins[i] )
# Rates that are exactly zero are replaced by 1e-6 so that log10 stays finite (-6).
select0  = filedata == 0
filedata[select0]    = 1e-6
filedata = np.log(filedata)/np.log(10.0)
print ("SL ROC:",filedata)
print ( minIdx, slbins[minIdx],  minIdx2, slbins[minIdx2] )

plt.scatter(filedata[:,0], filedata[:,1], label = olab["sSL"], marker="v", color=ocol["SL"], s = 70, alpha=0.6, zorder=-1)

putTextLabels(filedata, slbins, 6, 0.08, -0.15, ocol["SL"]*0.8, 8)
niceArrow(filedata[minIdx], .4, 0.1, -45, 0.8*ocol["SL"], 0.05, 0.05)

niceArrow(filedata[minIdx2], .4, 0.1, -45, [0.8, 0.8, 0.8], 0.05, 0.05)
plt.text(-3, -3, '$t=t_{pre}$', color=ocol["t_pre"], fontsize=28)

# Same for AO; filedata, minIdx and minIdx2 are overwritten by the AO results.
numberAO, filedata, minIdx, minIdx2 = ROC_analysis2(aolab, ae1)

for i in np.arange(len(aobins)):
    print ( i, aobins[i] )

select0  = filedata == 0
filedata[select0] = 1e-6
filedata = np.log(filedata)/np.log(10.0)
print ( minIdx, aobins[minIdx],  minIdx2, aobins[minIdx2] )

print ("AO ROC:",filedata)
plt.scatter(filedata[:,0], filedata[:,1], label = olab["sAO"], color=ocol["AO"], s = 70,alpha=0.6)

# Text labels are omitted for the last `ignore` AO boundaries.
ignore   = 15
maxIdx   = len(filedata)-ignore
putTextLabels(filedata[:maxIdx], aobins[:maxIdx], 8, -0.1, +0.15, ocol["AO"]*0.8, 8)

print ( "find min idx at ", minIdx, " boundary = ", aobins[minIdx])
niceArrow(filedata[minIdx],.4,0.2,-45+180,0.8*ocol["AO"], 0.05, 0.05)
niceArrow(filedata[minIdx2],.4,0.2,-45+180,[0.8,0.8,0.8], 0.05, 0.05)
plt.ylabel('log$_{10}$ [True Positive Rate]',  fontsize = 19)
plt.xlabel('log$_{10}$ [False Positive Rate]',  fontsize = 19)
plt.legend(loc="lower right")
plt.grid(alpha = 0.3)

# Relabel the second tick of each axis as 'Zero'.
# NOTE(review): presumably tick index 1 is the -6 tick that stands for the replaced zeros — confirm.
# `locs` is only printed; the tick positions themselves are left untouched.
labels    = [t.get_text() for t in ax.get_xticklabels()]
locs      = list(ax.get_xticks())
labels[1] = 'Zero'
print ( locs, labels )
ax.set_xticklabels(labels)

labels    = [t.get_text() for t in ax.get_yticklabels()]
locs      = list(ax.get_yticks())
labels[1] = 'Zero'
print ( locs, labels )
ax.set_yticklabels(labels)

# NOTE(review): filedata holds only the (log10) AO curve at this point, so the CSV does
# not contain the SL curve shown in the same figure — confirm whether that is intended.
np.savetxt('../../03_figures/FigS8/FigS8b.csv', filedata, delimiter=",")
plt.savefig('../../03_figures/FigS8/FigS8b.png', bbox_inches="tight")

# Figure S8d: number of positives (in millions) versus boundary for AO (bottom axis)
# and SL (top axis) on a logarithmic y axis; the dashed line marks the AE count.
numberAE = len(ae1)

fig      = plt.figure(figsize=(6,7))
ax       = fig.add_subplot()
ax.plot(aobins, numberAO/1e6, ms=2*ps_cr, color = ocol["AO"], marker = "o", label = olab["sAO"])

ax.axhline(numberAE/1e6, color = ocol["AE"],ls = '--', lw = 2, label = '')

ax.grid()
ax.set_xlabel("AO boundary",fontsize = 20)
ax.set_ylabel("Crystallinity at $t_{pre}$",fontsize = 20)

ax2  = ax.twiny()
ax2.xaxis.tick_top()

ax2.set_xlabel('SL boundary, $d_{tt}$', fontsize = 20, labelpad = 11)
ax2.xaxis.set_label_position('top') 

ax2.plot(slbins, numberSL/1e6, ms=2*ps_cr, color = ocol["SL"], marker = "v", label = olab["sSL"])
ax.set_yscale('log')
ax2.set_yscale('log')
plt.savefig('../../03_figures/FigS8/FigS8d.png', bbox_inches="tight")



## Conformation labels on point cloud

#### PCA on conformation

In [None]:
# Load the AE training data and keep only its first 10 columns.
# NOTE(review): presumably these 10 columns are the conformation part of the fingerprint — confirm.
filename  = '../../02_processed_data/Training_data/ae_training_data.csv'
combined  = np.loadtxt(filename, delimiter=",")
combined  = combined[:, :10]
combined.shape

In [None]:
# Rescale every column x -> (2x + 1)/3 in one vectorised step; `combined` itself is left untouched.
conf = (2*combined + 1)/3

In [None]:
# Project the rescaled conformation columns onto a single component (linear-kernel PCA).
transformer   = KernelPCA(n_components=1, kernel='linear')
X_transformed = transformer.fit_transform(conf)
# Builtin max/min iterate over the rows of the 2-D result, so each printed value is a 1-element array.
print(max(X_transformed), min(X_transformed))

In [None]:
def get_cmap(n, name='hsv'):
    '''Return the colormap `name` resampled to `n` discrete colours.

    The returned object maps each index in 0, 1, ..., n-1 to a distinct RGBA
    colour. `name` must be a standard Matplotlib colormap name.
    '''
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap accepts the same (name, lut) arguments in old and new versions.
    return plt.get_cmap(name, n)

# Work with plain floats so np.arange does not rely on implicit conversion of
# one-element arrays to scalars.
z_min = float(np.min(X_transformed))
z_max = float(np.max(X_transformed))

# Width obtained by splitting the Z_conf range into six equal bins; printed for
# reference only.
sep  = (z_max - z_min) /6
print(sep)
# Bin width actually used by the following cells (hand-picked value).
sep = 0.45
# Bin edges along Z_conf, starting at the minimum and extending to at most max + 0.2.
boundary = np.arange(z_min, z_max+0.2, sep)
print(boundary)
# One colour per bin edge.
cmap = get_cmap(len(boundary))
cmap

In [None]:
# Colour the 2-D encoded point cloud by the bin of Z_conf each point falls into.
filename    = '../../03_figures/Fig06/Fig06.csv'
encoded     = np.loadtxt(filename, delimiter=",")

target_resc = flatten(X_transformed)

# First bin: everything up to the second bin edge.
Zmin = np.min(target_resc)
j    = 0
p1   = encoded[(target_resc <= boundary[1])]
plt.scatter(p1[:,1], p1[:,0], color = cmap(j),  s = 1, label = r'$Z_{conf}\in[$'+ str(round(Zmin,2)) + "," + str(round(boundary[1], 2))+"]", alpha = 0.4)

# Remaining bins: consecutive pairs of edges. The upper edge used for the filter
# is the same one shown in the legend (the previous filter used `i+1`, which did
# not match the bin width and made neighbouring bins overlap).
for j, (lo, hi) in enumerate(zip(boundary[1:-1], boundary[2:]), start=1):
    p1      = encoded[np.logical_and(target_resc > lo, target_resc <= hi)]
    plt.scatter(p1[:,1], p1[:,0], color = cmap(j),  s = 1, label = "["+str(round(lo, 2)) +","+ str(round(hi, 2))+"]", alpha = 0.4)

our_colors.formatMap(plt)

lgnd = plt.legend(fontsize=12, handletextpad=0.03, frameon=False,loc=(-.02,0.6))
# Enlarge the legend markers; `legendHandles` was renamed to `legend_handles`
# in Matplotlib 3.7 and the old name was removed in 3.9.
handles = lgnd.legend_handles if hasattr(lgnd, "legend_handles") else lgnd.legendHandles
for handle in handles:
    handle.set_sizes([40.0])

plt.savefig('../../03_figures/Fig06/Fig06b.png',bbox_inches="tight")
plt.show()


In [None]:
def scatter_hist(frac_arr):
    """Histogram `frac_arr` on the Z_conf grid.

    The bin edges are rebuilt from the notebook-level `X_transformed` and `sep`.
    Returns a tuple (bin centres, counts per bin).
    """
    edges          = np.arange(min(X_transformed), max(X_transformed)+0.2, sep)
    counts, edges  = np.histogram(frac_arr, bins = edges)
    centres        = (edges[:-1] + edges[1:])/2
    return centres, counts

# Store the projected values, then draw their histogram with one bar per bin.
np.savetxt('../../03_figures/FigS5/FigS5.csv', X_transformed, delimiter=",")
plt.figure(figsize=(8.5,6))
X, Y = scatter_hist(X_transformed)
# Each bar gets the colour of its bin index.
# NOTE(review): no bar width is passed, so Matplotlib's default width is used
# rather than the bin width `sep` — confirm this is the intended look.
for i, (centre, count) in enumerate(zip(X, Y)):
    plt.bar(centre, count, align   = 'center', color  = cmap(i), alpha=0.6)
plt.xlabel('$Z_{conf}$', fontsize = 25)
plt.ylabel('Number of monomers', fontsize = 22)
plt.savefig('../../03_figures/FigS5/FigS5a.png',bbox_inches="tight")


In [None]:
plt.figure(figsize=(7,6))
rndm_conf = conf.copy()

# Column groups that are averaged together: columns 0-3, 4-6, 7-8 and the last
# column; they are plotted at x = 1, 2, 3, 4 respectively.
column_groups = (np.s_[:, :4], np.s_[:, 4:7], np.s_[:, 7:9], np.s_[:, -1])
separations   = np.array([1,2,3,4])

# One set of four averages per Z_conf bin (consecutive pairs of bin edges).
for i, (lo, hi) in enumerate(zip(boundary[:-1], boundary[1:])):
    in_bin        = np.logical_and(X_transformed > lo, X_transformed <= hi)
    filterr       = np.where(in_bin)[0]
    filtered_conf = rndm_conf[filterr]
    avg_conf      = np.array([np.mean(filtered_conf[group]) for group in column_groups])
    print(avg_conf)
    plt.scatter(separations, avg_conf, color = cmap(i), s=150, alpha=0.5)

plt.ylabel(r'$\langle\cos^2 \Theta_{ij}\rangle$', fontsize = 26)
plt.xlabel('$|i-j|$', fontsize = 26)
plt.savefig('../../03_figures/FigS5/FigS5b.png',bbox_inches="tight")


## AO labels on point cloud

In [None]:
def get_cmap(n, name='hsv'):
    '''Return the colormap `name` resampled to `n` discrete colours.

    The returned object maps each index in 0, 1, ..., n-1 to a distinct RGBA
    colour. `name` must be a standard Matplotlib colormap name.
    '''
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap accepts the same (name, lut) arguments in old and new versions.
    return plt.get_cmap(name, n)

# Bin edges for the AO label: 0.4 up to 1.0 in steps of 0.1.
boundary = np.arange(0.4, 1.01, 0.1)
print(boundary)
# One colour per bin edge.
cmap = get_cmap(len(boundary))
cmap

In [None]:
# Colour the 2-D encoded point cloud by the (rescaled) AO label of each point.
file_num     = -1
filename     = '../../02_processed_data/Training_data/ae_training_idxs.csv'
rnd          = np.int64(np.loadtxt(filename, delimiter=','))

# Read the labels of the selected frame for the training indices. The data are
# copied into memory, so the HDF5 file can be closed right away (it was
# previously left open).
with h5py.File('../../02_processed_data/Labels/AO_P2.h5', 'r') as h5_file_avgor:
    h5_keys_avg  = natsorted(list(h5_file_avgor.keys()))
    target_lab   = np.array(h5_file_avgor.get(h5_keys_avg[file_num])[rnd])
target_resc  = (2*target_lab + 1) / 3
np.savetxt('../../03_figures/Fig05/Fig05b.csv', target_resc, delimiter=",")

filename     = '../../03_figures/Fig06/Fig06.csv'
encoded      = np.loadtxt(filename, delimiter=",")

# First bin: everything up to the second bin edge.
j            = 0
valmin       = np.min(target_resc)
p1           = encoded[(target_resc <= boundary[1])]
plt.scatter(p1[:,1], p1[:,0], color = cmap(j),  s = 2, label = r'$\langle \cos^2 \Theta \rangle \leq$'+str(boundary[1]), alpha=0.4)

# Remaining bins: consecutive pairs of bin edges (lo, hi].
for j, (lo, hi) in enumerate(zip(boundary[1:-1], boundary[2:]), start=1):
    p1       = encoded[np.logical_and(target_resc > lo, target_resc <= hi)]
    plt.scatter(p1[:,1], p1[:,0], color = cmap(j),  s = 5, label = r'$\in[$'+str(round(lo, 2))+","+str(round(hi, 2))+"$]$", alpha=0.4)

our_colors.formatMap(plt)

lgnd = plt.legend(fontsize=12,handletextpad=0.03,frameon=False,loc=(-.02,0.6))
# Enlarge the legend markers; `legendHandles` was renamed to `legend_handles`
# in Matplotlib 3.7 and the old name was removed in 3.9.
handles = lgnd.legend_handles if hasattr(lgnd, "legend_handles") else lgnd.legendHandles
for handle in handles:
    handle.set_sizes([40.0])
plt.savefig('../../03_figures/Fig06/Fig06c.png',bbox_inches="tight")
plt.show()


## SL labels on point cloud

In [None]:
# Load the values stored with Fig05a; the following cells index this array with
# the training indices and use it as the SL label.
filename = '../../03_figures/Fig05/Fig05a.csv'
stems    = np.loadtxt(filename, delimiter=",")
stems

In [None]:
def get_cmap(n, name='hsv'):
    '''Return the colormap `name` resampled to `n` discrete colours.

    The returned object maps each index in 0, 1, ..., n-1 to a distinct RGBA
    colour. `name` must be a standard Matplotlib colormap name.
    '''
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap accepts the same (name, lut) arguments in old and new versions.
    return plt.get_cmap(name, n)

# Bin edges for the SL label: from 7 up to at most max(stems) in steps of 4.
boundary = np.arange(7, max(stems)+1, 4)
# One colour per bin edge.
cmap = get_cmap(len(boundary))
print(boundary)
cmap

In [None]:
# Colour the 2-D encoded point cloud by the SL label of each point.
file_num     = -1
filename     = '../../02_processed_data/Training_data/ae_training_idxs.csv'
rnd          = np.int64(np.loadtxt(filename, delimiter=','))
filename     = '../../02_processed_data/Predictions/all_encoded.sav'
# NOTE: pickle can execute arbitrary code on load — only open files produced by
# this project. The handle is closed as soon as the data are read.
with open(filename, 'rb') as sav_file:
    encoded  = np.array(pickle.load(sav_file))[file_num][rnd]
target_resc  = stems[rnd]

# First bin: everything up to the second bin edge, matching its legend entry
# (the filter previously used boundary[0], which left the range
# (boundary[0], boundary[1]] out of the plot).
j            = 0
p1           = encoded[(target_resc <= boundary[1])]
plt.scatter(p1[:,1], p1[:,0], color = cmap(j),  s = 0.3, label = 'SL <= ' + str(boundary[1]))

# Remaining bins: consecutive pairs of bin edges (lo, hi]. The filter previously
# used `i+1` as upper edge although the bins (and their labels) are 4 wide.
for j, (lo, hi) in enumerate(zip(boundary[1:-1], boundary[2:]), start=1):
    p1       = encoded[np.logical_and(target_resc > lo, target_resc <= hi)]
    plt.scatter(p1[:,1], p1[:,0], color = cmap(j),  s = 0.3, label = str(round(lo, 2)) + ' < SL <= ' + str(round(hi, 2)))

our_colors.formatMap(plt)

lgnd = plt.legend(fontsize=12)
# Enlarge the legend markers; `legendHandles` was renamed to `legend_handles`
# in Matplotlib 3.7 and the old name was removed in 3.9.
handles = lgnd.legend_handles if hasattr(lgnd, "legend_handles") else lgnd.legendHandles
for handle in handles:
    handle.set_sizes([40.0])


## Main branch formations

In [None]:
# Load the encoded coordinates and the AE labels that the following cells index
# by timestep. Each file is opened in a `with` block so its handle is closed
# after reading.
# NOTE: pickle can execute arbitrary code on load — only open files produced by
# this project.
filename   = '../../02_processed_data/Predictions/all_encoded.sav'
with open(filename, 'rb') as sav_file:
    all_enc    = np.array(pickle.load(sav_file))
filename   = '../../02_processed_data/Labels/AE_labels.sav'
with open(filename, 'rb') as sav_file:
    all_aelab  = np.array(pickle.load(sav_file))

In [None]:
# Encoded point cloud at t_pre, coloured by the AE label (0 / 1).
timestep = param["t_pre"]-1
yhc      = all_aelab[timestep][rnd]
encoded  = all_enc  [timestep][rnd]

# Store coordinates and label side by side.
np.savetxt('../../03_figures/Fig07/Fig07a.csv', np.c_[encoded, yhc], delimiter=",")

fig = plt.figure()
# Label 0 is drawn first, label 1 on top of it.
p1  = encoded[yhc==0]
plt.scatter(p1[:,1], p1[:,0], c = ocol["amorph"],  s = ps_am, label = olab["sAE"] + olab["am"])
p1  = encoded[yhc==1]
plt.scatter(p1[:,1], p1[:,0], c = ocol["AE"], s = ps_cr, label = olab["sAE"] + olab["cr"])

our_colors.formatMap(plt)
lgnd     = plt.legend(fontsize=18,loc="upper left")
# Enlarge the legend markers; `legendHandles` was renamed to `legend_handles`
# in Matplotlib 3.7 and the old name was removed in 3.9.
handles  = lgnd.legend_handles if hasattr(lgnd, "legend_handles") else lgnd.legendHandles
for handle in handles:
    handle.set_sizes([40.0])

plt.savefig('../../03_figures/Fig07/Fig07a.png',bbox_inches="tight")

In [None]:
# Encoded point cloud at t_tr, coloured by the AE label (0 / 1).
timestep = param["t_tr"]-1
encoded  = all_enc  [timestep][rnd]
yhc      = all_aelab[timestep][rnd]

# Store coordinates and label side by side.
np.savetxt('../../03_figures/Fig07/Fig07b.csv', np.c_[encoded, yhc], delimiter=",")

fig = plt.figure()
# (label value, colour key, marker size, legend suffix key); label 0 is drawn
# first, label 1 on top of it.
for lab_value, col_key, point_size, lab_key in ((0, "amorph", ps_am, "am"), (1, "AE", ps_cr, "cr")):
    p1 = encoded[yhc==lab_value]
    plt.scatter(p1[:,1], p1[:,0], c = ocol[col_key], s = point_size, label = olab["sAE"] + olab[lab_key])

our_colors.formatMap(plt)
plt.savefig('../../03_figures/Fig07/Fig07b.png',bbox_inches="tight")

In [None]:
# Encoded point cloud at t_min, coloured by the AE label (0 / 1).
timestep = param["t_min"]-1
encoded  = all_enc  [timestep][rnd]
yhc      = all_aelab[timestep][rnd]

# Store coordinates and label side by side.
np.savetxt('../../03_figures/Fig07/Fig07c.csv', np.c_[encoded, yhc], delimiter=",")

fig = plt.figure()
# (label value, colour key, marker size, legend suffix key); label 0 is drawn
# first, label 1 on top of it.
for lab_value, col_key, point_size, lab_key in ((0, "amorph", ps_am, "am"), (1, "AE", ps_cr, "cr")):
    p1 = encoded[yhc==lab_value]
    plt.scatter(p1[:,1], p1[:,0], c = ocol[col_key], s = point_size, label = olab["sAE"] + olab[lab_key])

our_colors.formatMap(plt)

plt.savefig('../../03_figures/Fig07/Fig07c.png',bbox_inches="tight")

## Rmax and bin-size

In [None]:
file_num    = -1
def normg(av, n_tail=10):
    """Convert per-shell counts into a profile normalised to 1 at large radius.

    Parameters
    ----------
    av : array-like, 1-D
        One value per radial bin.
    n_tail : int, optional
        Number of outermost bins whose mean defines the normalisation
        (default 10). If fewer bins exist, all of them are used.

    Returns
    -------
    numpy.ndarray
        `av` divided by the volume of the corresponding spherical shell
        (shell k spans radii k..k+1 in units of the bin width) and then by the
        mean of the last `n_tail` entries.

    Raises
    ------
    ValueError
        If `n_tail` is smaller than 1.
    """
    if n_tail < 1:
        raise ValueError("n_tail must be at least 1")
    av      = np.asarray(av, dtype=float)
    bins    = len(av)
    j       = np.arange(bins+1)
    # Volume between consecutive spheres of radius j and j+1.
    v_shell = np.pi*(4./3.)*(j[1:]**3-j[:-1]**3)
    rho     = av/v_shell
    # rho[-n_tail:] takes the whole array when it is shorter than n_tail; the
    # earlier rho[len(rho)-10:] produced a negative start for 6-9 bins and
    # averaged fewer bins than available.
    return rho/np.mean(rho[-n_tail:])

In [None]:
# Radial profiles g(R) averaged over all rows of `counts` and separately over
# the rows with AE label 0 and AE label 1.
filename  = '../../02_processed_data/Labels/AE_labels.sav'
# NOTE: pickle can execute arbitrary code on load — only open files produced by
# this project. The handle is closed as soon as the labels are read.
# NOTE(review): this rebinds `all_aelab` to the labels of a single frame, so
# cells that index `all_aelab` by timestep must be run before this one.
with open(filename, 'rb') as sav_file:
    all_aelab = np.array(pickle.load(sav_file))[file_num]

counts    = np.loadtxt('../../03_figures/FigS3/FigS3.csv',delimiter=",")

# Mean row per label and over everything.
av_0      = np.mean( (counts[all_aelab == 0]), axis = 0)
av_1      = np.mean( (counts[all_aelab == 1]), axis = 0)
av_       = np.mean(  counts, axis = 0)

fig = plt.figure()
# Bin centres: upper bin edges shifted back by half a bin width.
x = np.arange(param["dR"], param["Rmax"], param["dR"]) - 0.5*param["dR"]
plt.xticks(np.arange(0, param["Rmax"]+param["dR"]+0.01, param["dR"]))
# Sanity check: both shapes must agree for the plots below.
print (x.shape, av_.shape)

# NOTE(review): the marker position 3.2 is hard-coded although it is labelled
# R_max — confirm it matches param["Rmax"].
plt.axvline(3.2, color = ocol["t_min"],  ls = '--', lw = 2)
plt.text(3.22, 0.65, '$R_{max}$', color = ocol["t_min"], fontsize=15)

plt.plot(x, normg(av_1), c=ocol["AE"],     marker = 'o', label = olab["sAE"]+olab["cr"], lw = 0.7)
plt.plot(x, normg(av_0), c=ocol["amorph"], marker = 's', label = olab["sAE"]+olab["am"], lw = 0.7)
plt.plot(x, normg(av_),  'black',          marker = 'p', label = 'Unclassified',         lw = 0.7)

# Raw string: '\s' is not a valid escape sequence in a normal string literal.
plt.xlabel(r'Radius, R $(\sigma)$', fontsize = 20)
plt.ylabel('g(R)',  fontsize = 20)
plt.legend(fontsize = 18, loc = 'best')
plt.grid()
plt.xlim(0., param["Rmax"]+0.4)
plt.tight_layout()
plt.savefig('../../03_figures/FigS3/FigS3.png',bbox_inches="tight")
