In [2]:
%matplotlib inline
# Importing necessary packages:
from glob import glob
import pandas as pd
import datetime as dt
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PipeLine import *

In [None]:
def cyl_sumrule_norms_ens_evg(all_in_one):
    """
    cyl_sumrule_norms_ens_evg rescales some of the physical properties in an
    ensemble-averaged properties dataframe of the cylindrical sum rule project.

    The dataframe is modified in place and is also returned.

    Within every ensemble group (rows sharing the same 'nmon', 'dcyl' and
    'dcrowd'), the columns 'fsd', 'gyr' and 'rflory' are divided by their
    value in the first row of that group whose 'vfrc_crowd' is exactly 0.
    If a group has no such row, its normalized values are set to NaN and a
    warning is issued (instead of failing with an IndexError).

    Parameters:
    all_in_one: a dataframe of all the ensemble-averaged properties. It must
        contain the columns 'nmon', 'dcyl', 'dcrowd', 'dmon', 'vfrc_crowd',
        'fsd', 'gyr', 'rflory', 'ncrowd' and 'lcyl'.

    Return:
    all_in_one with the new columns 'fsd_normalized', 'gyr_normalized',
    'rflory_normalized', 'vfrc_c_normalized', 'vfrc_crowd_eff' and
    'vfrc_c_eff_normalized', and with 'vfrc_crowd' rounded to 3 decimals.
    """
    import warnings  # local import: only needed for the missing-reference warning

    size_columns = ['fsd', 'gyr', 'rflory']
    # Rows that end up in no group (e.g. NaN in a grouping column) keep 0.0.
    for column in size_columns:
        all_in_one[column + '_normalized'] = 0.0
    # Computed from the un-rounded 'vfrc_crowd'; the rounding happens further down.
    all_in_one['vfrc_c_normalized'] = (all_in_one['dmon'] * all_in_one['vfrc_crowd']) / all_in_one['dcrowd']

    group_properties = ['nmon', 'dcyl', 'dcrowd']  # this is the way ensemble group defined in the cylindrical sum rule project
    is_reference = all_in_one['vfrc_crowd'] == 0
    for nmon, dcyl, dcrowd in all_in_one.groupby(group_properties).size().index:
        in_group = (
            (all_in_one['nmon'] == nmon)
            & (all_in_one['dcyl'] == dcyl)
            & (all_in_one['dcrowd'] == dcrowd)
        )
        reference = all_in_one.loc[in_group & is_reference, size_columns]
        if reference.empty:
            warnings.warn(
                "No row with vfrc_crowd == 0 for group (nmon={}, dcyl={}, dcrowd={}); "
                "its normalized chain sizes are set to NaN.".format(nmon, dcyl, dcrowd)
            )
        for column in size_columns:
            # The first crowder-free row of the group is the normalization scale.
            scale = np.nan if reference.empty else reference[column].values[0]
            all_in_one.loc[in_group, column + '_normalized'] = all_in_one.loc[in_group, column] / scale

    all_in_one['vfrc_crowd'] = all_in_one['vfrc_crowd'].round(decimals=3)
    # ncrowd spheres of diameter dcrowd over a cylinder of diameter
    # (dcyl - dcrowd) and length lcyl.
    crowders_volume = (np.pi * all_in_one['dcrowd'] ** 3 / 6) * all_in_one['ncrowd']
    accessible_volume = (np.pi / 4 * (all_in_one['dcyl'] - all_in_one['dcrowd']) ** 2) * all_in_one['lcyl']
    all_in_one['vfrc_crowd_eff'] = crowders_volume / accessible_volume
    all_in_one['vfrc_c_eff_normalized'] = (all_in_one['dmon'] * all_in_one['vfrc_crowd_eff']) / all_in_one['dcrowd']
    return all_in_one

In [None]:
def all_dfs_in_one(property_files, property_name, ens_avg=False, to_file=True, norm_func=None, **kwargs):
    """
    all_dfs_in_one merges all the dataframes into one dataframe. property_files
    can be a list of ensemble csvs or ensemble-averaged group csvs.

    Caution:
    A simulation group usually results in a graph or curve for the project and refers to a collection of simulations that all have the same values for one or several input parameters of the project.
    An ensemble is a collection of simulations that differs only in their random number seeds, initial conditions, or boundary conditions but have the same input parameters.
    An ensemble-averaged group is an average over all the simulations in an ensemble and usually gives a data point.
    If there are N ensembles, each with M simulations, then there are N ensemble-averaged groups and N*M simulations in the simulation group.

    Parameters:
    property_files: iterable of sequences whose first item is the path to a csv file (each member is a tuple with one member; see file_reader function).
    property_name (str): name of the property; it is the prefix of the output filename.
    ens_avg (bool): whether ensemble files or ensemble-averaged files.
    to_file (bool): whether save to file or not. The file is named
        property_name + '-' + suffix and is written relative to the current working directory.
    norm_func (callable or None): function that takes the merged dataframe and returns the normalized one.
        It is applied only when ens_avg is true and property_name is 'properties'; otherwise it is ignored.
    **kwargs: passed to pandas.read_csv for every file.

    Return:
    A dataframe of all the properties of all the ensembles, with a fresh 0..n-1 index.

    Raises:
    ValueError: if property_files is empty.

    Requirements:
    Pandas
    """
    # Lines starting with '#' in the csv files are treated as comments.
    frames = [pd.read_csv(property_file[0], comment='#', **kwargs) for property_file in property_files]
    if not frames:
        # pd.concat would raise a less helpful "No objects to concatenate".
        raise ValueError("property_files is empty: there is no csv file to merge.")
    all_in_one = pd.concat(frames).reset_index(drop=True)

    if ens_avg and (norm_func is not None) and (property_name == 'properties'):
        all_in_one = norm_func(all_in_one)
        df_type = 'all_in_one-ens_avg-normalized.csv'
    elif ens_avg:
        df_type = 'all_in_one-ens_avg.csv'
    else:
        df_type = 'all_in_one.csv'
    if to_file:
        all_in_one.to_csv(property_name + '-' + df_type)
    return all_in_one

In [4]:
# Collect every csv file found in the '*-analyze_bug' directories of the sum rule data.
csv_files = glob("../sumrule_data/N*-analyze_bug/N*.csv")
# Per-simulation property files: merged into one dataframe. With the default
# to_file=True this presumably also writes a csv file -- verify against
# PipeLine.all_dfs_in_one.
# NOTE(review): PipeLine.file_reader is defined outside this notebook; the printed
# output suggests it sorts the matching files and returns them as 1-tuples -- confirm.
property_files = PipeLine.file_reader(csv_files,extensions=['-properties.csv'])
properties_all_in_one = PipeLine.all_dfs_in_one(property_files, 'properties', index_col=0)
# Ensemble-averaged property files: merged and then passed through the
# normalization function given as norm_func.
# `property_files` is deliberately re-bound here to the ensemble-averaged list.
property_files = PipeLine.file_reader(csv_files,extensions=['-properties-ens_avg.csv'])
properties_all_in_one_ens_avg = PipeLine.all_dfs_in_one(property_files,'properties', ens_avg=True, norm_func=PipeLine.cyl_sumrule_norms_ens_evg, index_col=0)

Total number of files is  11
Path to the first tuple of the sorted file:  ('../sumrule_data/N1000D15.0ac1.0-analyze_bug/N1000D15.0ac1.0-properties.csv',)
Total number of files is  11
Path to the first tuple of the sorted file:  ('../sumrule_data/N1000D15.0ac1.0-analyze_bug/N1000D15.0ac1.0-properties-ens_avg.csv',)


In [None]:
# Build the normalized ensemble-averaged properties table used by the plotting cells.
# NOTE(review): the rescaling below repeats the first part of
# cyl_sumrule_norms_ens_evg; consider calling one shared helper instead.
csv_files = glob("../sumrule_data/N*-analyze_bug/N*.csv")
geometry = 'cylinder'
df_name = 'all_in_one-ens_avg'  # not used in this cell
ext = ['-properties-ens_avg.csv']
ens_evg_properties_files = PipeLine.file_reader(csv_files, extensions=ext)
# NOTE(review): `properties` is not defined in this notebook; presumably it is
# provided by `from PipeLine import *` -- confirm.
ens_avg_properties_df = properties(ens_evg_properties_files, geometry, index_col=0)
ens_avg_properties_df = ens_avg_properties_df.round(4)
ens_avg_properties_df['fsd_normalized'] = 0.0
ens_avg_properties_df['gyr_normalized'] = 0.0
ens_avg_properties_df['rflory_normalized'] = 0.0
ens_avg_properties_df['vfrc_c_normalized'] = (ens_avg_properties_df['dmon'] * ens_avg_properties_df['vfrc_crowd']) / ens_avg_properties_df['dcrowd']
# One entry per unique (nmon, dcyl, dcrowd) combination present in the table.
unique_simulations = [list(input_set) for input_set in list(ens_avg_properties_df.groupby(['nmon','dcyl','dcrowd']).size().index)]
for nmon, dcyl, dcrowd in unique_simulations:
    cond_normal = (ens_avg_properties_df['nmon'] == nmon) & (ens_avg_properties_df['dcyl'] == dcyl) & (ens_avg_properties_df['dcrowd'] == dcrowd)
    # The crowder-free row (vfrc_crowd == 0) of the group is the normalization reference.
    condition = cond_normal & (ens_avg_properties_df['vfrc_crowd'] == 0)
    reference = ens_avg_properties_df[condition]
    if reference.empty:
        # Without a reference row the ratios are undefined: store NaN rather than
        # aborting the whole cell with an IndexError.
        fsd = gyr = rflory = np.nan
    else:
        fsd = reference['fsd'].values[0]
        gyr = reference['gyr'].values[0]
        rflory = reference['rflory'].values[0]
    ens_avg_properties_df.loc[cond_normal,'fsd_normalized'] = ens_avg_properties_df.loc[cond_normal,'fsd'] / fsd
    ens_avg_properties_df.loc[cond_normal,'gyr_normalized'] = ens_avg_properties_df.loc[cond_normal,'gyr'] / gyr
    ens_avg_properties_df.loc[cond_normal,'rflory_normalized'] = ens_avg_properties_df.loc[cond_normal,'rflory'] / rflory

In [None]:
# Show the ensemble-averaged properties dataframe as the output of this cell.
ens_avg_properties_df

In [None]:
def change_legend_name(line, legend_new_labels):
    """
    Rebuild the legend of `line` and overwrite the text of its entries.

    Parameters:
    line: the object whose legend() method is called to (re)create the legend;
        in this notebook it is the value returned by sns.lineplot.
    legend_new_labels: the replacement labels, in the order of the legend entries.

    The legend texts and the new labels are paired with zip, so when their
    numbers differ the surplus items of the longer one are left untouched.
    """
    legend = line.legend(
        fontsize=16,
        bbox_to_anchor=(1.005, 1),
        loc=2,
        edgecolor='black',
        title_fontsize=16,
        markerscale=1.5,
    )
    for text_entry, replacement in zip(legend.texts, legend_new_labels):
        text_entry.set_text(replacement)

In [None]:
def chainsize_plot(df, xcol, leg_labels, colors, fontsize=20):
    """
    Draw the three normalized chain-size measures against `xcol` in three
    stacked panels sharing the x axis, and save the figure as
    'chainsize-<xcol>.pdf' relative to the current working directory.

    Parameters:
    df: dataframe with the columns `xcol`, 'fsd_normalized', 'gyr_normalized',
        'rflory_normalized', 'dcyl', 'dcrowd' and 'nmon'.
    xcol (str): column on the x axis; it must be 'vfrc_crowd' or
        'vfrc_c_normalized', the only keys of the axis-label tables.
    leg_labels: new legend labels, handed to change_legend_name.
    colors: palette passed to seaborn for the 'dcyl' hue.
    fontsize: font size of the axis labels.

    Note: sns.set_context and sns.set_style change the global seaborn settings.
    """
    sns.set_context('paper')
    sns.set_style("ticks")
    _figure, axes = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(16, 12))

    # Keyword arguments common to the three panels.
    shared_options = dict(
        x=xcol, hue='dcyl', style='dcrowd', size='nmon', palette=colors,
        markers=True, markersize=8, data=df,
    )
    panels = []
    for position, ycol in enumerate(("fsd_normalized", "gyr_normalized", "rflory_normalized")):
        # Only the top panel keeps seaborn's default legend.
        legend_option = {} if position == 0 else {'legend': False}
        panels.append(sns.lineplot(y=ycol, ax=axes[position], **shared_options, **legend_option))
    top_panel, bottom_panel = panels[0], panels[2]

    xlabels = {
        "vfrc_crowd": r"$\phi_c$",
        "vfrc_c_normalized": r"${a\phi_c}/{a_c}$",
    }
    ylabels = {
        "vfrc_crowd": [
            r'$\frac{L_{FSD}(\phi_c)}{L_{FSD}(0)}$',
            r'$\frac{R_{ROG}(\phi_c)}{R_{ROG}(0)}$',
            r'$\frac{R_{Flory}(\phi_c)}{R_{Flory}(0)}$',
        ],
        "vfrc_c_normalized": [
            r'$\frac{L_{FSD}({a\phi_c}/{a_c})}{L_{FSD}(0)}$',
            r'$\frac{R_{ROG}({a\phi_c}/{a_c})}{R_{ROG}(0)}$',
            r'$\frac{R_{Flory}({a\phi_c}/{a_c})}{R_{Flory}(0)}$',
        ],
    }

    for row, panel_axis in enumerate(axes):
        panel_axis.grid(True, ls=':', lw=1)
        panel_axis.tick_params(axis='both', direction='inout', width=1)
        panel_axis.set_ylabel(ylabels[xcol][row], fontsize=fontsize)
        # presumably (start, stop, major step, minor step) -- verify against PipeLine.yticks
        PipeLine.yticks(panel_axis, (0, 1.0, 0.2, 0.04), code=True, fontsize=14)

    # The x axis is shared, so the x ticks are configured on the bottom panel.
    PipeLine.xticks(axes[2], (0.0, 0.4, 0.05, 0.005), code=True, fontsize=14)
    change_legend_name(top_panel, leg_labels)
    bottom_panel.set_xlabel(xlabels[xcol], fontsize=fontsize)

    picname = "chainsize-" + xcol
    plt.savefig(picname + '.pdf', dpi=300, bbox_inches='tight')

In [None]:
# Draw and save the chain-size figure for both choices of the x axis.
# NOTE(review): xcol, df and fontsize are assigned here but the two calls below
# pass their values explicitly instead of using these names.
xcol="vfrc_crowd"
df=ens_avg_properties_df
fontsize=20
# Three colors from the "Set2" palette, used for the 'dcyl' hue.
colors = sns.color_palette(n_colors=3, palette="Set2", desat=1)
# Replacement legend texts: a title followed by its values for each of the three
# legend sections. They are applied by position, so the order must match the
# legend produced by seaborn -- TODO confirm against the rendered figure.
leg_labels = [r'$\frac{D}{a}$ (Line color)', '15.0', '20.0', '25.0', r'$N$ (Line size)', 1000, 2000, r'$\frac{a_c}{a}$ (Marker type)', '1.0', '2.0', '4.0']
chainsize_plot(ens_avg_properties_df, "vfrc_crowd", leg_labels, colors, fontsize=20)
chainsize_plot(ens_avg_properties_df, "vfrc_c_normalized", leg_labels, colors, fontsize=20)

In [None]:
# Plot two radial profiles on one figure with two y axes (left: phi, right: rho)
# and save it as a pdf.
# NOTE(review): ens_avg_phi_r, ens_avg_rho_r, rho and filename are not assigned in
# this cell; they must already exist in the kernel when it runs.
fig, ax = plt.subplots(nrows=1,ncols=1,figsize=(16,9))
fontsize = 24
color = 'tab:blue'
# Only one data argument is given here: the first column of ens_avg_phi_r.
line1, = ax.plot(ens_avg_phi_r.iloc[:,0], markersize=0.1, c=color)
#ax.legend()
ax.set_xlabel(r"Cylinder radius ($\frac{D}{2a}$)",fontsize=fontsize)
ax.set_ylabel(r"$\phi_m(r)$",fontsize=fontsize)
ax.tick_params(labelsize=24)

# Second y axis that shares the x axis with `ax`.
ax2 = ax.twinx()
color = 'tab:red'
# NOTE(review): the first column of ens_avg_rho_r is used as x and the global
# `rho` as y -- confirm that both describe the same bins.
line2, = ax2.plot(ens_avg_rho_r.iloc[:,0], rho, markersize=0.1, c=color)
ax2.set_ylabel(r"$\rho_m(r)$",fontsize=fontsize)
ax2.tick_params(labelsize=24)

lines = [line1, line2]

# One legend for the lines of both axes.
# NOTE(review): neither plot call passes `label`, so get_label() returns
# matplotlib's automatic labels -- the legend texts are probably not meaningful.
ax.legend(lines, [l.get_label() for l in lines],loc=1,ncol=2,fontsize=fontsize)
#fig.legend()
# Output name: the part of `filename` before "_rHists", followed by "_local".
picname = filename.split("_rHists")[0]+"_local"
plt.savefig(picname+'.pdf',dpi=300)

In [None]:
import os
# Create one directory, relative to the current working directory, per column of rhist_df.
# NOTE(review): rhist_df is not assigned in this cell; it must already exist in the kernel.
for direct in rhist_df.columns:
    path = os.path.join("./", direct)
    # exist_ok=True keeps the cell re-runnable: os.mkdir raised FileExistsError
    # as soon as one of the directories was already there.
    os.makedirs(path, exist_ok=True)

In [None]:
# Compute the local number density and local volume fraction profiles of one
# simulation from its radial histogram, plot both on a twin-axis figure and save it.
# NOTE(review): hist_files is not assigned in this cell, and the index 24 is
# hard-coded; each entry is used as (histogram file, bin-edges file).
fpath = hist_files[24]

histo_collections = np.loadtxt(fpath[0],dtype=np.int32)
bin_edges = np.around(np.loadtxt(fpath[1]),decimals=2)
# The simulation attributes are built from the name of the histogram file.
cell_attrs = PipeLine.cellAttributes(fpath[0],geometry='cylinder',printname=True)
filename = cell_attrs.filename
# NOTE(review): the integrand is 2 * pi * dcyl * r, i.e. it uses the attribute
# `dcyl` of the cell -- confirm that this is the intended factor.
cylindrical_shell_integrand = lambda r: 2 * np.pi * cell_attrs.dcyl * r

# the sum of rho is not equal to the bulk number density (r=infinity) natom/cell_vol
# this arises from the way we discretize the local number density.
rho = PipeLine.local_number_density(histo_collections, bin_edges, cylindrical_shell_integrand)

rmon = cell_attrs.dmon / 2.0
# Midpoints of consecutive bin edges, rounded to 2 decimals.
bin_centers = np.around((bin_edges[:-1] + bin_edges[1:]) / 2.0,decimals=2)

# NOTE(review): bin_edge_index and vol_shares are PipeLine helpers defined outside
# this notebook; presumably they find, for every bin center, the bins overlapped by
# a monomer of radius rmon and the volume it shares with each of them -- confirm.
range_of_bins = PipeLine.bin_edge_index(rmon, bin_centers, bin_edges)
volume_shares = PipeLine.vol_shares(rmon, bin_centers, bin_edges, range_of_bins, PipeLine.sphere_cylinder_intersection)

# the sum of phi is not equal to the bulk volume fraction (r=infinity) natom*vol_per_atom/cell_vol
# this arises from the way we discretize the local volume fraction and the way we relate it to the local number density.
phi = PipeLine.local_volume_fraction(bin_centers, rho, volume_shares)

fig, ax = plt.subplots(nrows=1,ncols=1,figsize=(16,9))

fontsize = 24
color = 'tab:blue'
# Left axis: local volume fraction; the legend label shows cell_attrs.vfrc_crowd.
line1, = ax.plot(bin_centers, phi, label = r"$\phi_c(R=\infty)={:4.2f}$".format(cell_attrs.vfrc_crowd), markersize=0.1, c=color)
#ax.legend()
ax.set_xlabel(r"Cylinder radius ($\frac{D}{2a}$)",fontsize=fontsize)
ax.set_ylabel(r"$\phi_m(r)$",fontsize=fontsize)
ax.tick_params(labelsize=24)

# Right axis: local number density, sharing the x axis with `ax`.
ax2 = ax.twinx()
color = 'tab:red'
# NOTE(review): this label is written as rho_c but is formatted with the same
# cell_attrs.vfrc_crowd value as the phi label above -- confirm this is intended.
line2, = ax2.plot(bin_centers, rho, label = r"$\rho_c(R=\infty)={:4.2f}$".format(cell_attrs.vfrc_crowd), markersize=0.1, c=color)
ax2.set_ylabel(r"$\rho_m(r)$",fontsize=fontsize)
ax2.tick_params(labelsize=24)

lines = [line1, line2]

# One legend for the lines of both axes.
ax.legend(lines, [l.get_label() for l in lines],loc=1,ncol=2,fontsize=fontsize)
#fig.legend()
# Output name: the part of `filename` before "_rHists", followed by "_local".
picname = filename.split("_rHists")[0]+"_local"
plt.savefig(picname+'.pdf',dpi=300)

In [None]:
# Draw, on one axes, the radial histograms in rhist_dfs whose column name matches
# a filename of ens_avg_properties.
# NOTE(review): ens_avg_properties and rhist_dfs are not assigned in this cell;
# they must already exist in the kernel.
fig, ax = plt.subplots(nrows=1,ncols=1)
# NOTE(review): nens is never read; hists and phi_c are only read by the
# commented-out call near the end of the cell.
nens = 0
hists = 0
phi_c = 0.0 

# NOTE(review): the loop variable re-binds the notebook-level name `filename`,
# which other cells use to build the name of the saved figure.
for filename in ens_avg_properties.filename:
    condition = ens_avg_properties['filename']==filename
    for col in rhist_dfs.columns:
        # A column belongs to this row when the text before 'ens' in the column
        # name equals the text before '_ens_avg' in the filename.
        if col.split('ens')[0] == filename.split('_ens_avg')[0]:
            #print(rhist_dfs[col])
            # NOTE(review): ens_avg_properties[condition].dcyl and .vfrc_crowd are
            # selections by a boolean mask, not scalars; the label is formatted
            # with that selection -- probably the single matching value is meant.
            PipeLine.histo_plotter(ax, rhist_dfs[col].values, lambda r: 2 * ens_avg_properties[condition].dcyl / 2.0 * r, rhist_dfs.index.values, label = r"$\phi_c={}$".format(ens_avg_properties[condition].vfrc_crowd), markersize=0.1)
    #ens_evg_rhist
    
#PipeLine.histo_plotter(ax, hists, lambda r: 2 * cell_attrs.dcyl / 2.0 * r, edges, label = r"$\phi_c={}$".format(phi_c), markersize=0.1)
ax.legend()
ax.set_xlabel(r"Cylinder radius ($\frac{D}{2a}$)")
# NOTE(review): this repeats the previous call; possibly a y-axis label was intended.
ax.set_xlabel(r"Cylinder radius ($\frac{D}{2a}$)")