In [None]:
## Conclusion

## Introduction

Here, I am going to test the *neighbor*, *neigh_modify*, and *processors* commands of *LAMMPS*. To this end, I choose the settings of these commands in the following way:
1. processors Px Py Pz:

    1.1. Px, Py, and Pz are the # of processors in direction x, y, and z, respectively.
    
    1.2. For 2 and 4 cores, I use _processors 1 1 *_.
    
    1.3. For 8, 16, 32 cores, I use _processors 2 2 *_.
2. neighbor rskin bin:

    2.1 rskin is the extra distance beyond the cutoff distance $r_{cutoff}$ of the potential. I use the WCA potential (the purely repulsive Lennard-Jones potential) with $r_{cutoff}=2^{1/6}\sigma$, where the size (diameter) of an LJ bead is $\sigma=a_m=1.0$ and $a_m$ is the monomer size, so $r_{skin}=rskin*\sigma$.
    
    2.2 In my test, $rskin=0.2,0.3,0.4,$ and $0.5$.
 
3. neigh_modify delay every check page one:

    3.1 *delay* can be 0 or a multiple of every. Here, $delay=0,1,2,4,10,20$.
    
    3.2 *every* is set to $1,2,$ or $4$.
    
    3.3 *page* is set to $3000$, or $100000$.
    
    3.4 check is always *yes*.
    
    3.5. *one* is set to $300$, or $2000$.

4. I also test *recenter* off and on.

In [1]:
#%matplotlib notebook
%matplotlib inline
# Importing necessary packages:
import sys
import os
#import scipy.integrate as integrate
#import scipy.special as special
#from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
#import math
import re
from glob import glob
import pandas as pd

In [2]:
# Make sure the 'results' output directory exists inside the current working directory.
path = os.getcwd()
print(path)
results_dir = os.path.join(path, 'results')
try:
    # exist_ok=True: re-running the notebook with the directory already present is not an error.
    os.makedirs(results_dir, exist_ok=True)
except OSError as err:
    # Report the directory that actually failed (not the working directory) and the reason.
    print("Creation of the directory %s failed: %s" % (results_dir, err))
else:
    print("Directory %s is ready" % results_dir)

/Users/amirhsi/OneDrive - University of Waterloo/Thesis-PhD/Jupyter/log_file_pipeline
Creation of the directory /Users/amirhsi/OneDrive - University of Waterloo/Thesis-PhD/Jupyter/log_file_pipeline failed


In [3]:
# Collect the LAMMPS log files to parse. glob() returns paths in arbitrary order, so they are
# sorted to make the row order of the generated table reproducible between runs.
files = sorted(glob('../log_nieghbor_test/*.log'))

In [4]:
# (Re)create neighbor_testing.txt and write its CSV header line.
# Per-run columns; delay/every/check/page/one come from the command
# "neigh_modify delay NUM every NUM check YES/NO page NUM one NUM".
run_columns = ['filename', 'shortname', 'recenter', 'rskin',
               'delay', 'every', 'check', 'page', 'one',
               'temp_last_ts', 'total_eng_last_ts', 'press_last_ts',
               'total_time(s)', 'cores', 'timestep', 'atoms', 'ts_per_sec']
# Timing-breakdown table of the log:
#   Section columns: min time, avg time, max time, %varavg, %total
#   Section rows: Pair, Bond, Neigh, Comm, Output, Modify, Other
# Two columns are kept per row: <section>_avg(s) and <section>_pct.
section_columns = []
for section in ('pair', 'bond', 'neigh', 'comm', 'output', 'modify', 'other'):
    section_columns += [section + '_avg(s)', section + '_pct']
header = ','.join(run_columns + section_columns + ['dangerous']) + '\n'
# The context manager closes the file even if the write raises.
with open("neighbor_testing.txt", "w") as neighfile:
    neighfile.write(header)

In [5]:
def _skip_to(log, line, prefix):
    """Return the first line at or after ``line`` that starts with ``prefix``.

    ``line`` is the line most recently read from ``log``; it is tested first, and further
    lines are read from ``log`` only while no match has been found.

    Raises:
        ValueError: if the end of the file is reached without a match. A bare
            ``while``/``readline`` loop would spin forever there, because
            ``readline()`` keeps returning '' at end of file.
    """
    while not line.startswith(prefix):
        line = log.readline()
        if not line:
            raise ValueError("no line starting with %r found in %s" % (prefix, log.name))
    return line


# Parse every log file and append one CSV row per file to neighbor_testing.txt.
# The order of the searches below matters: each one continues from where the previous stopped.
for file in files:
    has_rskin = 'rskin' in file
    if has_rskin:
        filename = file
    else:
        # Paths without 'rskin' get 'rskin0.2' inserted in front of 'cpu'
        # (text between the first and the last 'cpu' of the path, if any, is dropped).
        words = file.split('cpu')
        filename = words[0] + 'rskin0.2cpu' + words[-1]
    # Drop everything from '.log' on, then keep only the last '/'-separated component.
    filename = filename.split('.log')[0].split('/')[-1]
    shortname = filename.split('page')[0] + 'rskin' + filename.split('rskin')[-1]
    rskin = filename.split('rskin')[-1].split('cpu')[0]
    recenter = filename.split('_')[-1]
    # Start of the thermo line recorded as the "last timestep":
    # step 100000 for paths containing 'rskin', step 10000 for the others.
    last_step_prefix = "  100000" if has_rskin else "   10000"

    fields = [filename, shortname, recenter, rskin]
    with open(file, 'r') as log:
        # neigh_modify delay NUM every NUM check YES/NO page NUM one NUM:
        line = _skip_to(log, log.readline(), 'neigh_modify')
        # Keep the value that follows each keyword (words 2, 4, 6, ...).
        fields.extend(line.split()[2::2])

        line = _skip_to(log, line, last_step_prefix)
        words = line.split()  # Step Temp E_pair E_mol TotEng Press
        fields += [words[1], words[4], words[5]]  # temp, total energy, pressure

        line = _skip_to(log, line, 'Loop time')
        words = line.split()
        # total time, number of cores, total timesteps, total atoms
        fields += [words[3], words[5], words[8], words[11]]

        line = _skip_to(log, line, 'Performance:')
        fields.append(line.split()[3])  # timesteps per second

        line = _skip_to(log, line, 'Section')
        log.readline()  # skip the single line between the table header and its first row
        # Section rows: Pair, Bond, Neigh, Comm, Output, Modify, Other
        # Section columns: min time, avg time, max time, %varavg, %total
        for _ in range(7):
            line = log.readline()
            # Field 2 of the '|'-split row is stored under the *_avg(s) header column.
            fields.append(line.split('|')[2].strip())
            fields.append(line.split()[-1])  # share of the total time (last column)

        line = _skip_to(log, line, 'Dangerous')
        fields.append(str(int(line.split()[-1])))  # number of dangerous builds

    # Write the row only after the whole log parsed, so a failure leaves no partial row behind.
    with open("neighbor_testing.txt", "a") as neighfile:
        neighfile.write(",".join(fields) + "\n")

In [24]:
# Load the parsed table, add the combined neighbor+communication cost columns, and rank the
# runs: fewest cores first, then lowest neigh/comm share, then highest throughput.
cols_sort = ['cores','sum_pct','neigh_pct','comm_pct','ts_per_sec','atoms']
cols_ascending = [True,True,True,True,False,False]
neigh_data = (
    pd.read_csv("neighbor_testing.txt")
    .assign(**{
        'sum_pct': lambda df: df['neigh_pct'] + df['comm_pct'],
        'sum_ave(s)': lambda df: df['neigh_avg(s)'] + df['comm_avg(s)'],
    })
    .sort_values(by=cols_sort, ascending=cols_ascending)
    .reset_index(drop=True)
)
# Columns of interest for the tables shown further down.
cols = ['cores','sum_pct','neigh_pct','comm_pct','ts_per_sec','rskin','atoms','delay','every','page','one','total_time(s)','sum_ave(s)']

In [25]:
# Runs with recenter on; the 2/4/8-core subsets are further restricted to page != 3000,
# 100080 atoms and exactly one dangerous build.
recenter_on = neigh_data.recenter == 'yes'
default_mask = (
    (neigh_data.page != 3000)
    & recenter_on
    & (neigh_data.atoms == 100080)
    & (neigh_data.dangerous == 1)
)
default_page = neigh_data.loc[default_mask].reset_index(drop=True)
default_cpu2, default_cpu4, default_cpu8 = (
    default_page.loc[default_page.cores == n_cores] for n_cores in (2, 4, 8)
)
# The 16- and 32-core subsets are filtered on recenter only.
default_page2 = neigh_data.loc[recenter_on]
default_cpu16, default_cpu32 = (
    default_page2.loc[default_page2.cores == n_cores] for n_cores in (16, 32)
)

In [29]:
# All runs recorded with 32 cores.
neigh_data[neigh_data['cores'] == 32]

Unnamed: 0,filename,shortname,recenter,rskin,delay,every,check,page,one,temp_last_ts,...,comm_pct,output_avg(s),output_pct,modify_avg(s),modify_pct,other_avg(s),other_pct,dangerous,sum_pct,sum_ave(s)
156,delay0every1page100000one2000rskin0.2cpu32rece...,delay0every1rskin0.2cpu32recenter_no,no,0.2,0,1,yes,100000,2000,0.1479,...,15.91,0.14756,0.79,2.3214,12.39,1.081,5.77,0,74.2,13.903
157,delay0every1page100000one2000rskin0.2cpu16rece...,delay0every1rskin0.2cpu16recenter_yes,yes,0.2,0,1,yes,100000,2000,0.147591,...,15.83,0.15894,0.72,3.5419,16.01,0.4918,2.22,1,75.03,16.5961
158,delay0every1page3000one300rskin0.2cpu16recente...,delay0every1rskin0.2cpu16recenter_no,no,0.2,0,1,yes,3000,300,0.1479,...,17.63,0.14428,0.74,2.3356,12.04,1.069,5.51,0,75.07,14.562
159,delay2every2page100000one2000rskin0.2cpu16rece...,delay2every2rskin0.2cpu16recenter_yes,yes,0.2,2,2,yes,100000,2000,0.147723,...,18.25,0.13164,0.73,2.9861,16.62,0.2006,1.12,1,75.14,13.5017
160,delay0every1page3000one300rskin0.2cpu32recente...,delay0every1rskin0.2cpu32recenter_yes,yes,0.2,0,1,yes,3000,300,0.147591,...,17.89,0.15075,0.7,3.436,15.9,0.4619,2.14,1,75.28,16.2719
161,delay2every2page100000one2000rskin0.2cpu32rece...,delay2every2rskin0.2cpu32recenter_no,no,0.2,2,2,yes,100000,2000,0.147729,...,22.17,0.12587,0.82,1.9521,12.69,0.6554,4.26,0,75.35,11.5947
162,delay0every1page100000one2000rskin0.2cpu16rece...,delay0every1rskin0.2cpu16recenter_no,no,0.2,0,1,yes,100000,2000,0.1479,...,17.65,0.14505,0.76,2.3288,12.23,0.9392,4.93,0,75.42,14.3587
163,delay0every1page3000one300rskin0.2cpu32recente...,delay0every1rskin0.2cpu32recenter_no,no,0.2,0,1,yes,3000,300,0.1479,...,16.95,0.13847,0.72,2.3232,12.15,0.9591,5.02,0,75.45,14.4241
164,delay0every1page100000one2000rskin0.2cpu32rece...,delay0every1rskin0.2cpu32recenter_yes,yes,0.2,0,1,yes,100000,2000,0.147591,...,18.86,0.12985,0.73,2.8527,16.1,0.2744,1.55,1,75.46,13.3678
165,delay4every2page100000one2000rskin0.2cpu32rece...,delay4every2rskin0.2cpu32recenter_yes,yes,0.2,4,2,yes,100000,2000,0.147648,...,15.9,0.14838,0.73,3.2519,16.04,0.3032,1.5,1,75.48,15.3


In [8]:
# Ten fastest 2-core runs: highest throughput first, ties broken by the neigh/comm shares.
cols = ['cores','total_time(s)','sum_pct','neigh_pct','comm_pct','ts_per_sec','rskin','atoms','delay','every','temp_last_ts','total_eng_last_ts','press_last_ts','dangerous']
cols_sort =  ['ts_per_sec','sum_pct','neigh_pct','comm_pct','sum_ave(s)']
cols_ascending = [False,True,True,True,False]
# Give the subset a fresh 0..n-1 index.
default_cpu2 = default_cpu2.reset_index(drop=True)
default_cpu2.sort_values(by=cols_sort, ascending=cols_ascending).loc[:, cols].head(10)

Unnamed: 0,cores,total_time(s),sum_pct,neigh_pct,comm_pct,ts_per_sec,rskin,atoms,delay,every,temp_last_ts,total_eng_last_ts,press_last_ts,dangerous
1,2,2113.74,64.12,63.23,0.89,47.309,0.5,100080,10,1,0.137544,-0.273035,24.569674,1
3,2,2411.78,64.56,61.99,2.57,41.463,0.4,100080,10,1,0.137544,-0.273035,24.569674,1
22,2,2796.04,65.84,64.82,1.02,35.765,0.2,100080,10,2,0.137648,-0.270139,24.45899,1
26,2,2812.38,65.93,64.49,1.44,35.557,0.4,100080,10,2,0.137648,-0.270139,24.45899,1
33,2,2824.5,66.27,65.24,1.03,35.405,0.2,100080,10,1,0.137544,-0.273035,24.569674,1
31,2,2834.29,66.11,65.04,1.07,35.282,0.4,100080,4,2,0.137177,-0.27213,24.392454,1
25,2,2835.92,65.86,64.8,1.06,35.262,0.3,100080,10,2,0.137648,-0.270139,24.45899,1
29,2,2846.44,66.03,64.67,1.36,35.132,0.2,100080,4,4,0.137177,-0.27213,24.392454,1
17,2,2857.65,65.57,64.06,1.51,34.994,0.2,100080,4,2,0.137177,-0.27213,24.392454,1
23,2,2860.05,65.84,64.82,1.02,34.964,0.3,100080,2,2,0.137377,-0.271202,24.372552,1


In [9]:
# Standard error of the mean of the final total energy, as a percentage of its mean.
col = 'total_eng_last_ts'
last_energy = default_page[col]
last_energy.sem() / last_energy.mean() * 100

-0.07227033143786842

In [10]:
# The row with the highest timesteps per second on a 2-core machine.
# idxmax() returns an index *label*, so the row is selected with label-based .loc rather than
# position-based .iloc; the two only agree while the index is a fresh 0..n-1 range.
default_cpu2.loc[default_cpu2.ts_per_sec.idxmax()]
# The difference in neigh_pct between this row and the row with the lowest neigh_pct is less than 5%;
# however, its comm_pct is half of that of the row with the lowest neigh_pct.

filename             delay10every1page100000one2000rskin0.5cpu2rece...
shortname                        delay10every1rskin0.5cpu2recenter_yes
recenter                                                           yes
rskin                                                              0.5
delay                                                               10
every                                                                1
check                                                              yes
page                                                            100000
one                                                               2000
temp_last_ts                                                  0.137544
total_eng_last_ts                                            -0.273035
press_last_ts                                                  24.5697
total_time(s)                                                  2113.74
cores                                                                2
timest

In [22]:
# Extrapolate the wall-clock time of a target run from one 2-core benchmark row, assuming the
# cost of every algorithm is O(N) in the number of atoms N and linear in the number of timesteps.
data = default_cpu2
col = 'ts_per_sec'
idx = data[col].idxmax()  # label of the row with the highest timesteps/second
idx = 7  # NOTE(review): hard-coded row label replaces the idxmax result above - confirm this is intended
# Target system: number of atoms and number of timesteps.
natoms, nsteps = 2e3, 5e7
# Benchmark values of the selected row.
test_natoms, test_nsteps, test_ttotal, test_cores = (
    data.at[idx, key] for key in ('atoms', 'timestep', 'total_time(s)', 'cores')
)
atoms_ratio = natoms / test_natoms
steps_ratio = nsteps / test_nsteps
ttotal = atoms_ratio * steps_ratio * test_ttotal  # seconds
ttotal_hr = ttotal / 3600  # hours
message = "The estimated simulation time for a system with {} atoms in {} timesteps on {}-core machine is {} seconds or {} hours."
print(message.format(natoms, nsteps, test_cores, ttotal, ttotal_hr))

The estimated simulation time for a system with 2000.0 atoms in 50000000.0 timesteps on 2-core machine is 24741.406874500397 seconds or 6.872613020694555 hours.


In [34]:
# 4-core runs ranked with the current cols_sort / cols_ascending, showing the columns in cols.
default_cpu4 = default_cpu4.reset_index(drop=True)  # fresh 0..n-1 index
default_cpu4.sort_values(by=cols_sort, ascending=cols_ascending).loc[:, cols]

Unnamed: 0,cores,total_time(s),sum_pct,neigh_pct,comm_pct,ts_per_sec,rskin,atoms,delay,every,temp_last_ts,total_eng_last_ts,press_last_ts,dangerous
1,4,1288.18,66.34,63.39,2.95,77.629,0.3,100080,0,1,0.137163,-0.272891,24.583867,1
21,4,1514.01,68.01,65.6,2.41,66.05,0.5,100080,4,2,0.136924,-0.276594,24.788856,1
27,4,1516.12,68.24,65.95,2.29,65.958,0.5,100080,10,2,0.137044,-0.271941,24.668963,1
31,4,1527.36,68.41,66.08,2.33,65.473,0.4,100080,10,2,0.137044,-0.271941,24.668963,1
17,4,1530.52,67.7,65.4,2.3,65.337,0.2,100080,2,2,0.137234,-0.271126,24.478831,1
13,4,1535.37,67.61,65.35,2.26,65.131,0.3,100080,2,2,0.137234,-0.271126,24.478831,1
34,4,1537.1,68.68,66.35,2.33,65.058,0.5,100080,0,4,0.136924,-0.276594,24.788856,1
4,4,1541.16,67.12,64.93,2.19,64.886,0.4,100080,0,4,0.136924,-0.276594,24.788856,1
18,4,1546.28,67.79,65.11,2.68,64.671,0.5,100080,4,4,0.136924,-0.276594,24.788856,1
6,4,1548.32,67.16,64.73,2.43,64.586,0.4,100080,4,2,0.136924,-0.276594,24.788856,1


In [9]:
# The row with the highest timesteps per second on a 4-core machine.
# idxmax() returns an index *label*, so the row is selected with label-based .loc rather than
# position-based .iloc; the two only agree while the index is a fresh 0..n-1 range.
default_cpu4.loc[default_cpu4.ts_per_sec.idxmax()]
# The difference in neigh_pct between this row and the row with the lowest neigh_pct is less than 5%;
# however, its comm_pct is half of that of the row with the lowest neigh_pct.
# NOTE(review): confirm that this remark holds for the 4-core data.

filename             delay0every1page100000one2000rskin0.3cpu4recen...
shortname                         delay0every1rskin0.3cpu4recenter_yes
recenter                                                           yes
rskin                                                              0.3
delay                                                                0
every                                                                1
check                                                              yes
page                                                            100000
one                                                               2000
temp_last_ts                                                  0.137163
total_eng_last_ts                                            -0.272891
press_last_ts                                                  24.5839
total_time(s)                                                  1288.18
cores                                                                4
timest

In [24]:
# Extrapolate the wall-clock time of a target run from one 4-core benchmark row, assuming the
# cost of every algorithm is O(N) in the number of atoms N and linear in the number of timesteps.
data = default_cpu4
col = 'ts_per_sec'
idx = data[col].idxmax()  # label of the row with the highest timesteps/second
idx = 6  # NOTE(review): hard-coded row label replaces the idxmax result above - confirm this is intended
# Target system: number of atoms and number of timesteps.
natoms, nsteps = 2.5e4, 5e7
# Benchmark values of the selected row.
test_natoms, test_nsteps, test_ttotal, test_cores = (
    data.at[idx, key] for key in ('atoms', 'timestep', 'total_time(s)', 'cores')
)
atoms_ratio = natoms / test_natoms
steps_ratio = nsteps / test_nsteps
ttotal = atoms_ratio * steps_ratio * test_ttotal  # seconds
ttotal_hr = ttotal / 3600  # hours
message = "The estimated simulation time for a system with {} atoms in {} timesteps on {}-core machine is {} seconds or {} hours."
print(message.format(natoms, nsteps, test_cores, ttotal, ttotal_hr))

The estimated simulation time for a system with 25000.0 atoms in 50000000.0 timesteps on 4-core machine is 169585.5815347722 seconds or 47.10710598188117 hours.


In [11]:
# 8-core runs ranked with the current cols_sort / cols_ascending, showing the columns in cols.
default_cpu8 = default_cpu8.reset_index(drop=True)  # fresh 0..n-1 index
default_cpu8.sort_values(by=cols_sort, ascending=cols_ascending).loc[:, cols]

Unnamed: 0,cores,total_time(s),sum_pct,neigh_pct,comm_pct,ts_per_sec,rskin,atoms,delay,every,temp_last_ts,total_eng_last_ts,press_last_ts,dangerous
1,8,746.242,68.13,58.56,9.57,134.005,0.3,100080,1,1,0.137424,-0.270585,24.542303,1
3,8,800.134,69.63,56.62,13.01,124.979,0.5,100080,2,2,0.136991,-0.274291,24.518592,1
5,8,818.264,70.44,59.83,10.61,122.21,0.5,100080,10,1,0.137046,-0.272734,24.516004,1
4,8,838.987,70.32,57.4,12.92,119.191,0.3,100080,10,2,0.137347,-0.273521,24.600945,1
0,8,850.876,67.39,57.42,9.97,117.526,0.3,100080,2,2,0.136991,-0.274291,24.518592,1
2,8,858.272,69.0,56.94,12.06,116.513,0.4,100080,2,2,0.136991,-0.274291,24.518592,1
26,8,1037.65,73.7,65.79,7.91,96.372,0.4,100080,4,4,0.137535,-0.26924,24.322144,1
9,8,1046.95,72.53,64.1,8.43,95.515,0.4,100080,4,2,0.137535,-0.26924,24.322144,1
12,8,1051.85,72.93,63.78,9.15,95.071,0.2,100080,10,2,0.137347,-0.273521,24.600945,1
14,8,1053.35,73.16,64.69,8.47,94.935,0.3,100080,4,2,0.137535,-0.26924,24.322144,1


In [12]:
# The row with the highest timesteps per second on an 8-core machine.
# idxmax() returns an index *label*, so the row is selected with label-based .loc rather than
# position-based .iloc; the two only agree while the index is a fresh 0..n-1 range.
default_cpu8.loc[default_cpu8.ts_per_sec.idxmax()]
# The difference in neigh_pct between this row and the row with the lowest neigh_pct is less than 5%;
# however, its comm_pct is half of that of the row with the lowest neigh_pct.
# NOTE(review): confirm that this remark holds for the 8-core data.

filename             delay1every1page100000one2000rskin0.3cpu8recen...
shortname                         delay1every1rskin0.3cpu8recenter_yes
recenter                                                           yes
rskin                                                              0.3
delay                                                                1
every                                                                1
check                                                              yes
page                                                            100000
one                                                               2000
temp_last_ts                                                  0.137424
total_eng_last_ts                                            -0.270585
press_last_ts                                                  24.5423
total_time(s)                                                  746.242
cores                                                                8
timest

In [16]:
# Extrapolate the wall-clock time of a target run from the fastest 8-core benchmark row, assuming
# the cost of every algorithm is O(N) in the number of atoms N and linear in the number of timesteps.
data = default_cpu8
col = 'ts_per_sec'
idx = data[col].idxmax()  # label of the row with the highest timesteps/second
# Target system: number of atoms and number of timesteps.
natoms, nsteps = 1e5, 7e7
# Benchmark values of the selected row.
test_natoms, test_nsteps, test_ttotal, test_cores = (
    data.at[idx, key] for key in ('atoms', 'timestep', 'total_time(s)', 'cores')
)
atoms_ratio = natoms / test_natoms
steps_ratio = nsteps / test_nsteps
ttotal = atoms_ratio * steps_ratio * test_ttotal  # seconds
ttotal_hr = ttotal / 3600  # hours
message = "The estimated simulation time for a system with {} atoms in {} timesteps on {}-core machine is {} seconds or {} hours."
print(message.format(natoms, nsteps, test_cores, ttotal, ttotal_hr))

The estimated simulation time for a system with 100000.0 atoms in 70000000.0 timesteps on 8-core machine is 521951.83852917666 seconds or 144.98662181366018 hours.


In [26]:
# Display the 16-core subset built from the recenter == 'yes' rows of neigh_data.
default_cpu16

Unnamed: 0,filename,shortname,recenter,rskin,delay,every,check,page,one,temp_last_ts,...,comm_pct,output_avg(s),output_pct,modify_avg(s),modify_pct,other_avg(s),other_pct,dangerous,sum_pct,sum_ave(s)


In [34]:
# 32-core runs: highest throughput first, ties broken by the neigh/comm shares.
cols = ['cores','total_time(s)','sum_pct','neigh_pct','comm_pct','ts_per_sec','rskin','atoms','delay','every','temp_last_ts','total_eng_last_ts','press_last_ts','dangerous']
cols_sort =  ['ts_per_sec','sum_pct','neigh_pct','comm_pct','sum_ave(s)']
cols_ascending = [False,True,True,True,False]
# Give the subset a fresh 0..n-1 index.
default_cpu32 = default_cpu32.reset_index(drop=True)
default_cpu32.sort_values(by=cols_sort, ascending=cols_ascending).loc[:, cols]

Unnamed: 0,cores,total_time(s),sum_pct,neigh_pct,comm_pct,ts_per_sec,rskin,atoms,delay,every,temp_last_ts,total_eng_last_ts,press_last_ts,dangerous
3,32,17.7145,75.46,56.6,18.86,564.51,0.2,78704,0,1,0.147591,-0.079501,14.135611,1
1,32,17.9674,75.14,56.89,18.25,556.563,0.2,78704,2,2,0.147723,-0.078737,14.265818,1
11,32,19.9218,77.75,64.22,13.53,501.962,0.2,78704,10,1,0.147569,-0.079641,14.400009,850
5,32,19.9227,75.72,60.24,15.48,501.94,0.2,78704,10,2,0.147565,-0.079353,14.248593,823
4,32,20.2692,75.48,59.58,15.9,493.359,0.2,78704,4,2,0.147648,-0.074808,14.043154,1
6,32,20.6504,76.05,58.32,17.73,484.253,0.2,78704,10,2,0.147565,-0.079353,14.248593,823
7,32,20.7442,76.21,57.58,18.63,482.062,0.2,78704,4,2,0.147648,-0.074808,14.043154,1
9,32,20.7562,76.6,59.87,16.73,481.784,0.2,78704,0,1,0.147591,-0.079501,14.135611,1
2,32,21.6159,75.28,57.39,17.89,462.622,0.2,78704,0,1,0.147591,-0.079501,14.135611,1
10,32,21.6818,77.23,62.59,14.64,461.216,0.2,78704,10,1,0.147569,-0.079641,14.400009,850


In [31]:
# Extrapolate the wall-clock time of a target run from the fastest 32-core benchmark row, assuming
# the cost of every algorithm is O(N) in the number of atoms N and linear in the number of timesteps.
data = default_cpu32
col = 'ts_per_sec'
idx = data[col].idxmax()  # label of the row with the highest timesteps/second
# Target system: number of atoms and number of timesteps.
natoms, nsteps = 1e5, 7e7
# Benchmark values of the selected row.
test_natoms, test_nsteps, test_ttotal, test_cores = (
    data.at[idx, key] for key in ('atoms', 'timestep', 'total_time(s)', 'cores')
)
atoms_ratio = natoms / test_natoms
steps_ratio = nsteps / test_nsteps
ttotal = atoms_ratio * steps_ratio * test_ttotal  # seconds
ttotal_hr = ttotal / 3600  # hours
message = "The estimated simulation time for a system with {} atoms in {} timesteps on {}-core machine is {} seconds or {} hours."
print(message.format(natoms, nsteps, test_cores, ttotal, ttotal_hr))

The estimated simulation time for a system with 100000.0 atoms in 70000000.0 timesteps on 32-core machine is 157554.25391339703 seconds or 43.76507053149918 hours.


In [None]:
col1='neigh_avg(s)'
col2='comm_avg(s)'
col3='sum_ave(s)'
col4='ts_per_sec'


def _make_bar_frame(frame):
    """Build the table behind a stacked bar chart of neighbor and communication time.

    The rows of ``frame`` are ordered by descending col3, then col2, then col1; only the
    col1 and col2 columns are kept, indexed by 'shortname'.

    No ``inplace=True`` is used: every step returns a new frame, so ``frame`` itself is never
    modified and pandas has no column-subset copy to warn about (SettingWithCopyWarning).
    """
    ordered = frame[['shortname', col1, col2, col3, col4]].sort_values(
        [col3, col2, col1], ascending=False
    )
    return ordered[['shortname', col1, col2]].set_index('shortname', drop=True)


default_cpu2_bar = _make_bar_frame(default_cpu2)
default_cpu4_bar = _make_bar_frame(default_cpu4)
default_cpu8_bar = _make_bar_frame(default_cpu8)
default_cpu16_bar = _make_bar_frame(default_cpu16)
default_cpu32_bar = _make_bar_frame(default_cpu32)
default_page_bar = _make_bar_frame(default_page)

In [None]:
# File name and throughput of every 2-core run.
default_cpu2.loc[:, ['filename', 'ts_per_sec']]

In [None]:
# Stacked horizontal bars of the 2-core timing table, saved as a PDF.
name = 'avg_min'
output_pdf = 'neighbor_cpu2_' + name + '.pdf'
ax = default_cpu2_bar.plot.barh(stacked=True, figsize=(20, 30))
ax.set_xlabel('Time (sec)')
# Annotation placed at data coordinates (1700, 30).
ax.text(1700, 30, "For rskin=0.2, # of timesteps = 10000")
plt.savefig(output_pdf, dpi=300, format='pdf', bbox_inches='tight')