# Check raw VTK files

In [1]:
import glob

# Original VTK file information

## Data locations

In [2]:
ls /n/scratchlfs/ac290r/p2_re5_pe1  # top-level contents of the re5_pe1 case directory

bgkflag.1.ios  bgkflag.hdr     DIRDATA_Drug/  run2.py*
bgkflag.2.ios  bgkflag.xyz     err.txt        slurm-7875866.out
bgkflag.dat    DIRDATA_Blood/  out.txt        submit.sh


In [3]:
ls /n/scratchlfs/ac290r/p2_re5_pe10  # top-level contents of the re5_pe10 case directory

bgkflag.1.ios  bgkflag.hdr     DIRDATA_Drug/  run2.py*
bgkflag.2.ios  bgkflag.xyz     err.txt        slurm-7875872.out
bgkflag.dat    DIRDATA_Blood/  out.txt        submit.sh


In [4]:
ls /n/scratchlfs/ac290r/p2_re10_pe1  # top-level contents of the re10_pe1 case directory

bgkflag.1.ios  bgkflag.hdr     DIRDATA_Drug/  out.txt            submit.sh
bgkflag.2.ios  bgkflag.xyz     err.txt        run2.py*
bgkflag.dat    DIRDATA_Blood/  MAGIC/         slurm-7885067.out


In [5]:
ls /n/scratchlfs/ac290r/p2_re10_pe10  # top-level contents of the re10_pe10 case directory

bgkflag.1.ios  bgkflag.hdr     DIRDATA_Drug/  run2.py*
bgkflag.2.ios  bgkflag.xyz     err.txt        slurm-7983640.out
bgkflag.dat    DIRDATA_Blood/  out.txt        submit.sh


## Raw data size

### One time-step data size

In [6]:
# Summarized, human-readable disk usage of one blood time-step directory (case re5_pe1)
!du -sh /n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Blood/VTK/T0000001000 

124M	/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Blood/VTK/T0000001000


In [7]:
# Summarized, human-readable disk usage of one drug time-step directory (case re5_pe1)
!du -sh /n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Drug/VTK/T0000001000 

100M	/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Drug/VTK/T0000001000


In [8]:
# Summarized, human-readable disk usage of one blood time-step directory (case re10_pe1)
!du -sh /n/scratchlfs/ac290r/p2_re10_pe1/DIRDATA_Blood/VTK/T0000001000

973M	/n/scratchlfs/ac290r/p2_re10_pe1/DIRDATA_Blood/VTK/T0000001000


In [9]:
# Summarized, human-readable disk usage of one drug time-step directory (case re10_pe1)
!du -sh /n/scratchlfs/ac290r/p2_re10_pe1/DIRDATA_Drug/VTK/T0000001000

780M	/n/scratchlfs/ac290r/p2_re10_pe1/DIRDATA_Drug/VTK/T0000001000


The `pe10` cases have the same per-time-step size as the corresponding `pe1` cases (their `du` output is not shown here).

### Number of time steps

In [10]:
def sglob(pathname):
    """Glob ``pathname`` and return the matching paths in sorted order.

    Parameters
    ----------
    pathname : str
        Glob pattern, passed unchanged to ``glob.glob``.

    Returns
    -------
    list of str
        The matching paths in ascending order; an empty list if nothing matches.
    """
    matches = glob.glob(pathname)
    # glob.glob gives no ordering guarantee, so sort explicitly
    matches.sort()
    return matches

In [11]:
# Sorted lists of the drug .pvtu paths, one list per case
(filelist_drug_re5pe1,
 filelist_drug_re5pe10,
 filelist_drug_re10pe1,
 filelist_drug_re10pe10) = map(sglob, [
    '/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Drug/VTK/*.pvtu',
    '/n/scratchlfs/ac290r/p2_re5_pe10/DIRDATA_Drug/VTK/*.pvtu',
    '/n/scratchlfs/ac290r/p2_re10_pe1/DIRDATA_Drug/VTK/*.pvtu',
    '/n/scratchlfs/ac290r/p2_re10_pe10/DIRDATA_Drug/VTK/*.pvtu',
])

In [12]:
# Sorted lists of the blood .pvtu paths, one list per case
(filelist_bld_re5pe1,
 filelist_bld_re5pe10,
 filelist_bld_re10pe1,
 filelist_bld_re10pe10) = map(sglob, [
    '/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Blood/VTK/*.pvtu',
    '/n/scratchlfs/ac290r/p2_re5_pe10/DIRDATA_Blood/VTK/*.pvtu',
    '/n/scratchlfs/ac290r/p2_re10_pe1/DIRDATA_Blood/VTK/*.pvtu',
    '/n/scratchlfs/ac290r/p2_re10_pe10/DIRDATA_Blood/VTK/*.pvtu',
])

In [13]:
# Number of drug .pvtu files found for each case, in the order re5pe1, re5pe10, re10pe1, re10pe10
tuple(map(len, (
    filelist_drug_re5pe1,
    filelist_drug_re5pe10,
    filelist_drug_re10pe1,
    filelist_drug_re10pe10,
)))

(4000, 4000, 1266, 1372)

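The total size (blood + drug output per time step, times 4000 time steps) would be approximately
- `224MB * 4000 = 900 GB` for each `re5` case (`124M + 100M = 224MB` per time step)
- `1753MB * 4000 = 7000 GB` for each `re10` case (`973M + 780M = 1753MB` per time step; if all finished)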

### Naming convention

In [14]:
# Show the first three and the last three paths of the sorted list
filelist_drug_re5pe1[:3], filelist_drug_re5pe1[-3:]

(['/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Drug/VTK/T0000001000.pvtu',
  '/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Drug/VTK/T0000002000.pvtu',
  '/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Drug/VTK/T0000003000.pvtu'],
 ['/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Drug/VTK/T0003998000.pvtu',
  '/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Drug/VTK/T0003999000.pvtu',
  '/n/scratchlfs/ac290r/p2_re5_pe1/DIRDATA_Drug/VTK/T0004000000.pvtu'])

In [15]:
# Slice from the start of the file name (16 characters from the end)
# up to, but not including, the 5-character '.pvtu' suffix
filelist_drug_re5pe1[-1][-len('T0004000000.pvtu'):-len('.pvtu')]  # time stamp

'T0004000000'

In [16]:
# Same slice, but starting 13 characters from the end so only the
# last 8 digits before the '.pvtu' suffix are kept
filelist_drug_re5pe1[-1][-len('04000000.pvtu'):-len('.pvtu')]  # truncated time stamp

'04000000'

# Prepare empty directory to host processed & compressed NetCDF files

In [17]:
%%bash
# Create the directory tree that will host the processed data:
# one directory per case, each with a blood/ and a drug/ subdirectory,
# plus a README at the top level.
# Every step is idempotent (mkdir -p; README is overwritten), so the cell
# can be re-run safely.
set -eu  # stop at the first failing command or unset variable

# No trailing slash: path components are joined explicitly with '/' below
TOPDIR=/n/scratchlfs/ac290r/p2_data_postprocess

mkdir -p "$TOPDIR"

for CASE in re10_pe1 re10_pe10 re5_pe1 re5_pe10
do
    # Quote the expansions so the paths are never word-split or globbed
    mkdir -p "$TOPDIR/$CASE/blood" "$TOPDIR/$CASE/drug"
done

echo 'Processed & compressed data for Project Group 2' > "$TOPDIR/README"

In [18]:
ls /n/scratchlfs/ac290r/p2_data_postprocess/  # destination directory for the processed data

[0m[01;34mre10_pe1[0m/  [01;34mre10_pe10[0m/  [01;34mre5_pe1[0m/  [01;34mre5_pe10[0m/  [01;34mre5_pe1_backup[0m/  README


In [19]:
# Print the README stored in the destination directory
!cat /n/scratchlfs/ac290r/p2_data_postprocess/README

Processed & compressed data for Project Group 2
