# Strong Scaling 

In [1]:
import time
import unittest
from test.test_meteorology import Test_relhum

#import cupy as cp
import dask
import numpy as np
import pandas as pd
import src.geocat.comp.meteorology as geo
import xarray as xr
csvpath = "relhum_strong_scaling.csv"
import math 
# import dask.array as da
from dask_jobqueue import PBSCluster
from dask.distributed import Client

  from distributed.utils import tmpfile


In [2]:
Routine = "Relhum"
ArraySize = 10**7
p_def = xr.DataArray(np.random.uniform(low=2000,high=100800,size=ArraySize)).chunk(10**5)
t_def = xr.DataArray(np.random.uniform(low=194.65,high=302.45,size=ArraySize)).chunk(10**5)
q_def = xr.DataArray(np.random.uniform(low=0,high=0.02038,size=ArraySize)).chunk(10**5)

# CPU

## 1 CPU node, 36 cores, 200 GB mem

In [3]:
cores_per_nodes = 36
mem_per_node = 200
i = 1
ncpus = i*cores_per_nodes
ngpus = i
nodes = i
mem = i*mem_per_node
resourceCPU = 'select={nodes}:ncpus=36:mem=200gb'.format(nodes = nodes)
clusterCPU = PBSCluster(memory='{mem} GB'.format(mem = mem),
                        processes=ncpus,
                        cores=ncpus,
                        queue='casper',
                        walltime='02:00:00',
                        resource_spec=resourceCPU)
print(clusterCPU.job_script())
clusterCPU.scale(1)
client = Client(clusterCPU)

#!/usr/bin/env bash

#PBS -N dask-worker
#PBS -q casper
#PBS -A NTDD0005
#PBS -l select=1:ncpus=36:mem=200gb
#PBS -l walltime=02:00:00

/glade/work/hkashgar/conda-envs/geocat/bin/python -m distributed.cli.dask_worker tcp://10.12.205.43:40625 --nthreads 1 --nprocs 36 --memory-limit 5.17GiB --name dummy-name --nanny --death-timeout 60 --interface ib0 --protocol tcp://



In [4]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.205.43:40625,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
repsize = 10
repeat = np.zeros([repsize])
allData = pd.DataFrame()
for rep in range(repsize):
    numpy_res = geo.relhum(t_def, q_def, p_def,False).compute()
    time1 = time.time()
    numpy_res = geo.relhum(t_def, q_def, p_def,False).compute()
    time2 = time.time()
    repeat[rep] = time2-time1
    #numpy_results.append(numpy_res)
data = {'Routine': np.repeat(Routine, repsize),
        'Input':"Xarray with Dask array input",
        'Approach': np.repeat(np.__name__ , repsize),
        'ArraySize': np.repeat(ArraySize , repsize),
        'nodes': nodes,
        'iteration' : np.arange(1,repsize+1),
        'Runtime(s)': repeat}
allData = pd.concat([allData,pd.DataFrame(data)], ignore_index=True)
try:
    previous = pd.read_csv(csvpath)
    previous = pd.concat([previous,allData])
except FileNotFoundError:
    previous = allData
previous.to_csv(csvpath, index=False)

In [6]:
allData

Unnamed: 0,Routine,Input,Approach,ArraySize,nodes,iteration,Runtime(s)
0,Relhum,Xarray with Dask array input,numpy,10000000,1,1,1.049283
1,Relhum,Xarray with Dask array input,numpy,10000000,1,2,0.856433
2,Relhum,Xarray with Dask array input,numpy,10000000,1,3,0.885935
3,Relhum,Xarray with Dask array input,numpy,10000000,1,4,0.875385
4,Relhum,Xarray with Dask array input,numpy,10000000,1,5,0.850934
5,Relhum,Xarray with Dask array input,numpy,10000000,1,6,0.910931
6,Relhum,Xarray with Dask array input,numpy,10000000,1,7,0.866241
7,Relhum,Xarray with Dask array input,numpy,10000000,1,8,0.837108
8,Relhum,Xarray with Dask array input,numpy,10000000,1,9,0.494458
9,Relhum,Xarray with Dask array input,numpy,10000000,1,10,0.838911


In [7]:
clusterCPU.close()

## 2 CPU node, 72 cores, 400 GB mem

In [8]:
cores_per_nodes = 36
mem_per_node = 200
i = 2
ncpus = i*cores_per_nodes
ngpus = i
nodes = i
mem = i*mem_per_node
resourceCPU = 'select={nodes}:ncpus=36:mem=200gb'.format(nodes = nodes)
clusterCPU = PBSCluster(memory='{mem} GB'.format(mem = mem),
                        processes=ncpus,
                        cores=ncpus,
                        queue='casper',
                        walltime='02:00:00',
                        resource_spec=resourceCPU)
print(clusterCPU.job_script())
clusterCPU.scale(1)
client = Client(clusterCPU)

#!/usr/bin/env bash

#PBS -N dask-worker
#PBS -q casper
#PBS -A NTDD0005
#PBS -l select=2:ncpus=36:mem=200gb
#PBS -l walltime=02:00:00

/glade/work/hkashgar/conda-envs/geocat/bin/python -m distributed.cli.dask_worker tcp://10.12.205.43:35576 --nthreads 1 --nprocs 72 --memory-limit 5.17GiB --name dummy-name --nanny --death-timeout 60 --interface ib0 --protocol tcp://



In [9]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.205.43:35576,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [10]:
repsize = 10
repeat = np.zeros([repsize])
allData = pd.DataFrame()
for rep in range(repsize):
    numpy_res = geo.relhum(t_def, q_def, p_def,False).compute()
    time1 = time.time()
    numpy_res = geo.relhum(t_def, q_def, p_def,False).compute()
    time2 = time.time()
    repeat[rep] = time2-time1
    #numpy_results.append(numpy_res)
data = {'Routine': np.repeat(Routine, repsize),
        'Input':"Xarray with Dask array input",
        'Approach': np.repeat(np.__name__ , repsize),
        'ArraySize': np.repeat(ArraySize , repsize),
        'nodes': nodes,
        'iteration' : np.arange(1,repsize+1),
        'Runtime(s)': repeat}
allData = pd.concat([allData,pd.DataFrame(data)], ignore_index=True)
try:
    previous = pd.read_csv(csvpath)
    previous = pd.concat([previous,allData])
except FileNotFoundError:
    previous = allData
previous.to_csv(csvpath, index=False)

2022-07-19 14:00:35,617 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


In [11]:
allData

Unnamed: 0,Routine,Input,Approach,ArraySize,nodes,iteration,Runtime(s)
0,Relhum,Xarray with Dask array input,numpy,10000000,2,1,1.24916
1,Relhum,Xarray with Dask array input,numpy,10000000,2,2,2.071994
2,Relhum,Xarray with Dask array input,numpy,10000000,2,3,1.602484
3,Relhum,Xarray with Dask array input,numpy,10000000,2,4,0.920929
4,Relhum,Xarray with Dask array input,numpy,10000000,2,5,0.949291
5,Relhum,Xarray with Dask array input,numpy,10000000,2,6,1.396514
6,Relhum,Xarray with Dask array input,numpy,10000000,2,7,1.266397
7,Relhum,Xarray with Dask array input,numpy,10000000,2,8,1.32554
8,Relhum,Xarray with Dask array input,numpy,10000000,2,9,1.516688
9,Relhum,Xarray with Dask array input,numpy,10000000,2,10,0.907008


In [12]:
clusterCPU.close()

## 3 CPU node, 108 cores, 600 GB mem

In [13]:
cores_per_nodes = 36
mem_per_node = 200
i = 3
ncpus = i*cores_per_nodes
ngpus = i
nodes = i
mem = i*mem_per_node
resourceCPU = 'select={nodes}:ncpus=36:mem=200gb'.format(nodes = nodes)
clusterCPU = PBSCluster(memory='{mem} GB'.format(mem = mem),
                        processes=ncpus,
                        cores=ncpus,
                        queue='casper',
                        walltime='02:00:00',
                        resource_spec=resourceCPU)
print(clusterCPU.job_script())
clusterCPU.scale(1)
client = Client(clusterCPU)

#!/usr/bin/env bash

#PBS -N dask-worker
#PBS -q casper
#PBS -A NTDD0005
#PBS -l select=3:ncpus=36:mem=200gb
#PBS -l walltime=02:00:00

/glade/work/hkashgar/conda-envs/geocat/bin/python -m distributed.cli.dask_worker tcp://10.12.205.43:42496 --nthreads 1 --nprocs 108 --memory-limit 5.17GiB --name dummy-name --nanny --death-timeout 60 --interface ib0 --protocol tcp://



In [14]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.205.43:42496,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [15]:
repsize = 10
repeat = np.zeros([repsize])
allData = pd.DataFrame()
for rep in range(repsize):
    numpy_res = geo.relhum(t_def, q_def, p_def,False).compute()
    time1 = time.time()
    numpy_res = geo.relhum(t_def, q_def, p_def,False).compute()
    time2 = time.time()
    repeat[rep] = time2-time1
    #numpy_results.append(numpy_res)
data = {'Routine': np.repeat(Routine, repsize),
        'Input':"Xarray with Dask array input",
        'Approach': np.repeat(np.__name__ , repsize),
        'ArraySize': np.repeat(ArraySize , repsize),
        'nodes': nodes,
        'iteration' : np.arange(1,repsize+1),
        'Runtime(s)': repeat}
allData = pd.concat([allData,pd.DataFrame(data)], ignore_index=True)
try:
    previous = pd.read_csv(csvpath)
    previous = pd.concat([previous,allData])
except FileNotFoundError:
    previous = allData
previous.to_csv(csvpath, index=False)

2022-07-19 14:01:08,945 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


In [16]:
allData

Unnamed: 0,Routine,Input,Approach,ArraySize,nodes,iteration,Runtime(s)
0,Relhum,Xarray with Dask array input,numpy,10000000,3,1,2.267573
1,Relhum,Xarray with Dask array input,numpy,10000000,3,2,1.170421
2,Relhum,Xarray with Dask array input,numpy,10000000,3,3,0.991273
3,Relhum,Xarray with Dask array input,numpy,10000000,3,4,1.021972
4,Relhum,Xarray with Dask array input,numpy,10000000,3,5,0.958401
5,Relhum,Xarray with Dask array input,numpy,10000000,3,6,0.987716
6,Relhum,Xarray with Dask array input,numpy,10000000,3,7,1.013
7,Relhum,Xarray with Dask array input,numpy,10000000,3,8,1.013932
8,Relhum,Xarray with Dask array input,numpy,10000000,3,9,1.007303
9,Relhum,Xarray with Dask array input,numpy,10000000,3,10,0.948582


In [17]:
clusterCPU.close()

## 4 CPU node, 144 cores, 800 GB mem

In [20]:
cores_per_nodes = 36
mem_per_node = 100
i = 4
ncpus = i*cores_per_nodes
ngpus = i
nodes = i
mem = i*mem_per_node
resourceCPU = 'select={nodes}:ncpus=36:mem=100gb'.format(nodes = 1)
clusterCPU = PBSCluster(memory='{mem} GB'.format(mem = mem),
                        processes=ncpus,
                        cores=ncpus,
                        queue='casper',
                        walltime='02:00:00',
                        resource_spec=resourceCPU)
print(clusterCPU.job_script())
clusterCPU.scale(4)
client = Client(clusterCPU)

#!/usr/bin/env bash

#PBS -N dask-worker
#PBS -q casper
#PBS -A NTDD0005
#PBS -l select=1:ncpus=36:mem=100gb
#PBS -l walltime=02:00:00

/glade/work/hkashgar/conda-envs/geocat/bin/python -m distributed.cli.dask_worker tcp://10.12.205.43:42557 --nthreads 1 --nprocs 144 --memory-limit 2.59GiB --name dummy-name --nanny --death-timeout 60 --interface ib0 --protocol tcp://



Perhaps you already have a cluster running?
Hosting the HTTP server on port 41044 instead


In [21]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/41044/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/41044/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.205.43:42557,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/hkashgar/proxy/41044/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [None]:
repsize = 10
repeat = np.zeros([repsize])
allData = pd.DataFrame()
for rep in range(repsize):
    numpy_res = geo.relhum(t_def, q_def, p_def,False).compute()
    time1 = time.time()
    numpy_res = geo.relhum(t_def, q_def, p_def,False).compute()
    time2 = time.time()
    repeat[rep] = time2-time1
    #numpy_results.append(numpy_res)
data = {'Routine': np.repeat(Routine, repsize),
        'Input':"Xarray with Dask array input",
        'Approach': np.repeat(np.__name__ , repsize),
        'ArraySize': np.repeat(ArraySize , repsize),
        'nodes': nodes,
        'iteration' : np.arange(1,repsize+1),
        'Runtime(s)': repeat}
allData = pd.concat([allData,pd.DataFrame(data)], ignore_index=True)
try:
    previous = pd.read_csv(csvpath)
    previous = pd.concat([previous,allData])
except FileNotFoundError:
    previous = allData
previous.to_csv(csvpath, index=False)

In [None]:
allData

In [None]:
clusterCPU.close()

# GPU

## 1 GPU, 1 CPU, 200 GB mem

In [None]:
i = 1
ngpus = i
nodes = i
mem = i*mem_per_node
resouceGPU = 'select={nodes}:ncpus=1:ngpus=1:mem=200gb'.format(nodes = nodes)
clusterCUDA = PBSCluster(memory='{mem} GB'.format(mem = mem),
                         processes=1*i,
                         cores=1*i,
                         queue='casper',
                         walltime='02:00:00',
                         resource_spec=resouceGPU)
print(clusterCUDA.job_script())
clusterCUDA.scale(1)
client = Client(clusterCUDA)

In [None]:
client

In [None]:
allData = pd.DataFrame()
repsize = 10
repeat = np.zeros([repsize])
for rep in range(0,repsize):
    #create different sizes of arrays
    cupy_res = geo.relhum(t_def, q_def, p_def,True).compute()
    cp.cuda.runtime.deviceSynchronize()
    time1 = time.time()
    cupy_res = geo.relhum(t_def, q_def, p_def,True).compute()
    cp.cuda.runtime.deviceSynchronize()
    time2 = time.time()
    repeat[rep] = time2-time1
data = {'Routine': np.repeat(Routine, repsize),
        'Input':"Xarray with Dask array input",
        'Approach': np.repeat(cp.__name__ , repsize),
        'ArraySize': np.repeat(ArraySize , repsize),
        'nodes': nodes,
        'iteration' : np.arange(1,repsize+1),
        'Runtime(s)': repeat}
allData = pd.concat([allData,pd.DataFrame(data)], ignore_index=True)

In [None]:
allData

In [None]:
clusterCUDA.close()

## 2 GPU, 2 CPU, 400 GB mem

In [None]:
i = 2
ncpus = i
ngpus = i
nodes = 1
mem = 300
resouceGPU = 'select={nodes}:ncpus={ncpus}:ngpus={ngpus}:mem={mem}gb'.format(nodes = nodes, ncpus = ncpus, ngpus = ngpus, mem = mem)
clusterCUDA = PBSCluster(memory='{mem} GB'.format(mem = mem),
                         processes=i,
                         cores=i,
                         queue='casper',
                         walltime='02:00:00',
                         resource_spec=resouceGPU)
print(clusterCUDA.job_script())
clusterCUDA.scale(1)
client = Client(clusterCUDA)

In [None]:
client

In [None]:
allData = pd.DataFrame()
repsize = 10
repeat = np.zeros([repsize])
for rep in range(0,repsize):
    #create different sizes of arrays
    cupy_res = geo.relhum(t_def, q_def, p_def,True).compute()
    cp.cuda.runtime.deviceSynchronize()
    time1 = time.time()
    cupy_res = geo.relhum(t_def, q_def, p_def,True).compute()
    cp.cuda.runtime.deviceSynchronize()
    time2 = time.time()
    repeat[rep] = time2-time1
data = {'Routine': np.repeat(Routine, repsize),
        'Input':"Xarray with Dask array input",
        'Approach': np.repeat(cp.__name__ , repsize),
        'ArraySize': np.repeat(ArraySize , repsize),
        'nodes': ngpus,
        'iteration' : np.arange(1,repsize+1),
        'Runtime(s)': repeat}
allData = pd.concat([allData,pd.DataFrame(data)], ignore_index=True)

In [None]:
allData

In [None]:
clusterCUDA.close()

## 3 GPU, 3 CPU, 600 GB mem

In [None]:
i = 2
ncpus = i
ngpus = i
nodes = 1
mem = 300
resouceGPU = 'select={nodes}:ncpus={ncpus}:ngpus={ngpus}:mem={mem}gb'.format(nodes = nodes, ncpus = ncpus, ngpus = ngpus, mem = mem)
clusterCUDA = PBSCluster(memory='{mem} GB'.format(mem = mem),
                         processes=i,
                         cores=i,
                         queue='casper',
                         walltime='02:00:00',
                         resource_spec=resouceGPU)
print(clusterCUDA.job_script())
clusterCUDA.scale(1)
client = Client(clusterCUDA)

In [None]:
client

In [None]:
allData

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot(allData,name):
    nodes = np.unique(allData['nodes'])
    sd_numpy = np.zeros(len(nodes))
    sd_cupy = np.zeros(len(nodes))
    y_numpy = np.zeros(len(nodes))
    y_cupy = np.zeros(len(nodes))
    plt.rcParams.update({'font.size': 20})
    for i in range(0,len(nodes)):
        cupydata = allData.loc[(allData['nodes'] == nodes[i]) & (allData['Approach'] == 'cupy')]
        numpydata = allData.loc[(allData['nodes'] == nodes[i]) & (allData['Approach'] == 'numpy')]
        y_cupy[i] = np.mean(cupydata['Runtime(s)'])
        y_numpy[i] = np.mean(numpydata['Runtime(s)'])
        sd_cupy[i] = np.std(cupydata['Runtime(s)'])
        sd_numpy[i] = np.std(numpydata['Runtime(s)'])
    fig, ax = plt.subplots(figsize=(9, 6))
    ax.errorbar(nodes, y_numpy, yerr=sd_numpy, fmt='-o',label='numpy',markersize=20)
    ax.errorbar(nodes, y_cupy, yerr=sd_cupy, fmt='-D',label='cupy',markersize=20)
    ax.legend();  # Add a legend.
    ax.set_xlabel('nodes')  # Add an x-label to the axes.
    ax.set_ylabel('Runtime(s)')  # Add a y-label to the axes.
    ax.set_title(("Test_relhum"))
    #ax.set_xscale('log')
    #ax.set_yscale('log')
    #plt.savefig(name,dpi=fig.dpi)
def test_validation(res_numpy,res_cupy):
    assert np.allclose(res_numpy,res_cupy, atol=0.0000001)