In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Load every benchmark run (Grid A timing runs and Grid B timing/error runs).
# Forward slashes are used because they resolve on Windows as well as POSIX systems;
# the previous backslash-separated literal is only a relative path on Windows.
results = pd.read_csv('../results/TestingResults1.csv')
results

Unnamed: 0,Machine,InterpolationType,GridType,BatchSize,Time,RemovalFraction,MAE,RMSE,Max Error
0,CPU,Bilinear,A,1000,0,,,,
1,CPU,Cubic,A,1000,0,,,,
2,CPU,Kriging,A,1000,6,,,,
3,GPU,Bilinear,A,1000,13,,,,
4,GPU,Cubic,A,1000,40,,,,
...,...,...,...,...,...,...,...,...,...
343,CPU,Cubic,B,250344,645,0.05,6.30093,15.0715,529.750
344,CPU,Kriging,B,250344,708,0.05,4.98024,11.2783,342.524
345,GPU,Bilinear,B,250344,61,0.05,6.74368,15.9349,414.333
346,GPU,Cubic,B,250344,246,0.05,6.30093,15.0715,529.750


## Performance Metrics

### Grid A

Calculates the average time in ms that both the CPU and GPU took to run
    on each interpolation type, for each batch size of data

The batch sizes are: 1000, 5000, 10000, 50000, 100000, 1000000, 5000000

In [6]:
# Keep only the runs recorded against grid type 'A'.
grid_a_results = results[results['GridType'] == 'A']
grid_a_results

Unnamed: 0,Machine,InterpolationType,GridType,BatchSize,Time,RemovalFraction,MAE,RMSE,Max Error
0,CPU,Bilinear,A,1000,0,,,,
1,CPU,Cubic,A,1000,0,,,,
2,CPU,Kriging,A,1000,6,,,,
3,GPU,Bilinear,A,1000,13,,,,
4,GPU,Cubic,A,1000,40,,,,
...,...,...,...,...,...,...,...,...,...
247,CPU,Cubic,A,5000000,2901,,,,
248,CPU,Kriging,A,5000000,14942,,,,
249,GPU,Bilinear,A,5000000,462,,,,
250,GPU,Cubic,A,5000000,420,,,,


In [30]:
# Distinct batch sizes present in the Grid A runs, in order of first appearance
sizes = grid_a_results.loc[:, 'BatchSize'].unique()
sizes

array([   1000,    5000,   10000,   50000,  100000, 1000000, 5000000],
      dtype=int64)

In [29]:
interp_types = ['Bilinear', 'Cubic', 'Kriging']
machines = ['CPU', 'GPU']

# Read the batch sizes straight from Grid A instead of the notebook-level `sizes`:
# the Grid B section rebinds that name to Grid B's batch sizes, so re-running this
# cell afterwards would select empty subsets and report NaN for every entry.
batch_sizes_a = grid_a_results['BatchSize'].unique()

# Average run time (ms) for each "<machine> <interpolation>" pair.
# Every list holds one value per batch size, in the order of `batch_sizes_a`.
performance_a = {
    machine + " " + interp: []
    for interp in interp_types
    for machine in machines
}

for size in batch_sizes_a:
    # All Grid A runs measured with this batch size
    batch = grid_a_results.loc[grid_a_results['BatchSize'] == size]

    for interp in interp_types:
        for machine in machines:
            # Runs for this machine / interpolation combination
            subset = batch.loc[(batch['Machine'] == machine) & (batch['InterpolationType'] == interp)]

            # Mean over the repeated runs; float() stores a plain Python float so the
            # printed lists do not depend on NumPy's scalar repr.
            average = float(np.mean(subset['Time'].values))

            performance_a[machine + " " + interp].append(average)

for key, averages in performance_a.items():
    print(key, averages)
        
        

CPU Bilinear [0.2, 0.8, 1.4, 8.8, 15.4, 170.8, 830.6]
GPU Bilinear [8.6, 1.6, 2.4, 6.8, 13.2, 116.8, 684.6]
CPU Cubic [0.0, 3.4, 7.0, 33.6, 74.0, 678.6, 3404.4]
GPU Cubic [27.4, 2.6, 6.0, 27.8, 53.2, 486.6, 2393.2]
CPU Kriging [4.0, 18.0, 35.8, 165.6, 333.4, 3212.4, 16252.2]
GPU Kriging [4.6, 5.8, 11.4, 47.6, 93.0, 903.2, 4484.2]


### Grid B

Calculates average performance times (ms) and average errors for CPU and GPU
    performance on each interpolation type based on:

- Region (Mid-Atlantic Ridge, East-Pacific Rise, Mariana Trench, Kerguelen Plateau)
- Removal Fraction -> fraction of the data randomly masked from the original grid to imitate real-life applications

In [31]:
# Keep only the runs recorded against grid type 'B'.
grid_b_results = results[results['GridType'] == 'B']
grid_b_results

Unnamed: 0,Machine,InterpolationType,GridType,BatchSize,Time,RemovalFraction,MAE,RMSE,Max Error
126,CPU,Bilinear,B,8101,1,0.01,3.54526,6.76434,196.000
127,CPU,Cubic,B,8101,21,0.01,3.91214,7.12105,156.500
128,CPU,Kriging,B,8101,23,0.01,3.37423,5.80733,168.544
129,GPU,Bilinear,B,8101,148,0.01,3.54526,6.76434,196.000
130,GPU,Cubic,B,8101,47,0.01,3.91214,7.12105,156.500
...,...,...,...,...,...,...,...,...,...
343,CPU,Cubic,B,250344,645,0.05,6.30093,15.07150,529.750
344,CPU,Kriging,B,250344,708,0.05,4.98024,11.27830,342.524
345,GPU,Bilinear,B,250344,61,0.05,6.74368,15.93490,414.333
346,GPU,Cubic,B,250344,246,0.05,6.30093,15.07150,529.750


In [34]:
# Distinct batch sizes present in the Grid B runs, in order of first appearance.
# NOTE: this rebinds `sizes`, which held the Grid A batch sizes until now.
sizes = grid_b_results.loc[:, 'BatchSize'].unique()
sizes

array([  8101,  40507,  81015, 121523, 162031,   1303,  14428,   9321,
        46609,  50068, 250344], dtype=int64)

In [35]:
# Distinct removal fractions used across the Grid B runs
fracs = grid_b_results.loc[:, 'RemovalFraction'].unique()
fracs

array([0.01, 0.05, 0.1 , 0.15, 0.2 ])

**TODO (fix later):** Keep track of which rows correspond to which region's data.
    The row ranges below are currently selected by hand, which is error-prone.

## Tectonically Active Zones


### Mid-Atlantic Ridge
#### Removal Fraction = 0.01
TestingResults1 - rows 278-289

In [48]:
# Positional slice for TestingResults1 rows 278-289: iloc position = file row - 2
# (presumably one header row plus 1-based row numbering), and the stop is exclusive.
mid_atlantic = results.iloc[276:288]

# The bounds are maintained by hand and iloc silently truncates an out-of-range slice,
# so fail loudly if this is not one complete, homogeneous block of runs.
assert len(mid_atlantic) == 12, "expected 12 Mid-Atlantic Ridge rows"
assert mid_atlantic['BatchSize'].nunique() == 1, "slice spans more than one batch size"
assert mid_atlantic['RemovalFraction'].nunique() == 1, "slice spans more than one removal fraction"
mid_atlantic

Unnamed: 0,Machine,InterpolationType,GridType,BatchSize,Time,RemovalFraction,MAE,RMSE,Max Error
276,CPU,Bilinear,B,1303,0,0.01,16.8584,26.5357,165.667
277,CPU,Cubic,B,1303,3,0.01,15.7884,27.2985,212.75
278,CPU,Kriging,B,1303,4,0.01,13.4503,22.6796,194.635
279,GPU,Bilinear,B,1303,16,0.01,16.8584,26.5357,165.667
280,GPU,Cubic,B,1303,38,0.01,15.7884,27.2985,212.75
281,GPU,Kriging,B,1303,7,0.01,13.4503,22.6796,194.635
282,CPU,Bilinear,B,1303,0,0.01,16.8584,26.5357,165.667
283,CPU,Cubic,B,1303,4,0.01,15.7884,27.2985,212.75
284,CPU,Kriging,B,1303,3,0.01,13.4503,22.6796,194.635
285,GPU,Bilinear,B,1303,10,0.01,16.8584,26.5357,165.667


In [49]:
def summarize_performance(region_results, interp_types, machines,
                          metrics=('Time', 'MAE', 'RMSE', 'Max Error')):
    """Average each metric column for every machine / interpolation pair.

    Parameters
    ----------
    region_results : pandas.DataFrame
        Runs to summarise; must contain 'Machine', 'InterpolationType' and
        every column named in `metrics`.
    interp_types : sequence of str
        Interpolation names to report, e.g. ['Bilinear', 'Cubic', 'Kriging'].
    machines : sequence of str
        Machine names to report, e.g. ['CPU', 'GPU'].
    metrics : sequence of str
        Columns to average, in the order they appear in each result list.

    Returns
    -------
    dict
        Maps "<machine> <interpolation>" to a list of mean values, one per
        entry of `metrics`. A pair with no matching rows yields NaN means.
    """
    summary = {}
    for interp in interp_types:
        for machine in machines:
            matches = (region_results['Machine'] == machine) & (region_results['InterpolationType'] == interp)
            rows = region_results.loc[matches]
            # float() stores plain Python floats so printing is independent of NumPy's scalar repr
            summary[machine + " " + interp] = [float(np.mean(rows[metric].values)) for metric in metrics]
    return summary


interp_types = ['Bilinear', 'Cubic', 'Kriging']
machines = ['CPU', 'GPU']

# Each list is [mean Time, mean MAE, mean RMSE, mean Max Error]
perf = summarize_performance(mid_atlantic, interp_types, machines)

for key, averages in perf.items():
    print(key, averages)



CPU Bilinear [0.0, 16.8584, 26.5357, 165.667]
GPU Bilinear [13.0, 16.8584, 26.5357, 165.667]
CPU Cubic [3.5, 15.7884, 27.2985, 212.75]
GPU Cubic [38.0, 15.7884, 27.2985, 212.75]
CPU Kriging [3.5, 13.4503, 22.6796, 194.635]
GPU Kriging [6.0, 13.4503, 22.6796, 194.635]


### East Pacific Rise
#### Removal Fraction = 0.01
TestingResults1 - rows 290-301

In [50]:
# Positional slice for TestingResults1 rows 290-301: iloc position = file row - 2
# (presumably one header row plus 1-based row numbering), and the stop is exclusive.
east_pacific = results.iloc[288:300]

# The bounds are maintained by hand and iloc silently truncates an out-of-range slice,
# so fail loudly if this is not one complete, homogeneous block of runs.
assert len(east_pacific) == 12, "expected 12 East Pacific Rise rows"
assert east_pacific['BatchSize'].nunique() == 1, "slice spans more than one batch size"
assert east_pacific['RemovalFraction'].nunique() == 1, "slice spans more than one removal fraction"
east_pacific

Unnamed: 0,Machine,InterpolationType,GridType,BatchSize,Time,RemovalFraction,MAE,RMSE,Max Error
288,CPU,Bilinear,B,14428,2,0.01,14.4228,24.8556,305.667
289,CPU,Cubic,B,14428,38,0.01,14.4862,25.9785,363.25
290,CPU,Kriging,B,14428,42,0.01,11.1512,19.4132,453.267
291,GPU,Bilinear,B,14428,16,0.01,14.4228,24.8556,305.667
292,GPU,Cubic,B,14428,52,0.01,14.4862,25.9785,363.25
293,GPU,Kriging,B,14428,24,0.01,11.1512,19.4132,453.267
294,CPU,Bilinear,B,14428,3,0.01,14.4228,24.8556,305.667
295,CPU,Cubic,B,14428,49,0.01,14.4862,25.9785,363.25
296,CPU,Kriging,B,14428,45,0.01,11.1512,19.4132,453.267
297,GPU,Bilinear,B,14428,13,0.01,14.4228,24.8556,305.667


In [51]:
interp_types = ['Bilinear', 'Cubic', 'Kriging']
machines = ['CPU', 'GPU']

# Columns averaged for every machine / interpolation pair, in reporting order
metric_columns = ['Time', 'MAE', 'RMSE', 'Max Error']

# "<machine> <interpolation>" -> [mean Time, mean MAE, mean RMSE, mean Max Error]
perf = {}
for interp in interp_types:
    for machine in machines:
        # Rows of the East Pacific Rise slice for this machine and interpolation type
        is_match = (east_pacific['Machine'] == machine) & (east_pacific['InterpolationType'] == interp)
        rows = east_pacific.loc[is_match]

        # Average each metric over the matching runs
        perf[machine + " " + interp] = [np.mean(rows[column].values) for column in metric_columns]

for label, averages in perf.items():
    print(label, averages)

CPU Bilinear [2.5, 14.4228, 24.8556, 305.667]
GPU Bilinear [14.5, 14.4228, 24.8556, 305.667]
CPU Cubic [43.5, 14.4862, 25.9785, 363.25]
GPU Cubic [52.0, 14.4862, 25.9785, 363.25]
CPU Kriging [43.5, 11.1512, 19.4132, 453.267]
GPU Kriging [23.5, 11.1512, 19.4132, 453.267]


### Mariana Trench
#### Removal Fraction = 0.01
TestingResults1 - rows 302-313


In [52]:
# Positional slice for TestingResults1 rows 302-313: iloc position = file row - 2
# (presumably one header row plus 1-based row numbering), and the stop is exclusive.
mariana001 = results.iloc[300:312]

# The bounds are maintained by hand and iloc silently truncates an out-of-range slice,
# so fail loudly if this is not one complete, homogeneous block of runs.
assert len(mariana001) == 12, "expected 12 Mariana Trench (0.01) rows"
assert mariana001['BatchSize'].nunique() == 1, "slice spans more than one batch size"
assert mariana001['RemovalFraction'].nunique() == 1, "slice spans more than one removal fraction"
mariana001

Unnamed: 0,Machine,InterpolationType,GridType,BatchSize,Time,RemovalFraction,MAE,RMSE,Max Error
300,CPU,Bilinear,B,9321,1,0.01,33.5012,49.6573,488.667
301,CPU,Cubic,B,9321,24,0.01,36.6279,58.1008,487.5
302,CPU,Kriging,B,9321,26,0.01,28.8211,42.944,303.626
303,GPU,Bilinear,B,9321,15,0.01,33.5012,49.6573,488.667
304,GPU,Cubic,B,9321,48,0.01,36.6279,58.1008,487.5
305,GPU,Kriging,B,9321,19,0.01,28.8211,42.944,303.626
306,CPU,Bilinear,B,9321,1,0.01,33.5012,49.6573,488.667
307,CPU,Cubic,B,9321,24,0.01,36.6279,58.1008,487.5
308,CPU,Kriging,B,9321,25,0.01,28.8211,42.944,303.626
309,GPU,Bilinear,B,9321,12,0.01,33.5012,49.6573,488.667


In [53]:
interp_types = ['Bilinear', 'Cubic', 'Kriging']
machines = ['CPU', 'GPU']

# Columns averaged for every machine / interpolation pair, in reporting order
metric_columns = ['Time', 'MAE', 'RMSE', 'Max Error']

# "<machine> <interpolation>" -> [mean Time, mean MAE, mean RMSE, mean Max Error]
perf = {}
for interp in interp_types:
    for machine in machines:
        # Rows of the Mariana Trench (0.01) slice for this machine and interpolation type
        is_match = (mariana001['Machine'] == machine) & (mariana001['InterpolationType'] == interp)
        rows = mariana001.loc[is_match]

        # Average each metric over the matching runs
        perf[machine + " " + interp] = [np.mean(rows[column].values) for column in metric_columns]

for label, averages in perf.items():
    print(label, averages)

CPU Bilinear [1.0, 33.5012, 49.6573, 488.667]
GPU Bilinear [13.5, 33.5012, 49.6573, 488.667]
CPU Cubic [24.0, 36.6279, 58.1008, 487.5]
GPU Cubic [48.0, 36.6279, 58.1008, 487.5]
CPU Kriging [25.5, 28.8211, 42.944, 303.626]
GPU Kriging [19.0, 28.8211, 42.944, 303.626]


#### Removal Fraction = 0.05
TestingResults1 - rows 314-325

In [59]:
# Positional slice for TestingResults1 rows 314-325: iloc position = file row - 2
# (presumably one header row plus 1-based row numbering), and the stop is exclusive.
mariana005 = results.iloc[312:324]

# The bounds are maintained by hand and iloc silently truncates an out-of-range slice,
# so fail loudly if this is not one complete, homogeneous block of runs.
assert len(mariana005) == 12, "expected 12 Mariana Trench (0.05) rows"
assert mariana005['BatchSize'].nunique() == 1, "slice spans more than one batch size"
assert mariana005['RemovalFraction'].nunique() == 1, "slice spans more than one removal fraction"
mariana005

Unnamed: 0,Machine,InterpolationType,GridType,BatchSize,Time,RemovalFraction,MAE,RMSE,Max Error
312,CPU,Bilinear,B,46609,6,0.05,33.5738,50.5098,693.333
313,CPU,Cubic,B,46609,118,0.05,35.7862,57.7545,755.0
314,CPU,Kriging,B,46609,143,0.05,28.0409,42.8661,1102.08
315,GPU,Bilinear,B,46609,19,0.05,33.5738,50.5098,693.333
316,GPU,Cubic,B,46609,81,0.05,35.7862,57.7545,755.0
317,GPU,Kriging,B,46609,72,0.05,28.0409,42.8661,1102.08
318,CPU,Bilinear,B,46609,12,0.05,33.5738,50.5098,693.333
319,CPU,Cubic,B,46609,136,0.05,35.7862,57.7545,755.0
320,CPU,Kriging,B,46609,140,0.05,28.0409,42.8661,1102.08
321,GPU,Bilinear,B,46609,18,0.05,33.5738,50.5098,693.333


In [60]:
interp_types = ['Bilinear', 'Cubic', 'Kriging']
machines = ['CPU', 'GPU']

# Columns averaged for every machine / interpolation pair, in reporting order
metric_columns = ['Time', 'MAE', 'RMSE', 'Max Error']

# "<machine> <interpolation>" -> [mean Time, mean MAE, mean RMSE, mean Max Error]
perf = {}
for interp in interp_types:
    for machine in machines:
        # Rows of the Mariana Trench (0.05) slice for this machine and interpolation type
        is_match = (mariana005['Machine'] == machine) & (mariana005['InterpolationType'] == interp)
        rows = mariana005.loc[is_match]

        # Average each metric over the matching runs
        perf[machine + " " + interp] = [np.mean(rows[column].values) for column in metric_columns]

for label, averages in perf.items():
    print(label, averages)

CPU Bilinear [9.0, 33.5738, 50.5098, 693.333]
GPU Bilinear [18.5, 33.5738, 50.5098, 693.333]
CPU Cubic [127.0, 35.7862, 57.7545, 755.0]
GPU Cubic [80.5, 35.7862, 57.7545, 755.0]
CPU Kriging [141.5, 28.0409, 42.8661, 1102.08]
GPU Kriging [72.5, 28.0409, 42.8661, 1102.08]


## Oceanic Plateaus


### Kerguelen Plateau
#### Removal Fraction = 0.01
TestingResults1 - rows 326-337

In [57]:
# Positional slice for TestingResults1 rows 326-337: iloc position = file row - 2
# (presumably one header row plus 1-based row numbering), and the stop is exclusive.
kerguelen001 = results.iloc[324:336]

# The bounds are maintained by hand and iloc silently truncates an out-of-range slice,
# so fail loudly if this is not one complete, homogeneous block of runs.
assert len(kerguelen001) == 12, "expected 12 Kerguelen Plateau (0.01) rows"
assert kerguelen001['BatchSize'].nunique() == 1, "slice spans more than one batch size"
assert kerguelen001['RemovalFraction'].nunique() == 1, "slice spans more than one removal fraction"
kerguelen001

Unnamed: 0,Machine,InterpolationType,GridType,BatchSize,Time,RemovalFraction,MAE,RMSE,Max Error
324,CPU,Bilinear,B,50068,9,0.01,6.6799,15.5192,351.0
325,CPU,Cubic,B,50068,133,0.01,6.27658,14.825,473.25
326,CPU,Kriging,B,50068,158,0.01,5.03055,11.3488,304.221
327,GPU,Bilinear,B,50068,25,0.01,6.6799,15.5192,351.0
328,GPU,Cubic,B,50068,85,0.01,6.27658,14.825,473.25
329,GPU,Kriging,B,50068,78,0.01,5.03055,11.3488,304.221
330,CPU,Bilinear,B,50068,9,0.01,6.6799,15.5192,351.0
331,CPU,Cubic,B,50068,146,0.01,6.27658,14.825,473.25
332,CPU,Kriging,B,50068,150,0.01,5.03055,11.3488,304.221
333,GPU,Bilinear,B,50068,20,0.01,6.6799,15.5192,351.0


In [58]:
interp_types = ['Bilinear', 'Cubic', 'Kriging']
machines = ['CPU', 'GPU']

# Columns averaged for every machine / interpolation pair, in reporting order
metric_columns = ['Time', 'MAE', 'RMSE', 'Max Error']

# "<machine> <interpolation>" -> [mean Time, mean MAE, mean RMSE, mean Max Error]
perf = {}
for interp in interp_types:
    for machine in machines:
        # Rows of the Kerguelen Plateau (0.01) slice for this machine and interpolation type
        is_match = (kerguelen001['Machine'] == machine) & (kerguelen001['InterpolationType'] == interp)
        rows = kerguelen001.loc[is_match]

        # Average each metric over the matching runs
        perf[machine + " " + interp] = [np.mean(rows[column].values) for column in metric_columns]

for label, averages in perf.items():
    print(label, averages)

CPU Bilinear [9.0, 6.6799, 15.5192, 351.0]
GPU Bilinear [22.5, 6.6799, 15.5192, 351.0]
CPU Cubic [139.5, 6.27658, 14.825, 473.25]
GPU Cubic [83.5, 6.27658, 14.825, 473.25]
CPU Kriging [154.0, 5.03055, 11.3488, 304.221]
GPU Kriging [75.5, 5.03055, 11.3488, 304.221]


#### Removal Fraction = 0.05
TestingResults1 - rows 338-349

In [61]:
# Positional slice for TestingResults1 rows 338-349: iloc position = file row - 2
# (presumably one header row plus 1-based row numbering), and the stop is exclusive.
kerguelen005 = results.iloc[336:348]

# The bounds are maintained by hand and iloc silently truncates an out-of-range slice,
# so fail loudly if this is not one complete, homogeneous block of runs.
assert len(kerguelen005) == 12, "expected 12 Kerguelen Plateau (0.05) rows"
assert kerguelen005['BatchSize'].nunique() == 1, "slice spans more than one batch size"
assert kerguelen005['RemovalFraction'].nunique() == 1, "slice spans more than one removal fraction"
kerguelen005

Unnamed: 0,Machine,InterpolationType,GridType,BatchSize,Time,RemovalFraction,MAE,RMSE,Max Error
336,CPU,Bilinear,B,250344,45,0.05,6.74368,15.9349,414.333
337,CPU,Cubic,B,250344,661,0.05,6.30093,15.0715,529.75
338,CPU,Kriging,B,250344,703,0.05,4.98024,11.2783,342.524
339,GPU,Bilinear,B,250344,82,0.05,6.74368,15.9349,414.333
340,GPU,Cubic,B,250344,246,0.05,6.30093,15.0715,529.75
341,GPU,Kriging,B,250344,359,0.05,4.98024,11.2783,342.524
342,CPU,Bilinear,B,250344,47,0.05,6.74368,15.9349,414.333
343,CPU,Cubic,B,250344,645,0.05,6.30093,15.0715,529.75
344,CPU,Kriging,B,250344,708,0.05,4.98024,11.2783,342.524
345,GPU,Bilinear,B,250344,61,0.05,6.74368,15.9349,414.333


In [62]:
interp_types = ['Bilinear', 'Cubic', 'Kriging']
machines = ['CPU', 'GPU']

# Columns averaged for every machine / interpolation pair, in reporting order
metric_columns = ['Time', 'MAE', 'RMSE', 'Max Error']

# "<machine> <interpolation>" -> [mean Time, mean MAE, mean RMSE, mean Max Error]
perf = {}
for interp in interp_types:
    for machine in machines:
        # Rows of the Kerguelen Plateau (0.05) slice for this machine and interpolation type
        is_match = (kerguelen005['Machine'] == machine) & (kerguelen005['InterpolationType'] == interp)
        rows = kerguelen005.loc[is_match]

        # Average each metric over the matching runs
        perf[machine + " " + interp] = [np.mean(rows[column].values) for column in metric_columns]

for label, averages in perf.items():
    print(label, averages)

CPU Bilinear [46.0, 6.74368, 15.9349, 414.333]
GPU Bilinear [71.5, 6.74368, 15.9349, 414.333]
CPU Cubic [653.0, 6.30093, 15.0715, 529.75]
GPU Cubic [246.0, 6.30093, 15.0715, 529.75]
CPU Kriging [705.5, 4.98024, 11.2783, 342.524]
GPU Kriging [358.5, 4.98024, 11.2783, 342.524]
