In [2]:
# import libraries

import pandas as pd
import xarray as xr
import numpy as np
from pathlib import Path
import seaborn as sns
from matplotlib import pyplot as plt

In [3]:
# read in files

folderpath = '../data/data_frames/'
paths = Path(folderpath).glob('bootstrap*.csv')
pathlist = list(paths)
data = pd.concat(map(pd.read_csv, pathlist), ignore_index=True)
data

Unnamed: 0.1,Unnamed: 0,time,row,col,crs,team_icecon,bootstrap_icecon,edtl,visual_ice,ocean_fraction,land_fraction,snow_fraction,sun_elevation,cloud_cover,pixels,x,y
0,1603394,2019-01-12,346,98,b'',0.940,0.953,2.236068,0.518772,0.105046,0.193481,0.182700,6.177596,2.87,652741.0,-1387500.0,-2812500.0
1,1603698,2019-01-12,347,98,b'',0.968,1.000,2.000000,0.385017,0.069531,0.439040,0.106412,6.177596,2.87,651683.0,-1387500.0,-2837500.0
2,1603699,2019-01-12,347,99,b'',0.972,1.000,2.236068,0.955612,0.043312,0.001042,0.000034,6.177596,2.87,652380.0,-1362500.0,-2837500.0
3,1604003,2019-01-12,348,99,b'',0.976,1.000,1.414214,0.988944,0.011056,0.000000,0.000000,6.177596,2.87,540447.0,-1362500.0,-2862500.0
4,1604004,2019-01-12,348,100,b'',0.980,1.000,2.236068,0.994977,0.005023,0.000000,0.000000,6.177596,2.87,64698.0,-1337500.0,-2862500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191144,48990872,2017-12-26,322,56,b'',0.908,1.000,5.385165,0.977938,0.022062,0.000000,0.000000,5.929697,3.21,186064.0,-2437500.0,-2212500.0
191145,48990873,2017-12-26,322,57,b'',0.924,1.000,5.830952,1.000000,0.000000,0.000000,0.000000,5.929697,3.21,7014.0,-2412500.0,-2212500.0
191146,48995889,2017-12-26,338,209,b'',0.000,0.000,4.123106,0.002214,0.997786,0.000000,0.000000,3.373443,1.54,263755.0,1387500.0,-2612500.0
191147,48995890,2017-12-26,338,210,b'',0.000,0.000,5.000000,0.000044,0.999956,0.000000,0.000000,3.373443,1.54,182265.0,1412500.0,-2612500.0


In [4]:
# put values into percentages

data['visual_ice'] = data['visual_ice'] * 100
data['team_icecon'] = data['team_icecon'] * 100
data['bootstrap_icecon'] = data['bootstrap_icecon'] * 100
data['ocean_fraction'] = data['ocean_fraction'] * 100
data['snow_fraction'] = data['snow_fraction'] * 100
data['land_fraction'] = data['land_fraction'] * 100

# convert date from string into date-time object

data['date'] = pd.to_datetime(data['time'], yearfirst=True)


# convert ice fraction from visual product into ice percentage relative to ocean fraction

data['visual_icecon'] = ( data['visual_ice'] / (data['visual_ice'] + data['ocean_fraction']) ) * 100
data = data.dropna(subset=['visual_icecon'])

# drop un-needed columns

data = data.drop(columns=['Unnamed: 0', 'crs', 'time'])
data

Unnamed: 0,row,col,team_icecon,bootstrap_icecon,edtl,visual_ice,ocean_fraction,land_fraction,snow_fraction,sun_elevation,cloud_cover,pixels,x,y,date,visual_icecon
0,346,98,94.0,95.3,2.236068,51.877238,10.504626,19.348103,18.270034,6.177596,2.87,652741.0,-1387500.0,-2812500.0,2019-01-12,83.160769
1,347,98,96.8,100.0,2.000000,38.501695,6.953074,43.904015,10.641217,6.177596,2.87,651683.0,-1387500.0,-2837500.0,2019-01-12,84.703313
2,347,99,97.2,100.0,2.236068,95.561176,4.331218,0.104234,0.003372,6.177596,2.87,652380.0,-1362500.0,-2837500.0,2019-01-12,95.664116
3,348,99,97.6,100.0,1.414214,98.894434,1.105566,0.000000,0.000000,6.177596,2.87,540447.0,-1362500.0,-2862500.0,2019-01-12,98.894434
4,348,100,98.0,100.0,2.236068,99.497666,0.502334,0.000000,0.000000,6.177596,2.87,64698.0,-1337500.0,-2862500.0,2019-01-12,99.497666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191144,322,56,90.8,100.0,5.385165,97.793770,2.206230,0.000000,0.000000,5.929697,3.21,186064.0,-2437500.0,-2212500.0,2017-12-26,97.793770
191145,322,57,92.4,100.0,5.830952,100.000000,0.000000,0.000000,0.000000,5.929697,3.21,7014.0,-2412500.0,-2212500.0,2017-12-26,100.000000
191146,338,209,0.0,0.0,4.123106,0.221418,99.778582,0.000000,0.000000,3.373443,1.54,263755.0,1387500.0,-2612500.0,2017-12-26,0.221418
191147,338,210,0.0,0.0,5.000000,0.004389,99.995611,0.000000,0.000000,3.373443,1.54,182265.0,1412500.0,-2612500.0,2017-12-26,0.004389


In [5]:
# filter out cells with pixel percentiles lower than the global max (excluding anomolously high cells >800,000)

data['pixel_percentile'] = (data['pixels'] / 731467)*100
data = data.loc[data['pixel_percentile'] > 80].copy()

In [6]:
# calculate error and absolute error (root square error)

data['team_error'] = data['team_icecon'] - data['visual_icecon']
data['bootstrap_error'] = data['bootstrap_icecon'] - data['visual_icecon']
data['team_error_abs'] = (data['team_error']**2)**0.5
data['bootstrap_error_abs'] = (data['bootstrap_error']**2)**0.5
data

Unnamed: 0,row,col,team_icecon,bootstrap_icecon,edtl,visual_ice,ocean_fraction,land_fraction,snow_fraction,sun_elevation,...,pixels,x,y,date,visual_icecon,pixel_percentile,team_error,bootstrap_error,team_error_abs,bootstrap_error_abs
0,346,98,94.0,95.3,2.236068,51.877238,10.504626,19.348103,18.270034,6.177596,...,652741.0,-1387500.0,-2812500.0,2019-01-12,83.160769,89.237245,10.839231,12.139231,10.839231,12.139231
1,347,98,96.8,100.0,2.000000,38.501695,6.953074,43.904015,10.641217,6.177596,...,651683.0,-1387500.0,-2837500.0,2019-01-12,84.703313,89.092604,12.096687,15.296687,12.096687,15.296687
2,347,99,97.2,100.0,2.236068,95.561176,4.331218,0.104234,0.003372,6.177596,...,652380.0,-1362500.0,-2837500.0,2019-01-12,95.664116,89.187892,1.535884,4.335884,1.535884,4.335884
6,318,68,82.4,98.7,3.000000,83.036456,8.527424,8.397380,0.038740,5.609157,...,658241.0,-2137500.0,-2112500.0,2019-01-16,90.686913,89.989159,-8.286913,8.013087,8.286913,8.013087
7,318,69,88.0,95.3,3.162278,75.326665,3.846259,13.657738,7.169338,5.609157,...,659238.0,-2112500.0,-2112500.0,2019-01-16,95.141952,90.125460,-7.141952,0.158048,7.141952,0.158048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191135,320,55,80.0,95.6,3.162278,99.294047,0.542695,0.163258,0.000000,5.929697,...,644377.0,-2462500.0,-2162500.0,2017-12-26,99.456418,88.093790,-19.456418,-3.856418,19.456418,3.856418
191136,320,56,84.0,100.0,3.605551,94.652789,5.347211,0.000000,0.000000,5.929697,...,645458.0,-2437500.0,-2162500.0,2017-12-26,94.652789,88.241575,-10.652789,5.347211,10.652789,5.347211
191138,321,54,80.8,97.1,3.605551,98.271508,1.728492,0.000000,0.000000,5.929697,...,642641.0,-2487500.0,-2187500.0,2017-12-26,98.271508,87.856458,-17.471508,-1.171508,17.471508,1.171508
191139,321,55,84.4,100.0,4.123106,83.470470,16.529530,0.000000,0.000000,5.929697,...,643642.0,-2462500.0,-2187500.0,2017-12-26,83.470470,87.993307,0.929530,16.529530,0.929530,16.529530


In [7]:
# ALL MONTHS CORE STATISTICS

mae_team = data.team_error_abs.mean()
mae_bootstrap = data.bootstrap_error_abs.mean()
corr_team = data[['team_error', 'cloud_cover', 'snow_fraction', 'ocean_fraction', 'land_fraction', 'edtl', 'sun_elevation']].corr()
corr_bootstrap = data[['bootstrap_error', 'cloud_cover', 'snow_fraction', 'ocean_fraction', 'land_fraction', 'edtl', 'sun_elevation']].corr()

# print out nicely

print("Correlation between visual and team (PMW) ice concentrations")
print(data[['visual_icecon', 'bootstrap_icecon']].corr())
print()
print("Correlation between visual and bootstrap (PMW) ice concentrations")
print(data[['visual_icecon', 'bootstrap_icecon']].corr())
print()
print(f"mean absolute error for team is {mae_team}")
print()
print(f"mean absolute error for team is {mae_bootstrap}")
print()
print('big correlation matrix table for team error')
print(corr_team)
print()
print('big correlation matrix table for bootstrap error')
print(corr_bootstrap)


Correlation between visual and team (PMW) ice concentrations
                  visual_icecon  bootstrap_icecon
visual_icecon          1.000000          0.841789
bootstrap_icecon       0.841789          1.000000

Correlation between visual and bootstrap (PMW) ice concentrations
                  visual_icecon  bootstrap_icecon
visual_icecon          1.000000          0.841789
bootstrap_icecon       0.841789          1.000000

mean absolute error for team is 12.324043949641023

mean absolute error for team is 13.25168092805122

big correlation matrix table for team error
                team_error  cloud_cover  snow_fraction  ocean_fraction  \
team_error        1.000000    -0.092610       0.058349        0.203644   
cloud_cover      -0.092610     1.000000      -0.128419        0.192379   
snow_fraction     0.058349    -0.128419       1.000000       -0.407085   
ocean_fraction    0.203644     0.192379      -0.407085        1.000000   
land_fraction     0.045062     0.044929       0.058311

In [8]:
# WINTER MONTHS CORE STATISTICS

winter_data = data.loc[data['date'].dt.month.isin([1, 2, 3, 12])]

# calculate mean absolute error (root square error) and correlation tables

mae_team = winter_data.team_error_abs.mean()
mae_bootstrap = winter_data.bootstrap_error_abs.mean()
corr_team = winter_data[['team_error', 'cloud_cover', 'snow_fraction', 'ocean_fraction', 'land_fraction', 'edtl', 'sun_elevation']].corr()
corr_bootstrap = winter_data[['bootstrap_error', 'cloud_cover', 'snow_fraction', 'ocean_fraction', 'land_fraction', 'edtl', 'sun_elevation']].corr()

# print out nicely

print("Correlation between visual and team (PMW) ice concentrations")
print(winter_data[['visual_icecon', 'team_icecon']].corr())
print()
print("Correlation between visual and bootstrap (PMW) ice concentrations")
print(winter_data[['visual_icecon', 'bootstrap_icecon']].corr())
print()
print(f"mean absolute error for team is {mae_team}")
print()
print(f"mean absolute error for team is {mae_bootstrap}")
print()
print('big correlation matrix table for team error')
print(corr_team)
print()
print('big correlation matrix table for bootstrap error')
print(corr_bootstrap)


Correlation between visual and team (PMW) ice concentrations
               visual_icecon  team_icecon
visual_icecon       1.000000     0.877659
team_icecon         0.877659     1.000000

Correlation between visual and bootstrap (PMW) ice concentrations
                  visual_icecon  bootstrap_icecon
visual_icecon          1.000000          0.819649
bootstrap_icecon       0.819649          1.000000

mean absolute error for team is 8.779648097271995

mean absolute error for team is 9.311979175691729

big correlation matrix table for team error
                team_error  cloud_cover  snow_fraction  ocean_fraction  \
team_error        1.000000    -0.088908       0.000544        0.297102   
cloud_cover      -0.088908     1.000000      -0.098113        0.155895   
snow_fraction     0.000544    -0.098113       1.000000       -0.182961   
ocean_fraction    0.297102     0.155895      -0.182961        1.000000   
land_fraction    -0.059962     0.062613       0.120497       -0.133894   
edtl 

In [9]:
# NOVEMBER CORE STATISTICS

nov_data = data.loc[data['date'].dt.month.isin([11])]

# calculate mean absolute error (root square error) and correlation tables

mae_team = nov_data.team_error_abs.mean()
mae_bootstrap = nov_data.bootstrap_error_abs.mean()
corr_team = nov_data[['team_error', 'cloud_cover', 'snow_fraction', 'ocean_fraction', 'land_fraction', 'edtl', 'sun_elevation']].corr()
corr_bootstrap = nov_data[['bootstrap_error', 'cloud_cover', 'snow_fraction', 'ocean_fraction', 'land_fraction', 'edtl', 'sun_elevation']].corr()

# print out nicely

print("Correlation between visual and team (PMW) ice concentrations")
print(nov_data[['visual_icecon', 'team_icecon']].corr())
print()
print("Correlation between visual and bootstrap (PMW) ice concentrations")
print(nov_data[['visual_icecon', 'bootstrap_icecon']].corr())
print()
print(f"mean absolute error for team is {mae_team}")
print()
print(f"mean absolute error for team is {mae_bootstrap}")
print()
print('big correlation matrix table for team error')
print(corr_team)
print()
print('big correlation matrix table for bootstrap error')
print(corr_bootstrap)

Correlation between visual and team (PMW) ice concentrations
               visual_icecon  team_icecon
visual_icecon       1.000000     0.530298
team_icecon         0.530298     1.000000

Correlation between visual and bootstrap (PMW) ice concentrations
                  visual_icecon  bootstrap_icecon
visual_icecon          1.000000          0.519594
bootstrap_icecon       0.519594          1.000000

mean absolute error for team is 19.199935232777214

mean absolute error for team is 20.008089093340843

big correlation matrix table for team error
                team_error  cloud_cover  snow_fraction  ocean_fraction  \
team_error        1.000000    -0.044728       0.154517        0.503301   
cloud_cover      -0.044728     1.000000      -0.087273       -0.046866   
snow_fraction     0.154517    -0.087273       1.000000       -0.458573   
ocean_fraction    0.503301    -0.046866      -0.458573        1.000000   
land_fraction    -0.035927     0.073018       0.249194       -0.533920   
edt

In [10]:
# CLOSE TO SHORE & WINTER MONTHS CORE STATISTICS

close_winter_data = data.loc[data['date'].dt.month.isin([1, 2, 3, 12])].loc[data['edtl'] < 1.5]

# calculate mean absolute error (root square error) and correlation tables

mae_team = close_winter_data.team_error_abs.mean()
mae_bootstrap = close_winter_data.bootstrap_error_abs.mean()
corr_team = close_winter_data[['team_error', 'cloud_cover', 'snow_fraction', 'ocean_fraction', 'land_fraction', 'edtl', 'sun_elevation']].corr()
corr_bootstrap = close_winter_data[['bootstrap_error', 'cloud_cover', 'snow_fraction', 'ocean_fraction', 'land_fraction', 'edtl', 'sun_elevation']].corr()

# print out nicely

print("Correlation between visual and team (PMW) ice concentrations")
print(close_winter_data[['visual_icecon', 'team_icecon']].corr())
print()
print("Correlation between visual and bootstrap (PMW) ice concentrations")
print(close_winter_data[['visual_icecon', 'bootstrap_icecon']].corr())
print()
print(f"mean absolute error for team is {mae_team}")
print()
print(f"mean absolute error for team is {mae_bootstrap}")
print()
print('big correlation matrix table for team error')
print(corr_team)
print()
print('big correlation matrix table for bootstrap error')
print(corr_bootstrap)

Correlation between visual and team (PMW) ice concentrations
               visual_icecon  team_icecon
visual_icecon       1.000000     0.866695
team_icecon         0.866695     1.000000

Correlation between visual and bootstrap (PMW) ice concentrations
                  visual_icecon  bootstrap_icecon
visual_icecon          1.000000          0.786778
bootstrap_icecon       0.786778          1.000000

mean absolute error for team is 10.114782828395011

mean absolute error for team is 10.994886018468494

big correlation matrix table for team error
                team_error  cloud_cover  snow_fraction  ocean_fraction  \
team_error        1.000000    -0.035658      -0.029030        0.427919   
cloud_cover      -0.035658     1.000000      -0.085642        0.139237   
snow_fraction    -0.029030    -0.085642       1.000000       -0.246635   
ocean_fraction    0.427919     0.139237      -0.246635        1.000000   
land_fraction    -0.088092     0.086263       0.041850       -0.178101   
edt