In [1]:
from filmModels import *
from dataPreprocess import *
from filmPostProcess import *
import os



In [2]:
# Experiment configuration.
# Every tunable value is defined here and then bundled into `meta_params`,
# the dictionary that later cells pass to the training / reporting helpers.

# --- prediction target and tabular inputs ---
label = 'visible_ice'
tab_params = ['proj_x_norm', 'proj_y_norm', 'depth_norm', 'interval_length_norm', 'year_norm', 'month_cyclic_norm']
# name of the integer-coded version of the label column
label_predict = label + '_code'

# --- experiment switches ---
film = False
# True  -> the dataframe is passed through crop_hds_discard_chips()
# False -> a fixed list of boreholes is dropped instead
restrict_to_hds = True
seed = 'kfold' # 'kfold', -1 for random, or other positive numbers for seed for train-test split

# --- training hyper-parameters ---
chip_size = 128
train_max_epoch = 30
max_iterations = 5
batch_size = 20
print_train_progress = True
print_test_model = True
hidden_width = 32
hidden_nblocks = 4
L2_param = 1e-5

train_test_ratio = 0.7

# use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- input locations ---
# Both locations can be overridden through environment variables so the
# notebook can run on another machine without editing this cell; when the
# variables are unset the original hard-coded values are used.
# path of csv containing tabular data
bh_file_path = os.environ.get('FILM_BH_FILE', 'data_stephen_fix_header.csv')

# directory holding the raster chips
chips_path = os.environ.get(
    'FILM_CHIPS_PATH',
    r'C:\Users\mouju\Desktop\film\raster_chips\concat_geo90_arcticdem',
)
# alternative chip set:
# chips_path = r'C:\Users\mouju\Desktop\film\raster_chips\concat_80bh_hdslanczos_34_128_128'

# Bundle of settings handed to the helpers in later cells
# (more entries are added once the dataset has been built).
meta_params = {'chip_size': chip_size,
               'train_max_epoch': train_max_epoch,
               'max_iterations': max_iterations,
               'batch_size': batch_size,
               'print_train_progress': print_train_progress,
               'print_test_model': print_test_model,
               'device': device,
               'hidden_width': hidden_width,
               'hidden_nblocks': hidden_nblocks,
               'L2_param': L2_param,
               'film': film,
               'seed': seed,
               'train_test_ratio': train_test_ratio,
               'restrict_to_hds': restrict_to_hds}

In [3]:
# Load the borehole table and run the project's preprocessing on it.

# Columns of the dataframe that prepare_df is asked to normalize.
list_cols = [
    'latitude',
    'longitude',
    'proj_x',
    'proj_y',
    'depth',
    'interval_length',
    'time',
]

# prepare_df hands back the processed dataframe together with the number of
# classes for `label`.
df, n_classes = prepare_df(
    pd.read_csv(bh_file_path, header=[0]),
    list_cols,
    label,
)
print(f'Number of classes is : {n_classes}')

Dataframe has length 2837
Number of unique boreholes is 566
Latitude ranges from 68.36933 to 69.44755
Longitude ranges from -133.82843 to -132.89346
List of columns normalized: ['latitude_norm', 'longitude_norm', 'proj_x_norm', 'proj_y_norm', 'depth_norm', 'interval_length_norm', 'year_norm', 'month_cyclic_norm']
visible_ice: 'None' entries have been replaced by 'No visible ice'
visible_ice column entries has been ordered:
[NaN, 'Pure ice', 'No visible ice', 'High', 'Medium to high', 'Low']
Categories (5, object): ['No visible ice' < 'Low' < 'Medium to high' < 'High' < 'Pure ice']
with corresponding codes in visible_ice_code column:
[-1  4  0  3  2  1]
visible_ice: binary column generated
materials: 'ICE' and 'ice' entries has been standardized into 'Ice'
'material_ice' column generated
materials has been categorized into codes in materials_code
Null entries of visible_ice dropped
Number of classes is : 5


In [4]:
# Reduce the dataframe to the samples used for training.
#   restrict_to_hds is True  -> keep whatever crop_hds_discard_chips() returns
#   restrict_to_hds is False -> keep everything except the boreholes listed below
#
# len(df) counts dataframe rows, not unique boreholes, so the summary line
# reports rows (the variable name n_bh_temp is kept for backward compatibility).
n_bh_temp = len(df)

if meta_params['restrict_to_hds']:
    df = crop_hds_discard_chips(df)
else:
    # boreholes excluded from the unrestricted dataset
    excluded_boreholes = [
        'ENG.YARC03097-01--HUS_H (NTGS 7)',
        'W14103137-CR4N',
        'W14103137-S6-BH15',
        'W14103137-S6-BH14',
        'W14103137-S6-BH16',
    ]
    # one pass instead of five chained queries; rows with a missing
    # borehole value are kept, exactly as with the original `!=` filters
    df = df.query("borehole not in @excluded_boreholes")

print(f'Number of dataframe rows has decreased from {n_bh_temp} to {len(df)}')

Number of dataframe rows: 399
Number of boreholes has decreased from 2752 to 399


In [5]:
# Build the dataset object from the chip directory and the prepared dataframe,
# then register the dataset-dependent settings in meta_params.
loaded_dataset = filmDataset(chips_path, df, tab_params, label_predict, chip_size)

meta_params['n_channels'] = loaded_dataset.n_channels
meta_params['n_classes'] = n_classes
# a single class uses binary cross-entropy; otherwise use cross-entropy
meta_params['loss_fn'] = (
    torch.nn.BCELoss() if n_classes == 1 else torch.nn.CrossEntropyLoss()
)
meta_params['full_dataset'] = loaded_dataset

Dataset contains 28 channels


In [6]:
# Train and evaluate repeatedly, collecting the outcome in `results`.
# seed == 'kfold' selects the k-fold runner; any other seed value uses the
# plain iterate() runner.
results = (iterate_kfold if seed == 'kfold' else iterate)(meta_params, loaded_dataset)

FOLD 0
--------------------------------
epoch  1: running loss: 1.61262, validation loss: 1.58920
epoch  2: running loss: 1.58193, validation loss: 1.56571
epoch  3: running loss: 1.53597, validation loss: 1.51447
epoch  4: running loss: 1.42111, validation loss: 1.43175
epoch  5: running loss: 1.31674, validation loss: 1.40863
epoch  6: running loss: 1.29917, validation loss: 1.41246
epoch  7: running loss: 1.29399, validation loss: 1.41332
epoch  8: running loss: 1.28676, validation loss: 1.40502
epoch  9: running loss: 1.28428, validation loss: 1.39696
epoch 10: running loss: 1.27747, validation loss: 1.37873
epoch 11: running loss: 1.27537, validation loss: 1.37894
epoch 12: running loss: 1.26545, validation loss: 1.36890
epoch 13: running loss: 1.25661, validation loss: 1.36408
epoch 14: running loss: 1.24663, validation loss: 1.33797
epoch 15: running loss: 1.23669, validation loss: 1.32088
epoch 16: running loss: 1.23062, validation loss: 1.32535
epoch 17: running loss: 1.22561,

In [7]:
# Report the metrics collected in `results` via the project's display_results helper.
# In the recorded run this produced per-class mean and std tables
# (precision, recall, f1, support) followed by the accuracy mean and std.
display_results(meta_params, results)

mean


Unnamed: 0,precision,recall,f1,support
0,0.541498,0.556636,0.542059,22.8
1,0.0,0.0,0.0,16.2
2,0.450272,0.749302,0.557428,33.0
3,0.0,0.0,0.0,2.0
4,0.0,0.0,0.0,5.8


std


Unnamed: 0,precision,recall,f1,support
0,0.051345,0.092182,0.048763,3.487119
1,0.0,0.0,0.0,4.955805
2,0.05826,0.130303,0.065077,3.03315
3,0.0,0.0,0.0,0.632456
4,0.0,0.0,0.0,2.712932


Accuracy mean: 0.4686708860759493, std: 0.03992211211912409


# Temperature Scaling

In [8]:
# from temperature_scaling import ModelWithTemperature