In [2]:
import pandas as pd
import numpy as np
import os
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import re
import sys
from datetime import timedelta
# from torch.nn.functional import normalize

### Import classes

In [6]:

proj_paths = ["/Users/gopal/Google Drive/_Research/Research projects/ML/manclassify/app_data/Thailand",
              "/Users/gopalpenny/Library/CloudStorage/GoogleDrive-gopalpenny@gmail.com/My Drive/_Research/Research projects/ML/manclassify/app_data/Thailand"]

proj_path = [path for path in proj_paths if os.path.exists(path)][0]

class_path = os.path.join(proj_path,"Thailand_classification")
ts_path = os.path.join(proj_path,"Thailand_download_timeseries")
# pd.read_csv("
os.listdir(class_path)

loc_id = 0

s2_csv_name = f"pt_ts_loc{loc_id}_s2.csv"
s2_csv_name

class_colname = 'Subclass2019'

proj_normpath = os.path.normpath(proj_path)
proj_dirname = proj_normpath.split(os.sep)[-1]
proj_name = re.sub("_classification$","",proj_dirname)
class_path = os.path.join(proj_path, proj_name + "_classification")
ts_path = os.path.join(proj_path, proj_name + "_download_timeseries")
pt_classes = pd.read_csv(os.path.join(class_path, "location_classification.csv"))
pt_classes = pt_classes[['loc_id', 'Class', class_colname]].dropna()

pt_classes

Unnamed: 0,loc_id,Class,Subclass2019
0,0,Farm,Plantation
1,1,Farm,Crop(Single)
2,2,Farm,Crop(Single)
3,3,Farm,Crop(Single)
4,4,Farm,Plantation
...,...,...,...
496,496,Farm,Crop(Single)
497,497,Farm,Crop(Single)
498,498,Farm,Plantation
499,499,Farm,Plantation


## Generate the torch tensor dataset

### Define function to read timeseries

* Read timeseries
* Filter timeseries to date range (+/- 60 days)
* Remove observations with clouds
* Take the mean value for each day (occurs when multiple overpasses happen on the same day)

In [190]:
# prep dataset
date_range = pd.to_datetime(['2019-06-01','2020-05-31'])

def prep_s2_loc(loc_id, date_range, proj_path):
    ts_path = os.path.join(proj_path,"Thailand_download_timeseries")
    s2_csv_name = f"pt_ts_loc{loc_id}_s2.csv"
    s2_csv_path = os.path.join(ts_path, s2_csv_name)
    s2_ts = pd.read_csv(s2_csv_path)

    # extract dates from image ids
    s2_ts['datestr'] = [re.sub("(^[0-9]+)[a-zA-Z].*","\\1",x) for x in s2_ts.image_id]
    s2_ts['date'] = pd.to_datetime(s2_ts.datestr, format = "%Y%m%d")

    # subset to cloud-free days AND within date_range
    s2_ts = s2_ts[(s2_ts.date >= date_range[0] - timedelta(days = 60)) & 
                  (s2_ts.date <= date_range[1] + timedelta(days = 60)) & 
                  (s2_ts.cloudmask == 0)]

    # calculate day from startday
    date_diff = (s2_ts.date - date_range[0])
    s2_ts['day'] = [x.days for x in date_diff]
    s2_ts['loc_id'] = loc_id

    # select only predictor and position columns, return tensor
    s2_ts_x = s2_ts[['loc_id','day','B8','B4','B3','B2']]
    return s2_ts_x

# s2_ts_loc125 = prep_s2_loc(125, date_range, proj_path)
# s2_ts_loc125.groupby(['loc_id','day'],as_index = False).mean()

### Get the torch tensor dataset (prep and save OR read)

In [36]:
# from ipywidgets import IntProgress
# from IPython.display import display

if os.path.exists(os.path.join(proj_path, 's2_ts_prepped.pt')):
    loc_ts_tor = torch.load(os.path.join(proj_path, 's2_ts_prepped.pt'))
    
else:
    # f = IntProgress(min=0, max=pt_classes.shape[0]) # instantiate the bar
    display(f) # display the bar
    
    s2_ts_list = []
    loc_id_list = []
    for i in np.arange(pt_classes.shape[0]):
        # loc_id = 499
        # print(loc_id)
        loc_id = pt_classes.loc_id.iloc[i]
        # loc_id_list.append(loc_id)
        s2_ts_loc = prep_s2_loc(loc_id, date_range, proj_path)
        s2_ts_loc = s2_ts_loc.groupby(['loc_id','day'],as_index = False).mean()
        s2_ts_tor = torch.tensor(s2_ts_loc.to_numpy())
        s2_ts_list.append(s2_ts_tor)
        # f.value += 1
        
    loc_ts_tor = torch.cat(s2_ts_list)

    torch.save(loc_ts_tor, os.path.join(proj_path, 's2_ts_prepped.pt'))

sys.getsizeof(loc_ts_tor)

72

### Prep the dataset tensors

* Subset to training classes (crops & plantations)
* Check max number of rows
* Normalize & center
* Split loc_id into training and test datasets

In [30]:
# Create a merged class column where "Other" is used for nonfarm classes
pt_classes['class'] = ['Other' if x!='Farm' else y for x,y in zip(pt_classes['Class'],pt_classes['Subclass2019'])]
pt_classes

0        Plantation
1      Crop(Single)
2      Crop(Single)
3      Crop(Single)
4        Plantation
           ...     
496    Crop(Single)
497    Crop(Single)
498      Plantation
499      Plantation
500    Crop(Double)
Name: class, Length: 501, dtype: object

In [45]:
print('All classes')
print(pt_classes.groupby(['Class','Subclass2019','class']).count())

train_classes = ['Crop(Double)','Crop(Single)','Plantation', 'Other']
pt_classes_ag = pt_classes[pt_classes['class'].isin(train_classes)][['class','loc_id']]
print('\nTraining dataset (pt_classes_ag)\n',pt_classes_ag)

All classes
                                     loc_id
Class     Subclass2019 class               
Farm      Crop(Double) Crop(Double)      68
          Crop(Single) Crop(Single)     278
          Mixed        Mixed              2
          Plantation   Plantation       109
          Unsure       Unsure             4
NonFarm   Forest       Other              3
          Golf         Other              1
          Mixed        Other              4
          Unsure       Other              1
          Urban        Other              1
Uncertain Mixed        Other              9
          Unsure       Other             12
Water     Mixed        Other              5
          Water        Other              4

Training dataset (pt_classes_ag)
             class  loc_id
0      Plantation       0
1    Crop(Single)       1
2    Crop(Single)       2
3    Crop(Single)       3
4      Plantation       4
..            ...     ...
496  Crop(Single)     496
497  Crop(Single)     497
498    Plantati

In [81]:
loc_ts_tor

tensor([[   0.,   55., 4320.,  263.,  582.,  304.],
        [   0.,  120., 3896.,  472.,  785.,  560.],
        [   0.,  145., 3809.,  340.,  623.,  346.],
        ...,
        [ 500.,  343., 2752., 1473., 1296., 1006.],
        [ 500.,  348., 2590., 1245., 1153.,  844.],
        [ 500.,  363., 3241., 1605., 1565., 1281.]], dtype=torch.float64)

In [89]:
loc_ts_tor = loc_ts_tor[(loc_ts_tor[:,1] >= 0) & (loc_ts_tor[:,1] <= 365)]

row_means= loc_ts_tor.mean(dim = 1)#.shape #.unsqueeze(0).repeat(5,1)
loc_ts_tor = loc_ts_tor[~torch.isnan(row_means)]
col_means= loc_ts_tor.mean(dim = 0)#.shape #.unsqueeze(0).repeat(5,1)
col_std= loc_ts_tor.std(dim = 0)#.shape #.unsqueeze(0).repeat(5,1)
col_means[[0,1]] = 0
col_std[[0]] = 1
col_std[[1]] = 365 # normalize days by 365 -- each year ranges from 0 to 1

loc_ts_tor_std = col_std.unsqueeze(0).repeat(loc_ts_tor.shape[0],1)
loc_ts_tor_mean = col_means.unsqueeze(0).repeat(loc_ts_tor.shape[0],1)

loc_ts_norm = (loc_ts_tor - loc_ts_tor_mean) / loc_ts_tor_std

# get max of number of observations per location
# idx = np.arange(loc_ts_norm.shape[0])
loc_id = np.unique(loc_ts_norm[:,0])
num_obs = pd.DataFrame({'loc_id' : np.unique(loc_ts_norm[:,0]).astype('int')})
num_obs['num_obs'] = [loc_ts_norm[loc_ts_norm[:,0]==i,:].shape[0] for i in num_obs['loc_id']]
print("Max number of observations for any loc_id")
print(num_obs.iloc[[num_obs['num_obs'].idxmax()]])

Max number of observations for any loc_id
     loc_id  num_obs
481     481       91


In [86]:
loc_ts_norm[1:5,:]

tensor([[ 0.0000,  0.3288,  1.5379, -1.1047, -0.6794, -0.5649],
        [ 0.0000,  0.3973,  1.4139, -1.3247, -1.1319, -1.1984],
        [ 0.0000,  0.4521,  0.5745, -1.4665, -1.4839, -1.3997],
        [ 0.0000,  0.5205,  1.4938, -1.3297, -1.0649, -0.9705]],
       dtype=torch.float64)

In [90]:
loc_train = pt_classes_ag.groupby('class', group_keys = False).apply(lambda x: x.sample(frac = 0.8))
loc_nontrain = pt_classes_ag[~pt_classes_ag['loc_id'].isin(loc_train.loc_id)]

loc_valid = loc_nontrain.groupby('class', group_keys = False).apply(lambda x: x.sample(frac = 0.5))
loc_test = loc_nontrain[~loc_nontrain['loc_id'].isin(loc_valid.loc_id)]

print('Training (loc_train summary)\n', loc_train.groupby('class').count())
print('\nValidate (loc_test summary)\n', loc_valid.groupby('class').count())
print('\nTesting (loc_test summary)\n', loc_test.groupby('class').count())

Training (loc_train summary)
               loc_id
class               
Crop(Double)      54
Crop(Single)     222
Other             32
Plantation        87

Validate (loc_test summary)
               loc_id
class               
Crop(Double)       7
Crop(Single)      28
Other              4
Plantation        11

Testing (loc_test summary)
               loc_id
class               
Crop(Double)       7
Crop(Single)      28
Other              4
Plantation        11


In [61]:
loc_train

Unnamed: 0,class,loc_id
128,Crop(Double),128
336,Crop(Double),336
57,Crop(Double),57
106,Crop(Double),106
134,Crop(Double),134
...,...,...
422,Plantation,422
204,Plantation,204
446,Plantation,446
0,Plantation,0


## Prepare the S2 dataset class

In [148]:
class s2Dataset(Dataset):
    """Sentinel 2 dataset"""
    
    def __init__(self, x, y, max_obs):
        """
        Args:
            x (tensor): contains loc_id and predictors as columns, s2 observations as rows
            y (tensor): contains loc_id as rows (& first column), class as 1-hot columns
        """
        self.x = x
        self.y = y
        self.max_obs = max_obs
        # self.proj_path = proj_path
        # proj_normpath = os.path.normpath(proj_path)
        # proj_dirname = proj_normpath.split(os.sep)[-1]
        # self.proj_name = re.sub("_classification$","",proj_dirname)
        # self.class_path = os.path.join(proj_path, self.proj_name + "_classification")
        # self.ts_path = os.path.join(proj_path, self.proj_name + "_download_timeseries")
        # self.pt_classes = pd.read_csv(os.path.join(self.class_path,"location_classification.csv"))
        # self.pt_classes = classes[['loc_id', class_colname]].dropna()
        # self.classes = pd.unique(self.pt_classes[class_colname])
        # self.labels = self.pt_classes.assign(val = 1).pivot_table(columns = class_colname, index = 'loc_id', values = 'val', fill_value= 0)

    
    def __getitem__(self, idx):
        # get loc_id
        loc_id = self.y[idx,0]
        self.last_loc_id = loc_id
        
        # select location id
        x_loc = self.x[self.x[:,0]==loc_id]
        x_prep = x_loc[:,1:] # remove loc_id column
        
        # pad zeros to max_obs
        n_pad = self.max_obs - x_prep.shape[0]
        
        x = torch.cat((x_prep, torch.zeros(n_pad, x_prep.shape[1])), dim = 0)
        
        x = x.float()
        
        
        
        # get one-hot encoding for the point as tensor
        y = torch.tensor(self.y[idx,1:]).float()
        
        return x, y
        
    def __len__(self):
        return self.y.shape[0]

### get training data

* `y_train` directly from `loc_train` & pivot
* `x_train` from `loc_ts_norm`, subset to `y_train[:,0]`

In [91]:
# get y_train values from loc_train
y_train_df = (loc_train.assign(val = 1) \
  .pivot_table(columns = 'class', index = ['loc_id'], values = 'val', fill_value= 0) \
  .reset_index(['loc_id']))
y_train = y_train_df.to_numpy()
print('y_train:\n',y_train)

# get x_train values from loc_ts_norm (based on loc_id)
x_train = loc_ts_norm[torch.isin(loc_ts_norm[:,0],torch.tensor(y_train[:,0]).to(torch.float64)),:]

# get y_test values from loc_test
y_valid_df = (loc_valid.assign(val = 1) \
  .pivot_table(columns = 'class', index = ['loc_id'], values = 'val', fill_value= 0) \
  .reset_index(['loc_id']))
y_valid = y_valid_df.to_numpy()
print('y_valid:\n',y_valid[0:10,])

# get x_train values from loc_ts_norm (based on loc_id)
x_valid = loc_ts_norm[torch.isin(loc_ts_norm[:,0],torch.tensor(y_valid[:,0]).to(torch.float64)),:]

# get y_test values from loc_test
y_test_df = (loc_test.assign(val = 1) \
  .pivot_table(columns = 'class', index = ['loc_id'], values = 'val', fill_value= 0) \
  .reset_index(['loc_id']))
y_test = y_test_df.to_numpy()
print('y_test:\n',y_test[0:10,])

# get x_train values from loc_ts_norm (based on loc_id)
x_test = loc_ts_norm[torch.isin(loc_ts_norm[:,0],torch.tensor(y_test[:,0]).to(torch.float64)),:]

x_test

y_train:
 [[  1   0   1   0   0]
 [  2   0   1   0   0]
 [  6   0   0   1   0]
 ...
 [498   0   0   0   1]
 [499   0   0   0   1]
 [500   1   0   0   0]]
y_valid:
 [[ 0  0  0  0  1]
 [ 4  0  0  0  1]
 [ 5  1  0  0  0]
 [24  0  1  0  0]
 [26  0  1  0  0]
 [28  0  1  0  0]
 [30  0  1  0  0]
 [62  1  0  0  0]
 [70  0  1  0  0]
 [76  0  1  0  0]]
y_test:
 [[  3   0   1   0   0]
 [ 22   0   0   0   1]
 [ 91   0   0   0   1]
 [ 98   0   1   0   0]
 [109   0   0   0   1]
 [112   0   1   0   0]
 [123   1   0   0   0]
 [128   1   0   0   0]
 [132   0   0   0   1]
 [136   0   1   0   0]]


tensor([[ 3.0000e+00,  1.3699e-02,  2.7465e+00, -1.2764e+00, -1.0649e+00,
         -1.2665e+00],
        [ 3.0000e+00,  5.4795e-02,  1.2686e+00, -6.6289e-01, -8.2466e-01,
         -9.3495e-01],
        [ 3.0000e+00,  1.2329e-01,  1.5123e+00, -8.2627e-01, -8.7774e-01,
         -1.0830e+00],
        ...,
        [ 4.8700e+02,  9.0411e-01,  1.1788e+00, -7.4013e-01, -1.7661e-01,
         -3.9421e-01],
        [ 4.8700e+02,  9.3151e-01,  9.7071e-01, -5.6675e-01, -1.4867e-01,
         -3.3698e-01],
        [ 4.8700e+02,  9.5890e-01,  1.1180e+00, -1.9275e-01,  4.1651e-01,
          1.2581e-01]], dtype=torch.float64)

### build pytorch dataset: `s2_dateset`

In [150]:
s2_train = s2Dataset(x = x_train, y = y_train, max_obs = 100)
s2_valid = s2Dataset(x = x_valid, y = y_valid, max_obs = 100)
s2_test = s2Dataset(x = x_test, y = y_test, max_obs = 100)

# example item in dataset
idx_test = 2
x, y = s2_train.__getitem__(idx_test)

print(f'x example, shape: {x.shape} \n(idx={idx_test}) columns: day, B8, B4, B3, B2\n',x)
# print()
print(f'\n\ny example (idx={idx_test}): crops(double) crops(single) plantation\n',y)
print(y.shape)
# sys.getsizeof(x)

x example, shape: torch.Size([100, 5]) 
(idx=2) columns: day, B8, B4, B3, B2
 tensor([[ 1.3699e-02,  3.6786e-01, -7.2624e-01, -4.6153e-01, -6.0636e-01],
        [ 5.4795e-02,  2.0966e-01, -6.0287e-01, -5.5650e-01, -5.5012e-01],
        [ 1.2329e-01,  1.0605e+00, -9.8465e-01, -6.9896e-01, -9.1719e-01],
        [ 3.2877e-01,  5.7878e-01, -8.9462e-01, -6.9058e-01, -5.2052e-01],
        [ 3.5616e-01,  5.2035e-01, -5.9120e-01, -2.9393e-01,  4.8882e-04],
        [ 3.6986e-01,  1.7973e-01, -1.0480e+00, -1.0537e+00, -1.1481e+00],
        [ 3.8356e-01, -1.9794e-02, -8.9962e-01, -1.0537e+00, -9.8824e-01],
        [ 3.9726e-01,  1.4838e-01, -6.7622e-01, -6.1237e-01, -4.8795e-01],
        [ 4.1096e-01, -2.9343e-01, -7.6292e-01, -9.6154e-01, -8.6391e-01],
        [ 4.3836e-01, -4.5020e-01, -6.0620e-01, -5.5371e-01, -2.5705e-01],
        [ 4.6575e-01, -5.7419e-01, -6.5622e-01, -8.3583e-01, -6.3005e-01],
        [ 4.7945e-01, -4.5020e-01, -5.9287e-01, -8.7774e-01, -9.2015e-01],
        [ 4.9315e-01, 

### generate sampling weights for data loader

In [151]:
# adapted from https://discuss.pytorch.org/t/how-to-handle-imbalanced-classes/11264/2
target_classes = torch.stack([torch.argmax(s2_train.__getitem__(i)[1]) for i in range(s2_train.__len__())])
# count of samples in each class
class_sample_count = np.array([torch.sum(target_classes == i) for i in torch.unique(target_classes)])

# weight for each class (classed must go from 0 to n-1 classes)
weight = 1. / class_sample_count
sample_weights = np.array([weight[i] for i in target_classes])
sampler = WeightedRandomSampler(weights = sample_weights, num_samples = len(sample_weights))

In [110]:
len(sample_weights)

395

In [153]:
# s2_train

train_dl = DataLoader(s2_train, batch_size = 20, drop_last = True, sampler = sampler)
valid_dl = DataLoader(s2_valid, batch_size = 20, drop_last = False)
test_dl = DataLoader(s2_test, batch_size = 20, drop_last = False)

In [113]:
len(train_dl)

19

In [120]:
i = 1
for train, labels in train_dl:
    if i == 1:
        print("i == 1:\n",train[1, 1, :])
    if i == 10:
        print("i == 10:\n",train[1, 1, :])
    i += 1



i == 1:
 tensor([ 0.0192,  1.6662, -1.2697, -0.9671, -0.9142])
i == 10:
 tensor([ 0.2192,  0.4505, -1.3748, -1.5146, -1.3583])


In [154]:
train_features, train_labels = next(iter(train_dl))
tf_test = train_features[:,:,:]
# tf_test
# train_labels
# tf_test
tf_test = tf_test.float()
print(tf_test.shape)

print(tf_test[0, 0:3, :])

torch.Size([20, 100, 5])
tensor([[ 0.0137,  0.3679, -0.7262, -0.4615, -0.6064],
        [ 0.0548,  0.2097, -0.6029, -0.5565, -0.5501],
        [ 0.1233,  1.0605, -0.9846, -0.6990, -0.9172]])


In [155]:
train_labels.shape

torch.Size([20, 4])

In [20]:
# class PositionalEncoding(nn.Module):

#     def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
#         super().__init__()
#         self.dropout = nn.Dropout(p=dropout)

#         position = torch.arange(max_len).unsqueeze(1)
#         div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
#         pe = torch.zeros(max_len, 1, d_model)
#         pe[:, 0, 0::2] = torch.sin(position * div_term)
#         pe[:, 0, 1::2] = torch.cos(position * div_term)
#         self.register_buffer('pe', pe)

#     def forward(self, x: Tensor) -> Tensor:
#         """
#         Args:
#             x: Tensor, shape [seq_len, batch_size, embedding_dim]
#         """
#         x = x + self.pe[:x.size(0)]
#         return self.dropout(x)

In [156]:
import torch.nn as nn


nhead = 6 # number of attention heads
head_dim = 8 # dimension of each word for each attention head
dmodel = nhead * head_dim # embed_dim -- each word (row) is embedded to this dimension then split
# across the nhead attention heads

data_in = tf_test[:, :, 1:] # select only the data
positions = tf_test[:,:,0:1] # split out positional data
data_dim = data_in.shape[-1]

In [22]:
torch.exp(torch.tensor([5.2333e-01]))/torch.sum(torch.exp(torch.tensor([-1.3249e-01, 5.2333e-01, -2.9124e-01])))

tensor([0.5097])

In [157]:
from torch import nn, Tensor
class TransformerClassifier(nn.Module):
    def __init__(self, ntoken: int, dmodel: int, nhead: int, dhid: int, 
                 nlayers: int, data_dim: int, nclasses: int):
        """
        data_dim: dimension of data (i.e., num of columns) including position as first dimension
        """
        super().__init__()
        self.positional_layer = nn.Linear(1, dmodel)
        self.embed_layer = nn.Linear(data_dim - 1, dmodel) # transform data to embed dimension (dmodel)
        
        # dim_feedforward: https://stackoverflow.com/questions/68087780/pytorch-transformer-argument-dim-feedforward
        # shortly: dim_feedforward is a hidden layer between two forward layers at the end of the encoder layer, passed for each word one-by-one
        self.encoderlayer = nn.TransformerEncoderLayer(d_model = dmodel, nhead = nhead, dim_feedforward = dhid)
        self.encoder = nn.TransformerEncoder(self.encoderlayer, nlayers)
        
        self.num_params = ntoken * dmodel
        
        self.class_encoder = nn.Linear(dmodel, nclasses)
    
    def forward(self, src: Tensor) -> Tensor:
        
        positions = src[:, :, 0:1]
        data = src[:, :, 1:]
        pe = self.positional_layer(positions)
        data_embed = self.embed_layer(data)
        data_and_pe = pe + data_embed
        encoder_out = self.encoder(data_and_pe)
        
        maxpool = torch.max(encoder_out,dim = 1)[0]
        
        # softmax ensures output of model is probability of class membership -- which sum to 1
        # BUT this is already done with CrossEntropyLoss so it's not necessary for this loss function
        classes = self.class_encoder(maxpool) #, dim = 1
        
        # classes = nn.functional.softmax(classes, 1) # don't use softmax with cross entropy loss... or do?
        # don't: https://stackoverflow.com/questions/55675345/should-i-use-softmax-as-output-when-using-cross-entropy-loss-in-pytorch
        # do: Machine Learning with Pytorch and Scikitlearn (p 471: Loss functions for classifiers) -- BUT NOT WITH CROSS ENTROPY LOSS (p478
        
        return classes

        # data_in = tf_test[:, :, 1:] # select only the data
        # positions = tf_test[:,:,0:1] # split out positional data
        # data_dim = data_in.shape[-1]
        
        
tfnetwork = TransformerClassifier(100, dmodel = 36, nhead = 6, dhid = 100, nlayers = 3, data_dim = 5, nclasses = 4)

tfnetwork(tf_test).shape

torch.Size([20, 4])

In [158]:
from torchinfo import summary
print(tuple(tf_test.shape))
summary(tfnetwork, input_size = (5, 100, 5))

(20, 100, 5)


Layer (type:depth-idx)                        Output Shape              Param #
TransformerClassifier                         [5, 4]                    12,808
├─Linear: 1-1                                 [5, 100, 36]              72
├─Linear: 1-2                                 [5, 100, 36]              180
├─TransformerEncoder: 1-3                     [5, 100, 36]              --
│    └─ModuleList: 2-1                        --                        --
│    │    └─TransformerEncoderLayer: 3-1      [5, 100, 36]              12,808
│    │    └─TransformerEncoderLayer: 3-2      [5, 100, 36]              12,808
│    │    └─TransformerEncoderLayer: 3-3      [5, 100, 36]              12,808
├─Linear: 1-4                                 [5, 4]                    148
Total params: 51,632
Trainable params: 51,632
Non-trainable params: 0
Total mult-adds (M): 0.11
Input size (MB): 0.01
Forward/backward pass size (MB): 2.78
Params size (MB): 0.09
Estimated Total Size (MB): 2.89

In [234]:
train_features, train_labels = next(iter(train_dl))

tfnetwork = TransformerClassifier(100, dmodel = 36, nhead = 6, dhid = 100, nlayers = 3, data_dim = 5, nclasses = 4)

train_out = tfnetwork(train_features)


In [163]:
train_labels.dtype

torch.float32

In [229]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(tfnetwork.parameters(), lr = 0.001)

print(train_out.shape)

def get_num_correct(train_out, train_labels):
    pred = torch.argmax(train_out, dim = 1)
    actual = torch.argmax(train_labels, dim = 1)
    num_correct = torch.sum(pred == actual).item()
    # print('type',type(num_correct))
    # x = num_correct# item()
    # print('num_correct', num_correct.item())
    return num_correct


num_correct = get_num_correct(train_out, train_labels)
num_correct

torch.Size([20, 4])


3

In [220]:
accuracy = num_correct / train_labels.size(0)
print('num_correct:', num_correct.item())
print('accuracy:', accuracy.item())
print('num in training sample:', train_labels.size(0))
tfnetwork.train()
loss = loss_fn(train_out, train_labels)
# loss.backward()
optimizer.step()
optimizer.zero_grad()
# tf_train
# tf_test.shape
f"accuracy: {accuracy.item()}"

torch.Size([20, 4])
<class 'int'>
num_correct: 3
accuracy: 0.15000000596046448
num in training sample: 20


'accuracy: 0.15000000596046448'

In [219]:
get_num_correct(train_out, train_labels).item()

<class 'torch.Tensor'>


3

In [235]:
i = 1
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(tfnetwork.parameters(), lr = 0.001)


# print(i)
# for train_features, train_labels in train_dl:
#     i += 1
#     print(i)
n_epochs = 1000
loss_hist_train = [0] * n_epochs
accuracy_hist_train = [0] * n_epochs
loss_hist_valid = [0] * n_epochs
accuracy_hist_valid = [0] * n_epochs
for epoch in range(n_epochs):
    tfnetwork.train()
    for x_batch, y_batch in train_dl:
        
        # Forward pass
        pred = tfnetwork(x_batch)
        loss = loss_fn(pred, y_batch)
        
        loss_hist_train[epoch] += loss.item() * y_batch.size(0)
        accuracy_hist_train[epoch] += get_num_correct(pred, y_batch)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    loss_hist_train[epoch] /= float(len(train_dl.dataset))
    accuracy_hist_train[epoch] /= float(len(train_dl.dataset))
        

    
    # print('train_out.shape', train_out.shape)
    accuracy = get_accuracy(pred, y_batch)
    
    with torch.no_grad():
        for x_batch, y_batch in train_dl:

            # Forward pass
            pred = tfnetwork(x_batch)
            loss = loss_fn(pred, y_batch)

            loss_hist_valid[epoch] += loss.item() * y_batch.size(0)
            accuracy_hist_valid[epoch] += get_num_correct(pred, y_batch)

        loss_hist_valid[epoch] /= float(len(train_dl.dataset))
        accuracy_hist_valid[epoch] /= float(len(train_dl.dataset))
        
    
    #     tfnetwork.eval()
    #     for x_batch, y_batch in valid_dl:
    #         # Forward pass
    #         pred = tfnetwork(x_batch)
    #         loss = loss_fn(pred, y_batch)

    #     tfnetwork.eval()
    # valid_features, valid_labels = /
    print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {loss_hist_train[epoch]:.4f}, Accuracy: {accuracy_hist_train[epoch]:.4f}'
          f' Val Accuracy: {accuracy_hist_valid[epoch]:.4f}')

Epoch [1/1000], Loss: 1.2917, Accuracy: 0.3139 Val Accuracy: 0.5165
Epoch [2/1000], Loss: 1.0431, Accuracy: 0.5190 Val Accuracy: 0.5089
Epoch [3/1000], Loss: 0.9446, Accuracy: 0.5570 Val Accuracy: 0.5696
Epoch [4/1000], Loss: 0.8469, Accuracy: 0.6101 Val Accuracy: 0.5722
Epoch [5/1000], Loss: 0.8658, Accuracy: 0.5924 Val Accuracy: 0.6253
Epoch [6/1000], Loss: 0.8070, Accuracy: 0.6430 Val Accuracy: 0.6557
Epoch [7/1000], Loss: 0.7509, Accuracy: 0.7089 Val Accuracy: 0.6380
Epoch [8/1000], Loss: 0.7394, Accuracy: 0.6684 Val Accuracy: 0.6506
Epoch [9/1000], Loss: 0.7102, Accuracy: 0.7038 Val Accuracy: 0.7291
Epoch [10/1000], Loss: 0.7344, Accuracy: 0.6886 Val Accuracy: 0.7063
Epoch [11/1000], Loss: 0.6577, Accuracy: 0.7367 Val Accuracy: 0.6962
Epoch [12/1000], Loss: 0.6434, Accuracy: 0.7468 Val Accuracy: 0.7646
Epoch [13/1000], Loss: 0.6492, Accuracy: 0.7089 Val Accuracy: 0.6911
Epoch [14/1000], Loss: 0.6310, Accuracy: 0.7063 Val Accuracy: 0.7671
Epoch [15/1000], Loss: 0.6281, Accuracy: 0.

In [176]:
tfnetwork

TransformerClassifier(
  (positional_layer): Linear(in_features=1, out_features=36, bias=True)
  (embed_layer): Linear(in_features=4, out_features=36, bias=True)
  (encoderlayer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=36, out_features=36, bias=True)
    )
    (linear1): Linear(in_features=36, out_features=100, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=100, out_features=36, bias=True)
    (norm1): LayerNorm((36,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((36,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=36, out_features=36, bias=True)
        )
       

In [40]:
tfnetwork_out = tfnetwork(tf_test)
torch.max(tfnetwork_out,dim = 1)[0].shape

torch.Size([5, 36])

In [25]:
torch.triu(torch.ones(4, 4) * float('-inf'), diagonal=0)

tensor([[-inf, -inf, -inf, -inf],
        [0., -inf, -inf, -inf],
        [0., 0., -inf, -inf],
        [0., 0., 0., -inf]])

In [41]:
dmodel

48

## Old S2 pytorch dataset

In [None]:
# class s2Dataset(Dataset):
#     """Sentinel 2 dataset"""
    
#     def __init__(self, proj_path, class_colname):
#         """
#         Args:
#             proj_path (string): path to manclassify project
#         """
#         self.proj_path = proj_path
#         proj_normpath = os.path.normpath(proj_path)
#         proj_dirname = proj_normpath.split(os.sep)[-1]
#         self.proj_name = re.sub("_classification$","",proj_dirname)
#         self.class_path = os.path.join(proj_path, self.proj_name + "_classification")
#         self.ts_path = os.path.join(proj_path, self.proj_name + "_download_timeseries")
#         self.pt_classes = pd.read_csv(os.path.join(self.class_path,"location_classification.csv"))
#         self.pt_classes = classes[['loc_id', class_colname]].dropna()
#         # self.pt_classes['loc_id'] = self.pt_classes['loc_id'] + 10.5 # for testing index only
#         self.classes = pd.unique(self.pt_classes[class_colname])
#         self.labels = self.pt_classes.assign(val = 1).pivot_table(columns = class_colname, index = 'loc_id', values = 'val', fill_value= 0)

    
#     def __getitem__(self, idx):
#         loc_id = self.labels.index[idx]
#         self.last_loc_id = loc_id
        
#         # select location id
#         s2_ts_x = s2_ts[['B8','B4','B3','B2','day']]
#         x = torch.tensor(s2_ts_x.to_numpy())
        
#         # get one-hot encoding for the point as tensor
#         y = torch.tensor(self.labels.iloc[idx].to_numpy())
        
#         return x, y
        
#     def __len__(self):
#         return self.pt_classes.shape[0]


# proj_path = "/Users/gopal/Google Drive/_Research/Research projects/ML/manclassify/app_data/Thailand"
# # date_rangeX = pd.to_datetime(['2019-06-01','2020-05-31'])
# s2_train = s2Dataset(proj_path = proj_path, class_colname = 'Subclass2019')
# x = s2_train.__getitem__(10)
# sys.getsizeof(x)

In [None]:
tensor_orig = loc_s1_ts_tensor
day_col = 1
def get_avg_days_to_nearest(tensor_orig, day_col, out = 'both'):
    """
    Add the number of days as 2 additional columns to a tensor
    
    tensor_orig: sorted tensor containing a column with number of days
    day_col: index of the column with days
    """
    days = tensor_orig[:,day_col] # pull out day column
    day_diff = np.abs(days[1:] - days[:-1]) # get days between
    day_diff = np.expand_dims(day_diff, 1)

    # create array with day_diff columns before and after
    day_diff_before_after = np.concatenate((np.expand_dims(np.append(np.inf,day_diff), 1), 
                                            np.expand_dims(np.append(day_diff,np.inf), 1)), axis = 1)

    day_diff_before_after = torch.from_numpy(day_diff_before_after)
    
    
    if out == 'both':
        tensor_day_diff = torch.concat((tensor_orig, day_diff_before_after),dim=1)
    elif out == 'mean':
        # get average distance (before & after)
        day_diff_mean = torch.mean(day_diff_before_after, 1, True)
        tensor_day_diff = torch.concat((tensor_orig, day_diff_mean),dim=1)
    else:
        print("ERROR")
    
    return tensor_day_diff

get_avg_days_to_nearest(tensor_orig, day_col = 1)