In [4]:
import os 
import pandas as pd
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import torch
# from ipywidgets import IntProgress
import scipy.io as sio
from scipy.io import loadmat, savemat
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from einops import rearrange,reduce,repeat
import glob

### Load Training Data

In [9]:
fileName = '1_matlab_input_data/2'
dirc = os.getcwd()
data_set = []
data_doa_set = []
dataPath = os.path.join(dirc,fileName)
if os.path.exists(dataPath):
    for root,dirs,files in os.walk(dataPath):
        for file in files:
            if file.startswith('features'):
                inputPath = os.path.join(root,file)
                input_data_train = sio.loadmat(inputPath)
                input_data_train = torch.from_numpy(input_data_train['features'])
                data_set.append(input_data_train) #NOTE: put all the train_data of different SNR into one list
            elif file.startswith('doa'):
                inputPath = os.path.join(root,file)
                input_data_train_doa = sio.loadmat(inputPath)
                input_data_train_doa = torch.from_numpy(input_data_train_doa['doa_input'])
                input_data_train_doa = torch.squeeze(input_data_train_doa)
                data_doa_set.append(input_data_train_doa) #NOTE: put all the train_data of different SNR into one list
else:
    print('the path does not exist!')
    
# Stack all the samples from different SNR together along a new dimension.
# data_concatenate = torch.cat(data_set)
data_stack = torch.stack(data_set)
print('the stacked dataset is ', data_stack.shape)
data_doa_stack = torch.stack(data_doa_set)
print('the stacked doa dataset is ', data_doa_stack.shape)

# # Shuffle the data along the first dimension (the data is concatenated along the first dimension)
# perm_length = torch.randperm(data_stack.shape[0])
# data_stack = data_stack[perm_length]
# data_doa_stack = data_doa_stack[perm_length]

print(f'the shuffled data shape is {data_stack.shape}\n',
      f'{data_stack.shape[0]} is the number of SNR\n',
      f'{data_stack.shape[1]} is the number of CSI samples\n',
      f'{data_stack.shape[2]} is the number of antenna links\n',
      f'{data_stack.shape[3]} is the number of APs\n',
      f'{data_stack.shape[4]} is the number of positions')

print(f'the shuffled data shape is {data_doa_stack.shape}\n',
      f'{data_doa_stack.shape[0]} is the number of SNR\n',
      f'{data_doa_stack.shape[1]} is the number of positions\n')

the stacked dataset is  torch.Size([31, 24, 8, 4, 3871])
the stacked doa dataset is  torch.Size([31, 3871])
the shuffled data shape is torch.Size([31, 24, 8, 4, 3871])
 31 is the number of SNR
 24 is the number of CSI samples
 8 is the number of antenna links
 4 is the number of APs
 3871 is the number of positions
the shuffled data shape is torch.Size([31, 3871])
 31 is the number of SNR
 3871 is the number of positions



In [19]:
# Change the original data dimension [3871, 4, 4, 24] to [79 x 49 x 4 x 4 x 24] 
## NOTE: The following code is for training dataset of multiple SNR case
train_data = rearrange(data_stack, 'a b c d e-> a e (d c b) 1') 
## NOTE: The following code is for training dataset of single SNR case
# input_train_data_reshape = rearrange((input_data_shuffle), 'a b c d-> d (c b a)')
print('train_data',train_data.shape)

# # #NOTE: Scaling: make the mean of each dimension (744 * 9 *4 dimensions) to be 0, and the standard deviation to be 1
# input_train_data_reshape = rearrange((input_data_reshape), 'a b c-> b a c')
# scaler = preprocessing.StandardScaler()
# # scaler = preprocessing.MinMaxScaler() # feature_range=(-1,1)
# train_data_scale = scaler.fit_transform(input_train_data_reshape)
# train_data_scale = rearrange((train_data_scale), 'a b c-> b a c')

# # print(train_data_scale[10])
# # print(np.mean(train_data_scale, axis=0))
# # print(np.std(train_data_scale, axis=0))

train_data torch.Size([31, 3871, 768, 1])


#### Generate Labels for all positions 

In [16]:
labels_ips = torch.arange(0,3871)
# Change the label dimension from 1d to [79 x 49]
# labels_ips = labels_ips.reshape(49, 79).t().to(device)
print('label shape',labels_ips.shape)

label shape torch.Size([3871])


In [10]:
if torch.cuda.is_available():       
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
Device name: NVIDIA GeForce RTX 2080 Ti


### Create Pytorch Dataloader

#### ZZF interpretation:
This part is the part which worth studying!! How to put data into the dataloader object to `save the memory use` and `boost the training speed`.

In [17]:
from torch.utils.data import (TensorDataset, DataLoader, RandomSampler,
                              SequentialSampler)

def data_loader(train_inputs, val_inputs, train_labels, val_labels,
                batch_size=50):
    """Convert train and validation sets to torch.Tensors and load them to
    DataLoader.
    """

    # Convert data type to torch.Tensor
    train_inputs, val_inputs, train_labels, val_labels =\
    tuple(torch.tensor(data) for data in
          [train_inputs, val_inputs, train_labels, val_labels])

#     # Specify batch_size
#     batch_size = 50

    # Create DataLoader for training data
    train_data = TensorDataset(train_inputs, train_labels)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

    # Create DataLoader for validation data
    val_data = TensorDataset(val_inputs, val_labels)
    val_sampler = SequentialSampler(val_data)
    val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

    return train_dataloader, val_dataloader

#### HACK!
What does `data_loader` need?
+ `Training dataset` with the dimension being (# samples, # dimensions)
+ `Training data label` with the dimension being (# samples, ind) where ind means the ind of the categories. In this example, the categories are NEGATIVE and POSITIVE, being 0 and 1 respectively.
+ `Test dataset` with the dimension being (# samples, # dimensions)
+ `Test data label` with the dimension being (# samples, ind) where ind means the ind of the categories. In this example, the categories are NEGATIVE and POSITIVE, being 0 and 1 respectively.
+ `Batch size`
What is the result of `data_loader`?
+ The result is an `iterable`, `train_dataloader` and `val_dataloader`
+ Each iteration over `train_dataloader` and `val_dataloader` give you a list with length being the 2. The two elements in the list are `data` and `label` with the type being `torch.tensor`. The shape of the `data` and `label` are (`batch size`, `the maximum length of a sentence (62)`) and (`batch size`, 1). 

In [20]:
from sklearn.model_selection import train_test_split

# Train Test Split
train_inputs, val_inputs, train_labels, val_labels = train_test_split(
    train_data[30], labels_ips, test_size=0.1, random_state=42)

In [24]:
train_inputs.shape
val_inputs.shape

torch.Size([388, 768, 1])

In [None]:
# Load data to PyTorch DataLoader
train_dataloader, val_dataloader = \
data_loader(train_inputs, val_inputs, train_labels, val_labels, batch_size=50)

### CNN Model

![cnn architecture](https://raw.githubusercontent.com/chriskhanhtran/CNN-Sentence-Classification-PyTorch/master/cnn-architecture.JPG)

In [88]:
# Sample configuration:
filter_sizes = [2, 3, 4]
num_filters = [2, 2, 2]

#### NOTE: About the dimensions of the 1dCNN
+ The shape requirement for Conv1d is (`batch_size`, `#channel`, `#sequence length`)
+ The # of channel can be considered as the `feature dimension`!
+ Here, in this case, the dimension of Embedding can be considered as the `feature dimension`.
+ `sequence length` is the length of each sentence.

In [180]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN_NLP(nn.Module):
    """An 1D Convulational Neural Network for Sentence Classification."""
    def __init__(self,
                 pretrained_embedding=None,
                 freeze_embedding=False,
                 vocab_size=None,
                 embed_dim=300,
                 filter_sizes=[3, 4, 5],
                 num_filters=[100, 100, 100],
                 num_classes=2,
                 dropout=0.5):
        """
        The constructor for CNN_NLP class.

        Args:
            pretrained_embedding (torch.Tensor): Pretrained embeddings with
                shape (vocab_size, embed_dim)
            freeze_embedding (bool): Set to False to fine-tune pretraiend
                vectors. Default: False
            vocab_size (int): Need to be specified when not pretrained word
                embeddings are not used.
            embed_dim (int): Dimension of word vectors. Need to be specified
                when pretrained word embeddings are not used. Default: 300
            filter_sizes (List[int]): List of filter sizes. Default: [3, 4, 5]
            num_filters (List[int]): List of number of filters, has the same
                length as `filter_sizes`. Default: [100, 100, 100]
            n_classes (int): Number of classes. Default: 2
            dropout (float): Dropout rate. Default: 0.5
        """

        super(CNN_NLP, self).__init__()
        # Embedding layer
        if pretrained_embedding is not None: # ! this case!
            self.vocab_size, self.embed_dim = pretrained_embedding.shape #! (20280,300)
            self.embedding = nn.Embedding.from_pretrained(pretrained_embedding,
                                                          freeze=freeze_embedding) #! nice feature! Whether to fine-tune the embedding layer
        else:
            self.embed_dim = embed_dim
            self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                          embedding_dim=self.embed_dim,
                                          padding_idx=0,
                                          max_norm=5.0)
        # Conv Network
        self.conv1d_list = nn.ModuleList([
            nn.Conv1d(in_channels=self.embed_dim,
                      out_channels=num_filters[i],
                      kernel_size=filter_sizes[i])
            for i in range(len(filter_sizes))
        ])
        # Fully-connected layer and Dropout
        self.fc = nn.Linear(np.sum(num_filters), num_classes) #! ???
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, input_ids):
        """Perform a forward pass through the network.

        Args:
            input_ids (torch.Tensor): A tensor of token ids with shape
                (batch_size, max_sent_length)

        Returns:
            logits (torch.Tensor): Output logits with shape (batch_size,
                n_classes)
        """

        # Get embeddings from `input_ids`. Output shape: (b, max_len, embed_dim)
        x_embed = self.embedding(input_ids).float() #!: from (50, 62) to (50, 62, 300)

        # Permute `x_embed` to match input shape requirement of `nn.Conv1d`.
        #! The shape requirement for Conv1d is (batch_size, #channel, #sequence length)
        #! The # of channel can be considered as the feature dimension!
        #! Here, in this case, the dimension of Embedding can be considered as the feature dimension.
        #! sequence length is the length of each sentence.
        # Output shape: (b, embed_dim, max_len) #NOTE (50, 300, 62)
        x_reshaped = x_embed.permute(0, 2, 1) #NOTE: This is the as using `einops.rearrange`

        # Apply CNN and ReLU. Output shape: (b, num_filters[i], L_out) #NOTE (50, 100, ?) #zzf: Length is no longer 62. It is equal to `62-kernel_size+1`
        x_conv_list = [F.relu(conv1d(x_reshaped)) for conv1d in self.conv1d_list]
        # print(f'x_conv_list: the length of the output is {len(x_conv_list)}')
        # print(f'the shape of the first convolutional layer output:{x_conv_list[0].shape}') #NOTE: (50,100,60), 60 = 62-3+1, where 3 is the first kernel size
        # print(f'the shape of the second convolutional layer output:{x_conv_list[1].shape}') #NOTE: (50,100,59), 59 = 62-4+1, where 4 is the second kernel size
        # print(f'the shape of the thrid convolutional layer output:{x_conv_list[2].shape}') #NOTE: (50,100,58), 58 = 62-5+1, where 5 is the third kernel size

        # Max pooling. Output shape: (b, num_filters[i], 1)
        #! What max pooling does is choose the maximum value from the third dimension of the input tensor. Then the size of the third dimension becomes 1.
        x_pool_list = [F.max_pool1d(x_conv, kernel_size=x_conv.shape[2])
            for x_conv in x_conv_list]
        # print(f'the shape of the first max pooling layer output:{x_pool_list[0].shape}')  #NOTE: (50,100,1), x_conv.shape[2] is 60 for the first maxpool layer
        # print(f'the shape of the second max pooling layer output:{x_pool_list[1].shape}') #NOTE: (50,100,1), x_conv.shape[2] is 59 for the second maxpool layer
        # print(f'the shape of the third max pooling layer output:{x_pool_list[2].shape}') #NOTE: (50,100,1), x_conv.shape[2] is 58 for the third maxpool layer
        
        # Concatenate x_pool_list to feed the fully connected layer.
        # Output shape: (b, sum(num_filters)) #NOTE: from three (50,100,1) to (50,300)
        #! The following operation gets rid of the third dimesion of the input and concatenate the three inputs together along the second dimension.
        x_fc = torch.cat([x_pool.squeeze(dim=2) for x_pool in x_pool_list],
                         dim=1) 
        # print(f'the size of the concatenated output is {x_fc.shape}') # (50,300)
        
        # Compute logits. Output shape: (b, n_classes)
        logits = self.fc(self.dropout(x_fc))
        # print(f'the size of the logits is {logits.shape}') # (50,2)
        return logits

In [181]:
import torch.optim as optim

def initilize_model(pretrained_embedding=None,
                    freeze_embedding=False,
                    vocab_size=None,
                    embed_dim=300,
                    filter_sizes=[3, 4, 5],
                    num_filters=[100, 100, 100],
                    num_classes=2,
                    dropout=0.5,
                    learning_rate=0.01):
    """Instantiate a CNN model and an optimizer."""

    assert (len(filter_sizes) == len(num_filters)), "filter_sizes and \
    num_filters need to be of the same length."

    # Instantiate CNN model
    cnn_model = CNN_NLP(pretrained_embedding=pretrained_embedding,
                        freeze_embedding=freeze_embedding,
                        vocab_size=vocab_size,
                        embed_dim=embed_dim,
                        filter_sizes=filter_sizes,
                        num_filters=num_filters,
                        num_classes=2,
                        dropout=0.5)
    
    # Send model to `device` (GPU/CPU)
    cnn_model.to(device)

    # Instantiate Adadelta optimizer
    optimizer = optim.Adadelta(cnn_model.parameters(),
                               lr=learning_rate,
                               rho=0.95)
    # optimizer = optim.Adam(cnn_model.parameters(),
    #                         lr=learning_rate)
    # optimizer = optim.SGD(cnn_model.parameters(),
    #                       lr=0.01)

    return cnn_model, optimizer

In [182]:
import random
import time

# Specify loss function
loss_fn = nn.CrossEntropyLoss()

def set_seed(seed_value=42):
    """Set seed for reproducibility."""

    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train(model, optimizer, train_dataloader, val_dataloader=None, epochs=10):
    """Train the CNN model."""
    
    # Tracking best validation accuracy
    best_accuracy = 0

    # Start training loop
    print("Start training...\n")
    print(f"{'Epoch':^7} | {'Train Loss':^12} | {'Val Loss':^10} |"
          f"{    'Val Acc':^9} | {'Elapsed':^9}")
    print("-"*60)

    for epoch_i in range(epochs):
        # =======================================
        #               Training
        # =======================================

        # Tracking time and loss
        t0_epoch = time.time()
        total_loss = 0

        # Put the model into the training mode
        model.train()

        for step, batch in enumerate(train_dataloader):
            # Load batch to GPU
            # b_input_ids, b_labels = tuple(t.to(device) for t in batch)
            b_input_ids, b_labels = batch[0].to(device), batch[1].to(device)
            b_labels = b_labels.type(torch.cuda.LongTensor) #! HACK: has been fixed by casting the datatype from torch.tensor to torch.cuda.longTensor
            # print(b_input_ids.shape, b_labels.shape)

            # Zero out any previously calculated gradients
            model.zero_grad()

            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids)
            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, b_labels)
            total_loss += loss.item()

            # Perform a backward pass to calculate gradients
            loss.backward()

            # Update parameters
            optimizer.step()

        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader) #! This is the average loss per batch

        # =======================================
        #               Evaluation
        # =======================================
        if val_dataloader is not None:
            # After the completion of each training epoch, measure the model's
            # performance on our validation set.
            val_loss, val_accuracy = evaluate(model, val_dataloader)

            # Track the best accuracy
            if val_accuracy > best_accuracy:
                best_accuracy = val_accuracy

            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch
            print(f"{epoch_i + 1:^7} | {avg_train_loss:^12.6f} |"
                f"{val_loss:^10.6f} | {val_accuracy:^9.2f} | {time_elapsed:^9.2f}")
            
    print("\n")
    print(f"Training complete! Best accuracy: {best_accuracy:.2f}%.")

def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's
    performance on our validation set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled
    # during the test time.
    model.eval()

    # Tracking variables
    val_accuracy = []
    val_loss = []

    # For each batch in our validation set...
    for batch in val_dataloader:
        # Load batch to GPU
        # b_input_ids, b_labels = tuple(t.to(device) for t in batch)
        b_input_ids, b_labels = batch[0].to(device), batch[1].to(device)
        
        b_labels = b_labels.type(torch.cuda.LongTensor) #! HACK: has been fixed by casting the datatype from torch.tensor to torch.cuda.longTensor

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids)

        # Compute loss
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())

        # Get the predictions
        preds = torch.argmax(logits, dim=1).flatten() #NOTE: `dim=1` means to 

        # Calculate the accuracy rate
        accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        val_accuracy.append(accuracy)

    # Compute the average accuracy and loss over the validation set.
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)

    return val_loss, val_accuracy

In [183]:
# CNN-non-static: fastText pretrained word vectors are fine-tuned during training.
set_seed(42)
cnn_non_static, optimizer = initilize_model(pretrained_embedding=embeddings,
                                            freeze_embedding=False, #! Turn on the fine-tuning
                                            learning_rate=0.25,
                                            dropout=0.5)
# optimizer = optim.Adadelta(cnn_non_static.parameters(),
#                             lr=0.01,
#                             rho=0.95)
train(cnn_non_static, optimizer, train_dataloader, val_dataloader, epochs=20)


Start training...

 Epoch  |  Train Loss  |  Val Loss  | Val Acc  |  Elapsed 
------------------------------------------------------------
   1    |   0.587595   | 0.472113  |   77.12   |   1.87   
   2    |   0.450446   | 0.431196  |   79.66   |   1.63   
   3    |   0.384983   | 0.414759  |   81.20   |   1.61   
   4    |   0.326249   | 0.403201  |   81.66   |   1.64   
   5    |   0.274251   | 0.406854  |   81.84   |   1.64   
   6    |   0.215751   | 0.399692  |   82.02   |   1.60   
   7    |   0.170066   | 0.431065  |   80.84   |   1.60   
   8    |   0.133689   | 0.445953  |   80.84   |   1.60   
   9    |   0.103541   | 0.477012  |   81.56   |   1.63   
  10    |   0.076184   | 0.461447  |   81.66   |   1.61   
  11    |   0.062485   | 0.470175  |   82.39   |   1.62   
  12    |   0.048177   | 0.488282  |   82.47   |   1.82   
  13    |   0.037161   | 0.507841  |   82.93   |   1.83   
  14    |   0.031491   | 0.514615  |   83.11   |   1.81   
  15    |   0.024513   | 0.534048  