# Project

In [1]:
# enable auto reload
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
from IPython.display import Image

## 1. Load Preprocessed Data

In [3]:
RAW_PATH = './data'            # path to raw data
NP_PATH = './preprocessed'     # path to preprocessed data
FRAME_LEN = 1024               # length of one data frame
FRAME_INTV = 512               # interval between data frames

# Create the directory for preprocessed data. exist_ok=True keeps the cell
# safe to re-run and avoids the check-then-create race of os.path.exists()
# followed by os.mkdir(); makedirs also creates missing parent directories
# should NP_PATH ever be changed to a nested path.
os.makedirs(NP_PATH, exist_ok=True)

In [4]:
from src.utils.data_utils import get_preprocessed_data
### see <get_preprocessed_data> in './src/utils/data_utils.py' for details

# The call yields nine values; unpack them grouped as train/test pairs so the
# correspondence between names is easy to read.
(
    DATA,
    train_idx, test_idx,
    trainY, testY,
    train_diameter, test_diameter,
    train_rpm, test_rpm,
) = get_preprocessed_data(RAW_PATH, NP_PATH, FRAME_LEN, FRAME_INTV)

## 2. Semisupervised Learning!!!!

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

In [6]:
# Seed the NumPy and PyTorch (CPU + CUDA) random number generators so that
# repeated runs of the notebook start from the same state.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Request deterministic cuDNN kernels. The cuDNN autotuner (benchmark mode)
# has to be switched off as well: when enabled it may pick a different
# convolution algorithm from run to run, which defeats the seeding above.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

You can modify the source code (e.g., the modules under `./src`) if needed.

In [7]:
SAVE_PATH = './saved_models'
# exist_ok=True keeps the cell re-runnable and avoids the race between an
# os.path.exists() check and a subsequent os.mkdir().
os.makedirs(SAVE_PATH, exist_ok=True)

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(DEVICE)

cuda


In [8]:
# NOTE(review): MASK_P looks like the fraction of training samples that keep
# their labels (the summary printed by get_shuffle_n_mask reports ~0.49%
# labeled data) -- confirm in get_shuffle_n_mask.
MASK_P = 0.005
# NOTE(review): presumably the index of a fault diameter that is screened out
# of the labeled set -- confirm in get_shuffle_n_mask.
SCREEN_DIAM = 2

# Bundle of settings handed to get_shuffle_n_mask via its `mask_params`
# keyword argument.
MASK_PARAMS = {
    'mask_p' : MASK_P,
    'screen_diam' : SCREEN_DIAM,
    'trainY' : trainY,
    'train_diameter' : train_diameter
}

In [12]:
from src.utils.data_utils import get_shuffle_n_mask

train_shuffle, val_shuffle, MASK = get_shuffle_n_mask(train_idx.shape[0], mask_params=MASK_PARAMS)

# of labeled data : 69
# of unlabeled data : 13897
percentage : 0.49%


		0	1	2	3	4	5	| total
0"	|	14	-	-	-	-	-	| 14
0.007"	|	-	5	4	4	6	3	| 22
0.14"	|	-	0	0	0	-	-	| 0
0.021"	|	-	2	5	5	6	5	| 23
0.028"	|	-	6	4	-	-	-	| 10
------------------------------------------------------------------------
total	|	14	13	13	9	12	8	| 69


# Your code from here

In [13]:
### YOU CAN IMPORT ANY INTERNAL/EXTERNAL LIBRARIES HERE

In [14]:
### YOU CAN MODIFY THE FOLLOWING CONFIGURATIONS
LR = 1e-3        # learning rate of the optimizer
BATCH = 128      # batch size
EPOCH = 500      # maximum number of epochs to run
TOL = 10         # early stopping: stop training if the best validation loss is not improved for TOL epochs
SAV_INTV = 10    # save the model weights every SAV_INTV epochs

In [33]:
MODEL_NAME = 'ProjectModel1'

class MyModel(nn.Module):
    """Template model for the project; fill in the marked sections.

    Layers are meant to be created in ``__init__`` and the computation written
    in ``forward``. At the end of construction every ``nn.Linear`` and
    ``nn.Conv1d`` submodule is re-initialized by :meth:`init_weights`.
    """

    def __init__(self):
        super().__init__()

        ### YOUR CODE FROM HERE

        ### TO HERE

        # Keep this after the layer definitions: apply() calls init_weights on
        # every registered submodule and on the model itself.
        self.apply(self.init_weights)

    def init_weights(self, m):
        """Initialize one module: Xavier-uniform weights and zero bias.

        Args:
            m: module passed in by ``nn.Module.apply``. Only modules whose
                exact type is ``nn.Linear`` or ``nn.Conv1d`` are touched;
                everything else is left as it is.
        """
        if type(m) in (nn.Linear, nn.Conv1d):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)

    def forward(self, x, get_latent=False):
        """Forward pass -- not implemented in this template.

        Args:
            x: input batch.
            get_latent: NOTE(review): presumably asks for the latent
                representation to be returned as well -- confirm against the
                callers in ``src.experiment``.

        Raises:
            NotImplementedError: always, until the marked section is filled in.
        """
        ### YOUR CODE FROM HERE

        # Raise the exception instead of returning the exception class:
        # returning it would hand callers a bogus "output" that only fails
        # later, with a confusing error far from the real cause.
        raise NotImplementedError
        ### TO HERE
    ### YOU CAN ADD ANY FUNCTIONS/INSTANCES INSIDE THE CLASS

In [34]:
def batch_step(model, X, Y, M=None, train=False, optimizer=None):
    """Process one batch and report its classification loss and hit counts.

    This is a template: the computation has to be written in the marked
    section below.

    Args:
        model: the network to run on the batch.
        X: input batch.
        Y: target labels of the batch.
        M: optional boolean mask shaped like ``Y``. When omitted, an all-True
            mask is created on ``DEVICE``.
            NOTE(review): presumably True marks samples whose label may be
            used for the loss -- confirm against the training loop.
        train: NOTE(review): presumably enables the parameter update --
            confirm against the training loop.
        optimizer: optimizer instance; presumably only needed when ``train``
            is True.

    Returns:
        The tuple ``(class_loss, correct, wrong)`` once implemented.

    Raises:
        NotImplementedError: until the marked section is filled in.
    """
    if M is None:
        # `zeros == 0` is True everywhere, i.e. a mask that selects all of Y.
        M = torch.zeros_like(Y) == 0
        M = M.to(device=DEVICE, dtype=torch.bool)

    ### YOUR CODE FROM HERE
    # Fail with an explicit message rather than with a NameError on the
    # undefined names returned below. Replace this line with code that
    # computes class_loss, correct and wrong.
    raise NotImplementedError(
        'batch_step is a template: compute class_loss, correct and wrong')
    ### TO HERE

    return class_loss, correct, wrong

In [35]:
### YOU CAN ALSO MODIFY the function <epoch_step> in 
### './src/utils/data_utils.py', but be careful!!

In [36]:
# for debugging
# NOTE: these imports rebind the names `MyModel` and `batch_step`, so the
# template class and function defined in the cells above are replaced by the
# versions from `src` for the rest of the notebook. Remove this cell to run
# your own implementation.
from src.models.SimpleModel import SimpleModel as MyModel
from src.experiment import batch_step

In [37]:
model = MyModel()
# Move the model to DEVICE instead of calling .cuda() unconditionally, so the
# notebook also runs on machines without a GPU (DEVICE falls back to 'cpu').
model.to(DEVICE)

### IF YOU WANT, YOU ALSO CAN MODIFY THE OPTIMIZER
optimizer = optim.Adam(model.parameters(), lr=LR)

In [38]:
# Write MASK to '<SAVE_PATH>/<MODEL_NAME>_mask.npy'.
np.save('%s/%s_mask.npy'%(SAVE_PATH, MODEL_NAME), MASK)

In [39]:
# Keyword arguments for src.experiment.train; the dict is unpacked at the
# call site as train(**TRAIN_PARAMS), so every key must match a parameter
# name of that function.
TRAIN_PARAMS = {
    'model' : model,
    'optimizer' : optimizer,
    'train_idx' : train_idx,
    'trainY' : trainY,
    'data' : DATA,
    'model_name' : MODEL_NAME,
    'train_shuffle' : train_shuffle,
    'val_shuffle' : val_shuffle,
    'mask' : MASK,
    'sav_intv' : SAV_INTV,
    'tol' : TOL,
    'sav_path' : SAVE_PATH,
    'epoch' : EPOCH,
    'batch' : BATCH,
    'frame_len' : FRAME_LEN,
    'batch_step' : batch_step
}

In [40]:
from src.experiment import train
train(**TRAIN_PARAMS)

(1.33s)[Epoch 1]
	(train) loss : 0.94593,	acc : 0.12594
	(eval) loss : 0.97209,	acc : 0.17717
(1.30s)[Epoch 2]
	(train) loss : 0.93286,	acc : 0.20113
	(eval) loss : 0.97239,	acc : 0.21475
(1.30s)[Epoch 3]
	(train) loss : 0.91397,	acc : 0.21742
	(eval) loss : 0.95572,	acc : 0.21510
(1.29s)[Epoch 4]
	(train) loss : 0.87620,	acc : 0.23335
	(eval) loss : 0.97832,	acc : 0.21618
(1.32s)[Epoch 5]
	(train) loss : 0.84590,	acc : 0.32680
	(eval) loss : 0.97209,	acc : 0.32677
(1.32s)[Epoch 6]
	(train) loss : 0.81995,	acc : 0.39429
	(eval) loss : 0.98622,	acc : 0.41052
(1.41s)[Epoch 7]
	(train) loss : 0.79167,	acc : 0.44191
	(eval) loss : 1.01991,	acc : 0.40909
(1.35s)[Epoch 8]
	(train) loss : 0.74700,	acc : 0.41971
	(eval) loss : 0.88853,	acc : 0.46886
(1.32s)[Epoch 9]
	(train) loss : 0.65493,	acc : 0.44057
	(eval) loss : 0.78501,	acc : 0.48533
(1.40s)[Epoch 10]
	(train) loss : 0.55550,	acc : 0.43269
	(eval) loss : 0.79520,	acc : 0.43414
(1.41s)[Epoch 11]
	(train) loss : 0.65074,	acc : 0.39859
	(

In [41]:
# Keyword arguments shared by the evaluation helpers used below (test,
# score_table, get_latents); each call unpacks this dict with ** and adds the
# split-specific test_idx / testY itself.
# NOTE(review): 'load_version': 'best' presumably selects the checkpoint with
# the best validation loss -- confirm in src.experiment.
TEST_PARAMS = {
    'model' : model,
    'data' : DATA,
    'model_name' : MODEL_NAME,
    'load_version' : 'best',
    'sav_path' : SAVE_PATH,
    'batch' : BATCH,
    'frame_len' : FRAME_LEN,
    'batch_step' : batch_step
}

In [46]:
from src.experiment import test
test(**TEST_PARAMS, test_idx=test_idx, testY=testY)

Test Result of model <ProjectModel1>:best
  [Loss]	1.40342
  [Accuracy]	51.17%


In [47]:
from src.experiment import score_table
score_table(diameter = test_diameter, test_idx=test_idx, testY=testY, **TEST_PARAMS)



		0	1	2	3	4	5	| total
0"	|	1.0	-	-	-	-	-	| 1.0
0.007"	|	-	0.596	0.895	0.546	0.036	0.0	| 0.415
0.14"	|	-	0.130	0.656	0.0	-	-	| 0.262
0.021"	|	-	0.588	0.927	0.593	0.0	0.0	| 0.421
0.028"	|	-	0.837	0.174	-	-	-	| 0.507
------------------------------------------------------------------------
total	|	1.0	0.537	0.665	0.380	0.018	0.0	| 0.511


In [49]:
# Evaluate the same checkpoint on three views of the training set: all of it,
# the samples selected by MASK, and the samples not selected by MASK.
# Each entry is (header to print, frame indices, labels).
EVAL_SPLITS = (
    ('[EVALUATE TRAIN DATA]', train_idx, trainY),
    ('\n[EVALUATE LABELED DATA]', train_idx[MASK], trainY[MASK]),
    ('\n[EVALUATE UNLABELED DATA]', train_idx[~MASK], trainY[~MASK]),
)

for split_header, split_idx, split_labels in EVAL_SPLITS:
    print(split_header)
    test(**TEST_PARAMS, test_idx=split_idx, testY=split_labels)

[EVALUATE TRAIN DATA]
Test Result of model <ProjectModel1>:best
  [Loss]	1.39447
  [Accuracy]	51.16%

[EVALUATE LABELED DATA]
Test Result of model <ProjectModel1>:best
  [Loss]	0.87852
  [Accuracy]	71.01%

[EVALUATE UNLABELED DATA]
Test Result of model <ProjectModel1>:best
  [Loss]	1.39704
  [Accuracy]	51.06%


In [50]:
from src.utils.plot_utils import *
from src.experiment import get_latents
from sklearn.manifold import TSNE


In [51]:
# Fetch the latent vectors and predictions for the test split, moving each
# result to the CPU and converting it to a NumPy array as it is unpacked.
z, preds = (
    out.cpu().numpy()
    for out in get_latents(**TEST_PARAMS, test_idx=test_idx, testY=testY)
)
print(z.shape, preds.shape)

(3539, 32) (3539,)


In [52]:
# Project the latent vectors to 2-D for plotting. random_state is pinned to
# SEED so the embedding is the same every time this cell is re-run; without
# it t-SNE draws from the global NumPy RNG, whose state depends on which
# cells were executed before.
tsne = TSNE(n_components=2, random_state=SEED)
z_tsne = tsne.fit_transform(z)
print(z_tsne.shape)

(3539, 2)


In [53]:
%matplotlib notebook
draw_magic(z_tsne, [preds, testY, test_diameter, test_rpm], DATA, test_idx, frame_len=FRAME_LEN, frame_intv=FRAME_INTV)

<IPython.core.display.Javascript object>