# Import packages and get authenticated

In [None]:
import numpy as np
import pprint

import pandas as pd
pd.set_option('display.max_columns', 500)
from tqdm import tqdm_notebook as tqdm
from IPython.display import display
import os
import sys
sys.path.append('/content/drive/My Drive/中研院/repo/')
# sys.path.append('~/project_FDDAT/repo/')
sys.path.append('../') # add this line so Data and data are visible in this file
from os.path import expanduser
home = expanduser("~")

from falldetect.utilities import *
from falldetect.models import *
from falldetect.dataset_util import *
from falldetect.training_util import *
from falldetect.eval_util import *

import time
import datetime
from datetime import datetime
import json
import argparse

# Plotting
# checklist 1: comment inline, uncomment Agg
%matplotlib inline
import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
matplotlib.rc( 'savefig', facecolor = 'white' )

from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F

# Get user inputs
In the IPython notebook, these inputs are hardcoded. In production Python code, pass them on the command line and let the argument parser read them.

In [None]:
def _str2bool(value):
    """Convert a command-line token such as 'True' or 'False' into a real bool.

    argparse hands option values over as text, and ``bool('False')`` is True,
    so the text has to be interpreted explicitly.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


# Command-line interface of the experiment. Inside the notebook the argument
# list is hardcoded further down; as a script, switch to parser.parse_args().
parser = argparse.ArgumentParser(description='FD_DAT')
parser.add_argument('--input_folder', metavar='input_folder', help='input_folder',
                    default='../')
parser.add_argument('--output_folder', metavar='output_folder', help='output_folder',
                    default='../')
parser.add_argument('--extractor_type', metavar='extractor_type', help='extractor_type',
                    default='CNN')
parser.add_argument('--num_epochs', type=int, metavar='num_epochs', help='number of epochs',
                    default=5)
parser.add_argument('--CV_n', type=int, metavar='CV_n', help='CV folds',
                    default=2)
parser.add_argument('--rep_n', type=int, metavar='rep_n', help='number of repitition',
                    default=5)
parser.add_argument('--cuda_i', type=int, metavar='cuda_i', help='cuda index',
                    default=1)
# Tasks are separated by spaces; within a task, source and target are joined
# by '-' (e.g. 'UPFall_wrist-UMAFall_ankle'). The default follows that format.
parser.add_argument('--tasks_list', metavar='tasks_list', help='a list of all tasks',
                    default='UMAFall_waist-UPFall_belt UPFall_wrist-UMAFall_ankle')
# Parsed into a real bool so that 'False' is not mistaken for a truthy string.
parser.add_argument('--show_diagnosis_plt', type=_str2bool, metavar='show_diagnosis_plt', help='show diagnosis plt or not',
                    default=False)




# split_mode = 'LOO'
# split_mode = '5fold'

# checklist 2: comment first line, uncomment second line seizures_FN
args = parser.parse_args(['--input_folder', '../../data_mic/stage1/preprocessed_18hz_5fold', 
                          '--output_folder', '../../data_mic/stage2/test',
# args = parser.parse_args(['--input_folder', '../../data_mic/stage1/preprocessed_WithoutNormal_18hz_5fold', 
#                           '--output_folder', '../../data_mic/stage2/modeloutput_WithoutNormal_18hz_5fold',
                          '--extractor_type', 'CNN',
                          '--num_epochs', '5',
#                           '--CV_n', '2',
#                           '--rep_n', '2',
#                           '--cuda_i', '2',
                          '--show_diagnosis_plt', 'True',
                          '--tasks_list', 'UPFall_wrist-UMAFall_ankle',])
#                           '--tasks_list', 'UMAFall_waist-UMAFall_wrist UPFall_wrist-UMAFall_ankle',])
                          
# args = parser.parse_args()

In [None]:
# Turn the parsed arguments into the module-level settings used by later cells.
home_dir = home+'/project_FDDAT/'
input_folder = args.input_folder
output_folder = args.output_folder
extractor_type = args.extractor_type
num_epochs = args.num_epochs
CV_n = args.CV_n
rep_n = args.rep_n
# The flag may arrive as text; bool('False') would be True, so compare the
# spelling instead. str() keeps this correct if the value is already a bool.
show_diagnosis_plt = str(args.show_diagnosis_plt).strip().lower() in ('true', 't', 'yes', 'y', '1')

# The CUDA index is read from the package's params.json, not from --cuda_i.
with open('../../repo/falldetect/params.json') as json_file:
    falldetect_params = json.load(json_file)
cuda_i = falldetect_params['cuda_i']

# Each task is written '<source>-<target>'; tasks are separated by whitespace.
tasks_list = []
for item in args.tasks_list.split():
    parts = item.split('-')
    if len(parts) < 2:
        raise ValueError(
            "task {!r} must be written as '<source>-<target>'".format(item))
    tasks_list.append((parts[0], parts[1]))
    
inputdir = input_folder+'/'
outputdir = output_folder+'/'
# exist_ok avoids failing when the folder appears between a check and the creation.
os.makedirs(outputdir, exist_ok=True)
    
# A run counts as a test run when the last component of the output folder
# contains 'test'; normpath makes this insensitive to a trailing slash.
test_mode = 'test' in os.path.basename(os.path.normpath(output_folder))

device = torch.device('cuda:{}'.format(int(cuda_i)) if torch.cuda.is_available() else 'cpu')


In [None]:
# Settings bundle consumed by the training cells. The entries from 'dropout'
# through 'step_n' are only read when extractor_type is 'CNNLSTM'.
training_params = dict(
    HP_name='hp',
    classes_n=2,
    CV_n=CV_n,
    num_epochs=num_epochs,
    channel_n=4,
    batch_size=4,
    learning_rate=0.001,
    extractor_type=extractor_type,
    device=device,
    dropout=0.5,
    hiddenDim_f=3,
    hiddenDim_y=3,
    hiddenDim_d=3,
    win_size=18,
    win_stride=6,
    step_n=9,
    show_diagnosis_plt=show_diagnosis_plt,
)

In [None]:
# Cross-validation fold index handed to the data-loader helper.
i_CV = 1

# Sensor locations available per dataset; names are spelled '<dataset>_<location>'.
_sensor_locations = (
    ('UPFall', ('neck', 'wrist', 'belt', 'rightpocket', 'ankle')),
    ('UMAFall', ('chest', 'wrist', 'waist', 'leg', 'ankle')),
    ('SFDLA', ('chest', 'wrist', 'waist', 'thigh', 'ankle')),
    ('FARSEEING', ('lowback', 'thigh')),
)
src_names = ['{}_{}'.format(dataset, location)
             for dataset, locations in _sensor_locations
             for location in locations]
# src_names = ['FARSEEING_lowback', 'FARSEEING_thigh']

In [None]:
def _sensitivity(pred, truth):
    """Return TP / (TP + FN) for two binary arrays, or NaN when truth has no positives."""
    TP = ((pred == 1) & (truth == 1)).sum()
    FN = ((pred == 0) & (truth == 1)).sum()
    positives = TP + FN
    if positives == 0:
        # Nothing to detect: report NaN explicitly instead of dividing by zero.
        return float('nan')
    return TP / positives


def _plot_split_predictions(ax, loader, model, label_prefix, title):
    """Draw the class-1 outputs of `model` on one split and return its sensitivity."""
    data = loader.dataset.data.to(device)
    labels = loader.dataset.labels.to(device).long()
    # Pure inference over the whole split: no autograd graph is needed.
    with torch.no_grad():
        feature_out, class_out, _ = model(data)
    out_sigmoid = torch.sigmoid(class_out).detach().cpu().numpy()
    labels_np = labels.detach().cpu().numpy()

    # The score uses the arg-max class; the plotted decision line uses a 0.5
    # threshold on the class-1 output, as drawn by the horizontal line.
    model_pred = np.argmax(out_sigmoid, 1)

    ax.plot(out_sigmoid[:,1],'.b', label='{}_class_sigmoid'.format(label_prefix), markersize=3)
    ax.plot(out_sigmoid[:,1].round(),'b', alpha=0.5, label='{}_class_decision'.format(label_prefix))
    ax.plot(labels_np,'r', alpha=0.5, label='{}_class_labels'.format(label_prefix))
    ax.axhline(0.5, color='k', label='threshold')
    ax.legend(loc='upper right')
    ax.set_title(title, fontsize=20)

    return _sensitivity(model_pred, labels_np)


def plot_epoch(train_loader, val_loader, model, src_name, outputdir):
    """Plot model outputs against labels for the train and val splits and save the figure.

    Args:
        train_loader: loader whose ``dataset`` exposes ``data`` and ``labels``
            tensors; drawn in the left panel.
        val_loader: same kind of loader; drawn in the right panel.
        model: called on the full data tensor; must return three values, the
            second being per-class scores with at least two columns.
        src_name: name shown in the figure title and used for the file name.
        outputdir: folder prefix (expected to end with a path separator) the
            image is written to.

    The model is switched to eval mode and left that way. Tensors are moved to
    the module-level ``device``. The image is saved as '<src_name>.png' so that
    successive sources do not overwrite one another.
    """
    model.eval()

    fig = plt.figure(figsize=(30, 5), dpi=120)

    ax1 = fig.add_subplot(1, 2, 1)
    train_sensitivity = _plot_split_predictions(ax1, train_loader, model, 'src', 'train')

    ax2 = fig.add_subplot(1, 2, 2)
    val_sensitivity = _plot_split_predictions(ax2, val_loader, model, 'tgt', 'val')
    
    fig.suptitle('src_name: {} sensitivity ({:.3f}, {:.3f})'.format(src_name, train_sensitivity, val_sensitivity), fontsize=16)
    fig.savefig(outputdir+'{}.png'.format(src_name))


In [None]:
# # ---> 87     train_data = np.concatenate(src_feature_out.data.detach().cpu().numpy(),src_feature_out.data.detach().cpu().numpy())
# aaa = src_feature_out.data.detach().cpu().numpy()
# np.concatenate((aaa,aaa),axis=0).shape


# np.concatenate((src_domain_labels,src_domain_labels)).shape


In [None]:
def get_PAD(src_train_loader, tgt_train_loader, src_val_loader, tgt_val_loader, model, c=3000, return_details=False):
    """Score how separable source and target features are with a domain SVM.

    Features are taken from the first output of `model`. An SVM is fitted on
    the train splits to tell source (label 0) from target (label 1), then its
    predicted target probability on the val splits is compared with the true
    domain labels: ``PAD = 2 * (1 - 2 * mse)`` (presumably the proxy
    A-distance - TODO confirm the intended definition).

    Args:
        src_train_loader, tgt_train_loader: loaders whose ``dataset.data``
            tensors are used to fit the SVM.
        src_val_loader, tgt_val_loader: loaders whose ``dataset.data`` tensors
            are used to score it.
        model: called on a full data tensor; must return three values, the
            first being the feature matrix.
        c: regularisation parameter ``C`` of the SVM.
        return_details: when True, return ``(PAD, svm_out, val_label)`` so the
            caller can inspect or plot the SVM outputs.

    Returns:
        PAD, or ``(PAD, svm_out, val_label)`` when `return_details` is True.
    """
    # Imported here so this cell does not rely on a wildcard import providing `svm`.
    from sklearn import svm

    def _extract(loader):
        """Return (features as ndarray, number of samples) for one loader."""
        data = loader.dataset.data.to(device)
        # Inference only: skip building an autograd graph over the whole split.
        with torch.no_grad():
            feature_out, _, _ = model(data)
        return feature_out.detach().cpu().numpy(), data.shape[0]

    def _stack_domains(src_loader, tgt_loader):
        """Stack source then target features; domain labels are 0 then 1."""
        src_features, src_n = _extract(src_loader)
        tgt_features, tgt_n = _extract(tgt_loader)
        features = np.concatenate((src_features, tgt_features), axis=0)
        domain_labels = np.concatenate((np.zeros(src_n), np.ones(tgt_n)))
        return features, domain_labels

    model.eval()

    train_data, train_label = _stack_domains(src_train_loader, tgt_train_loader)
    print(train_data.shape, train_label.shape)

    svm_model = svm.SVC(C=c, probability=True, verbose=2)
    svm_model.fit(train_data, train_label)

    val_data, val_label = _stack_domains(src_val_loader, tgt_val_loader)

    svm_out = svm_model.predict_proba(val_data)
    mse = mean_squared_error(val_label, svm_out[:,1])
    PAD = 2. * (1. - 2. * mse)
    print('\nmse=', mse)
    print('PAD=', PAD)

    if return_details:
        return PAD, svm_out, val_label
    return PAD


bad_src_names_list = []
results_train = {}
results_val = {}

for task_item in tasks_list:
    (src_name, tgt_name) = task_item
    
    print('\n\nsrc_name: ', src_name)
    print('tgt_name: ', tgt_name)

    # TODO: don't need to extract training_params
    classes_n = training_params['classes_n']
    CV_n = training_params['CV_n']
    num_epochs = training_params['num_epochs']
    channel_n = training_params['channel_n']
    batch_size = training_params['batch_size']
    learning_rate = training_params['learning_rate']
    extractor_type = training_params['extractor_type']
    device = training_params['device']
    show_diagnosis_plt = training_params['show_diagnosis_plt']

    # Names are spelled '<dataset>_<sensor location>'; data lives in a folder per pair.
    src_dataset_name = src_name.split('_')[0]
    src_sensor_loc = src_name.split('_')[1]

    tgt_dataset_name = tgt_name.split('_')[0]
    tgt_sensor_loc = tgt_name.split('_')[1]

    src_inputdir = inputdir + '{}/{}/'.format(src_dataset_name, src_sensor_loc)
    tgt_inputdir = inputdir + '{}/{}/'.format(tgt_dataset_name, tgt_sensor_loc)

    print('------------------------------Working on i_CV {}------------------------------'.format(i_CV))
    # 1. prepare dataset
    # NOTE(review): learning_rate is passed as the fourth argument of
    # get_data_loader - confirm that this is what the helper expects.
    src_train_loader, src_val_loader = get_data_loader(src_inputdir, i_CV, batch_size, learning_rate)
    tgt_train_loader, tgt_val_loader = get_data_loader(tgt_inputdir, i_CV, batch_size, learning_rate)

    # the model expect the same input dimension for src and tgt data
    # (shapes are read straight from the tensors; no copy to NumPy is needed)
    src_train_size = src_train_loader.dataset.data.shape[0]
    src_val_size = src_val_loader.dataset.data.shape[0]
    src_input_dim = src_train_loader.dataset.data.shape[2]

    # 2. prepare model

    total_step = len(src_train_loader)

    train_performance_dict_list = [{} for _ in range(num_epochs)]
    val_src_performance_dict_list = [{} for _ in range(num_epochs)]

    if extractor_type == 'CNN':
        model = DannModel(device, class_N=classes_n, domain_N=2, channel_n=channel_n, input_dim=src_input_dim).to(device).float()
    elif extractor_type == 'CNNLSTM':
        dropout = training_params['dropout']
        hiddenDim_f = training_params['hiddenDim_f']
        hiddenDim_y = training_params['hiddenDim_y']
        hiddenDim_d = training_params['hiddenDim_d']
        win_size = training_params['win_size']
        win_stride = training_params['win_stride']
        step_n = training_params['step_n']
        model = CnnLstm(device, class_N=classes_n, channel_n=channel_n, dropout=dropout, hiddenDim_f=hiddenDim_f, hiddenDim_y=hiddenDim_y, hiddenDim_d=hiddenDim_d, win_size=win_size, win_stride=win_stride, step_n=step_n).to(device)
    else:
        # Without this, an unknown type would silently reuse the previous
        # task's model (or fail later with a NameError on the first task).
        raise ValueError('unknown extractor_type: {!r}'.format(extractor_type))

    model_name = model.__class__.__name__
    # loss and optimizer
    class_criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.01)

    # 3. fit the model
    for epoch in range(num_epochs):
        # The training pass is run for its effect on the model; the reported
        # train metrics come from the evaluation pass right after it.
        train_epoch(src_train_loader, device, model, class_criterion, optimizer, epoch)
        
        train_performance_dict = val_epoch(src_train_loader, device, model, class_criterion, optimizer, epoch, 'src')
        train_performance_dict_list[epoch] = train_performance_dict

        val_src_performance_dict = val_epoch(src_val_loader, device, model, class_criterion, optimizer, epoch, 'src')
        val_src_performance_dict_list[epoch] = val_src_performance_dict
        
    torch.cuda.empty_cache()

    # svm_out and val_label are kept at notebook scope so that they can be
    # inspected or plotted after the loop (values of the last task remain).
    val_PAD, svm_out, val_label = get_PAD(src_train_loader, tgt_train_loader, src_val_loader, tgt_val_loader, model, c=3000, return_details=True)

    plot_epoch(src_train_loader, src_val_loader, model, src_name, outputdir)

    # Report the metrics of the final epoch (requires num_epochs >= 1); indexing
    # the lists directly avoids depending on the leaked loop variable.
    final_train_sensitivity = train_performance_dict_list[-1]['src_sensitivity']
    final_val_sensitivity = val_src_performance_dict_list[-1]['src_sensitivity']

    print(src_name, final_train_sensitivity)
    results_train[src_name] = final_train_sensitivity
    results_val[src_name] = final_val_sensitivity
    
    if final_train_sensitivity < 0.7:
        print('{} is bad src'.format(src_name))
        bad_src_names_list.append(src_name)

        
print(bad_src_names_list)
print('train results')
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(results_train)
print('val results')
pp.pprint(results_val)

In [None]:
# Overlay the second column of svm_out (the SVM's predicted probability for
# domain label 1) on the true domain labels.
# NOTE(review): this cell needs `svm_out` and `val_label` to exist at notebook
# scope; get_PAD computes values under these names, so make sure they have
# been exposed outside that function before running this cell.
plt.plot(svm_out[:,1])
plt.plot(val_label)

# functions developed