In [47]:
#import module
import torch
import torch.nn as nn
import numpy as np
import copy
import time
import os
import pandas as pd
import matplotlib.pyplot as plt
import glob

import logging
from datetime import timedelta,datetime
from pathlib import Path
from pymongo import MongoClient,ASCENDING,DESCENDING
from torch.utils.data import Dataset, DataLoader

#connect to MongoDB
# Connection settings are read from environment variables so they can be changed
# per machine without editing the notebook.
# NOTE(review): the fallback values are the ones that used to be hardcoded here.
# The password fallback in particular should be removed from the notebook (and the
# credential rotated) once MONGO_PASSWORD is provided by the environment.
MONGO_HOST = os.environ.get('MONGO_HOST', '129.254.196.65')
MONGO_PORT = int(os.environ.get('MONGO_PORT', '27017'))
MONGO_USER = os.environ.get('MONGO_USER', 'viewer')
MONGO_PASSWORD = os.environ.get('MONGO_PASSWORD', 'mongo2020!')
MONGO_AUTH_SOURCE = os.environ.get('MONGO_AUTH_SOURCE', 'lifelogging')

client = MongoClient(MONGO_HOST, MONGO_PORT, username = MONGO_USER, password = MONGO_PASSWORD, authSource = MONGO_AUTH_SOURCE)
db = client["lifelogging"]
col_data = db['data']
col_label = db['label']
col_researcher = db['researcher']
col_pred=db['pred']
#constant 
WINDOW_SIZE = 80 # 32Hz * 2.5sec
SAVE_DIR='../data_mongodb/2020_e4/models/'

#set log file
# logging.basicConfig opens the log file immediately and fails with
# FileNotFoundError when the directory is missing, so create it first.
os.makedirs(SAVE_DIR, exist_ok=True)
logging.basicConfig(filename=SAVE_DIR+'result.log', level=logging.INFO)


In [5]:
class ConvNet(nn.Module):
    """Small convolutional classifier that emits 4 scores per input sample.

    The network has two convolutional stages (two stacked convolutions followed
    by max pooling each) and two linear layers. No activation function is applied
    between layers. ``fc1`` takes 1024 flattened features, which is what a
    (N, 1, 15, 15) input produces after both stages (64 channels x 4 x 4).
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 32 channels, then 3x3 max pooling with stride 2.
        stage_one = [
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(32, 32, kernel_size=2, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.layer1 = nn.Sequential(*stage_one)

        # Stage 2: 32 -> 64 channels, then 2x2 max pooling with stride 2.
        stage_two = [
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(64, 64, kernel_size=2, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.layer2 = nn.Sequential(*stage_two)

        # Classifier head: flattened features -> 1000 hidden units -> 4 outputs.
        self.fc1 = nn.Linear(1024, 1000)
        self.fc2 = nn.Linear(1000, 4)

    def forward(self, x):
        """Run both convolutional stages and the linear head; returns (N, 4) scores."""
        features = self.layer2(self.layer1(x))
        # Keep the batch dimension and collapse everything else into one axis.
        flat = features.reshape(features.size(0), -1)
        return self.fc2(self.fc1(flat))

In [6]:
#get researcher_ids
# Keep only the researcher documents whose ID (as text) starts with 'user'.
researcher_cursor=col_researcher.find({})
researcher_ids=[
    doc['researcherID']
    for doc in researcher_cursor
    if str(doc['researcherID']).startswith('user')
]
print(researcher_ids)

['user24', 'user23', 'user05', 'user04', 'user06', 'user12', 'user08', 'user07', 'user02', 'user10', 'user03', 'user11', 'user28', 'user21', 'user26', 'user01', 'user22', 'user29', 'user25', 'user09', 'user30', 'user008', 'user27', 'user006']


In [40]:
# Split a time-indexed frame into gap-free segments
def check_missing_sessionss(df):
    """Split ``df`` wherever two consecutive index values are more than 0.5 s apart.

    Args:
        df: DataFrame whose index holds timestamps (the index is diffed and
            compared against a ``pd.Timedelta``).

    Returns:
        A list of DataFrames. Empty input gives ``[]``; input without any gap
        gives ``[df]`` (the same object); otherwise one frame per gap-free run,
        in index order.
    """
    # Nothing to split when there is no data
    if len(df) == 0:
        return []

    # Distance from each row to the previous one; rows that follow a gap are the
    # first rows of a new segment.
    step = df.index.to_series().diff()
    starts = step[step > pd.Timedelta("00:00:00.50")].index

    # No gap at all: the whole frame is a single segment
    if len(starts) == 0:
        return [df]

    # Label slices include their end point, so the trailing row (the start of the
    # next segment) is dropped with [:-1].
    segments = [df[:starts[0]][:-1]]
    segments.extend(df[begin:end][:-1] for begin, end in zip(starts[:-1], starts[1:]))
    # The last segment runs to the end of the frame
    segments.append(df[starts[-1]:])
    return segments
    
# Find the first and last measurement dates and split the span into one-day intervals.
def get_date_arr(researcher_id):
    """Return one ``(start, end)`` string pair per day of a researcher's data.

    The first and last ``timeStamp`` values stored in ``col_data`` for
    ``researcher_id`` are truncated to their date part and shifted by five hours,
    so every interval runs from 05:00 to 05:00 of the following day. For
    ``researcher_id == 101`` the start date is fixed to 2020-08-30.

    Returns:
        List of ``(start, end)`` tuples formatted as ``"%Y-%m-%d %H:00:00"``.
    """
    day_shift = timedelta(hours=5)
    stamp_format = "%Y-%m-%d %H:00:00"

    if researcher_id == 101:
        start_date = datetime.strptime("2020-08-30", "%Y-%m-%d") + day_shift
    else:
        first_doc = col_data.find_one({"researcherID":researcher_id},sort=[('timeStamp', ASCENDING)])
        start_date = datetime.strptime(first_doc['timeStamp'][:10], "%Y-%m-%d") + day_shift

    last_doc = col_data.find_one({"researcherID":researcher_id},sort=[('timeStamp', DESCENDING)])
    end_date = datetime.strptime(last_doc['timeStamp'][:10], "%Y-%m-%d") + day_shift

    day_count = (end_date - start_date).days + 1
    return [
        (
            (start_date + timedelta(days=offset)).strftime(stamp_format),
            (start_date + timedelta(days=offset + 1)).strftime(stamp_format),
        )
        for offset in range(day_count)
    ]

# Collect sensor data as (sensor data windows, window start timestamps)
def sensor_label_from_db(researcher_id):
    """Load a researcher's "e4Acc" samples day by day and cut them into windows.

    For every one-day interval the samples are read from ``col_data``, placed in
    a time-indexed DataFrame, split into gap-free segments (gap threshold 50 ms)
    and each segment is cut into non-overlapping windows of ``WINDOW_SIZE`` rows.
    Rows left over at the end of a segment are discarded.

    Args:
        researcher_id: value matched against the ``researcherID`` field.

    Returns:
        Tuple ``(windows, matched_time)``:
        ``windows`` is ``np.array`` of all windows, shaped
        ``(num_windows, WINDOW_SIZE, 3)`` (columns e4_x, e4_y, e4_z) when at
        least one window exists and an empty array otherwise;
        ``matched_time`` is a list with the first timestamp of every window,
        formatted as ``"%Y-%m-%d %H:%M:%S.%f"``.
    """

    # Find the first and last measurement dates and split the span into one-day intervals.
    def get_date_arr(researcher_id):
        if(researcher_id!=101):
            start_date = datetime.strptime(col_data.find_one({"researcherID":researcher_id},sort=[('timeStamp', ASCENDING)])['timeStamp'][:10],"%Y-%m-%d")+timedelta(hours=5)
            print(start_date)
        else:
            start_date=datetime.strptime("2020-08-30","%Y-%m-%d")+timedelta(hours=5)
            print(start_date)
        end_date = datetime.strptime(col_data.find_one({"researcherID":researcher_id},sort=[('timeStamp', DESCENDING)])['timeStamp'][:10],"%Y-%m-%d")+timedelta(hours=5)
        print(end_date)
        date_arr=[]
        for i in range(((end_date-start_date).days+1)):
            date_arr.append(((start_date+timedelta(days=i)).strftime("%Y-%m-%d %H:00:00"),(start_date+timedelta(days=i+1)).strftime("%Y-%m-%d %H:00:00")))
        print(date_arr)
        return date_arr


    # Split a time-indexed frame into segments that contain no gap
    def check_missing_session(df):
        # No data at all
        if(len(df)==0):
            return []

        # Rows whose distance to the previous row exceeds 50 ms start a new segment
        mask = df.index.to_series().diff()
        section_start_points=mask[mask>pd.Timedelta("00:00:00.050")].index

        group_df=[]

        # No gap: the whole frame is one segment
        if(len(section_start_points)==0):
            return [df]
        # At least one gap
        else:
            # First segment (label slices are end-inclusive, hence the [:-1])
            group_df.append(df[:section_start_points[0]][:-1])

            # Middle segments
            for i in range(1,len(section_start_points)):
                group_df.append(df[section_start_points[i-1]:section_start_points[i]][:-1])

            # Last segment
            group_df.append(df[section_start_points[len(section_start_points)-1]:])
        return group_df

    since=time.time()
    stack=[]
    matched_time=[]
    cnt=0

    date_arr=get_date_arr(researcher_id)
    for date_val in date_arr:

        # Collect one day of e4 data. The interval is half-open ($gte / $lt):
        # consecutive days share their boundary value, so an inclusive upper
        # bound would return a sample stamped exactly on the boundary twice.
        e4_data_dict={}
        e4_cursor = col_data.find({"researcherID":researcher_id,"timeStamp":{'$gte':date_val[0],'$lt':date_val[1]},"type":"e4Acc"})
        for e4_data in e4_cursor:
            e4_data_dict[str(e4_data['timeStamp'])] = {'e4_x':float(e4_data['data']['x']),'e4_y':float(e4_data['data']['y']),'e4_z':float(e4_data['data']['z'])}

        # A day without samples yields no windows
        if not e4_data_dict:
            continue

        e4_df = pd.DataFrame.from_dict(e4_data_dict, "index")
        e4_df.index = pd.to_datetime(e4_df.index)
        # The query does not request an order, while gap detection diffs
        # consecutive index values, so make the time order explicit.
        e4_df = e4_df.sort_index()

        # Build the sliding windows for every gap-free segment
        for e4_df_checked in check_missing_session(e4_df):
            values = np.array(e4_df_checked)
            for j in range(0, len(e4_df_checked) - WINDOW_SIZE + 1, WINDOW_SIZE):
                cnt+=1
                stack.append(values[j:j + WINDOW_SIZE])
                matched_time.append(e4_df_checked.index[j].strftime("%Y-%m-%d %H:%M:%S.%f"))
            # Progress report roughly every 4000 new windows. The shape is built
            # from len(stack) instead of converting the whole list to an array.
            if(cnt>=4000):
                print("stack_size",(len(stack),)+stack[-1].shape,date_val[0],date_val[1])
                cnt=0

    # Convert once and reuse the array for both the report and the return value
    windows=np.array(stack)
    print("getting sensor_data is completed, num_of_windows",windows.shape)
    time_elapsed=time.time()-since
    print('getting sensor_data time: {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    return windows,matched_time



In [41]:
class SensorTrainDataset(Dataset):
    """Dataset pairing each sensor window with its timestamp.

    Attributes are set in ``__init__``:
    ``x_data`` is the input array converted to a float32 tensor, ``time_data``
    is the sequence of timestamps passed in, and ``len`` is the size of the
    array's first axis.
    """

    def __init__(self,cv_train_dataset,matched_time):
        """Store ``cv_train_dataset`` (a NumPy array) as a float tensor next to ``matched_time``."""
        self.time_data = matched_time
        self.x_data = torch.from_numpy(cv_train_dataset).to(torch.float32)
        self.len = cv_train_dataset.shape[0]

    def __len__(self):
        """Number of samples, i.e. the first-axis size of the input array."""
        return self.len

    def __getitem__(self, index):
        """Return ``(window_tensor, timestamp)`` for position ``index``."""
        sample = self.x_data[index]
        stamp = self.time_data[index]
        return sample, stamp
    
def get_researcher_dataloader(researcher_id):
    """Build a non-shuffled DataLoader (batch size 128) over one researcher's windows.

    Every window returned by ``sensor_label_from_db`` loses its last 5 rows, is
    transposed and reshaped to ``(1, 15, 15)``; with ``WINDOW_SIZE = 80`` rows of
    3 values that is 75 x 3 = 225 = 15 x 15 values per window. Only the
    reshaping step is timed and reported.
    """
    windows, window_times = sensor_label_from_db(researcher_id)

    started = time.time()
    images = np.array([
        window[:-5].transpose().reshape(1,15,15)
        for window in windows
    ])
    loader = DataLoader(SensorTrainDataset(images, window_times), batch_size=128, shuffle=False)

    elapsed = time.time() - started
    print('data preprocessing time: {:.0f}m {:.0f}s'.format(elapsed // 60, elapsed % 60))
    return loader

In [42]:
def data_prediction(researcher_id,model, criterion, dataloader, target_device=None):
    """Run ``model`` over ``dataloader`` and return one prediction record per sample.

    Args:
        researcher_id: value stored in the ``researcherID`` field of every record.
        model: module whose output has one score per class along dimension 1.
        criterion: unused; kept so existing callers keep working.
        dataloader: iterable yielding ``(inputs, time_data)`` batches; its
            ``dataset`` attribute is only used to report the sample count.
        target_device: device to run on. When ``None`` the module-level
            ``device`` variable is used, as before.

    Returns:
        List of dicts with the keys ``researcherID``, ``timeStamp``,
        ``prediction`` (index of the highest score, as a Python int) and
        ``type`` (always ``"e4acc"``), in dataloader order.
    """
    since = time.time()
    if target_device is None:
        # Fall back to the notebook-level device chosen in the driver cell.
        target_device = device

    # Moving the model is loop-invariant, so do it once up front.
    model = model.to(target_device)
    model.eval()
    results_arr=[]
    cnt=0

    # Inference only: disable autograd so no graph is kept for each batch.
    with torch.no_grad():
        # Iterate over data.
        for inputs, time_data in dataloader:
            cnt+=1
            inputs = inputs.to(target_device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            # tolist() yields plain Python ints, one per sample in the batch.
            for stamp, label in zip(time_data, preds.cpu().tolist()):
                results_arr.append({
                    "researcherID":researcher_id,
                    "timeStamp":stamp,
                    "prediction":label,
                    "type":"e4acc", # modified cse
                })
            # Progress report every 500 batches
            if(cnt>=500):
                print("prediction,,,,",(len(results_arr),))
                cnt=0
    time_elapsed=time.time()-since
    print("predicted {0} windows".format(len(dataloader.dataset)))
    print('prediction complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    return results_arr

In [48]:
#set model_saved_path
model_saved_path="E:\\data_mongodb\\2020_e4\\models\\"
model_files=glob.glob(model_saved_path+"*.pt")

#set cuda device
print(torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

#set path to save file
result_save_path="E:\\data_mongodb\\2020_e4\\preds\\"

#set criterion
criterion = nn.CrossEntropyLoss()

# Researchers whose models are not evaluated in this run
SKIPPED_RESEARCHER_IDS = {"user006", "user008", "user01", "user02", "user03", "user04"}

for model_file in model_files:
    print(model_file)
    # The researcher id is the part of the file name before the first underscore,
    # e.g. "user05_model.pt" -> "user05". basename() uses the platform's separator
    # instead of assuming a backslash.
    researcher_id=os.path.basename(model_file).split('_')[0]
    print(researcher_id,"started")
    if researcher_id in SKIPPED_RESEARCHER_IDS:
        print("skipping...")
    else:
        model_pred=ConvNet()
        # Comparing a torch.device with the string "cuda:0" is never true, so the
        # old GPU branch could not run. map_location=device loads the weights for
        # whichever device was selected above, GPU or CPU.
        model_pred.load_state_dict(torch.load(model_file,map_location=device))
        model_pred=model_pred.to(device)

        data_loader = get_researcher_dataloader(researcher_id)
        results_arr=data_prediction(researcher_id,model_pred, criterion, data_loader)
        #col_pred.insert(results_arr) #read only
    print(researcher_id,"ended")


True
cuda:0
E:\data_mongodb\2020_e4\models\user006_model.pt
user006 started
skipping...
user006 ended
E:\data_mongodb\2020_e4\models\user008_model.pt
user008 started
skipping...
user008 ended
E:\data_mongodb\2020_e4\models\user01_model.pt
user01 started
skipping...
user01 ended
E:\data_mongodb\2020_e4\models\user02_model.pt
user02 started
skipping...
user02 ended
E:\data_mongodb\2020_e4\models\user03_model.pt
user03 started
skipping...
user03 ended
E:\data_mongodb\2020_e4\models\user04_model.pt
user04 started
skipping...
user04 ended
E:\data_mongodb\2020_e4\models\user05_model.pt
user05 started
2020-08-29 05:00:00
2020-09-27 05:00:00
[('2020-08-29 05:00:00', '2020-08-30 05:00:00'), ('2020-08-30 05:00:00', '2020-08-31 05:00:00'), ('2020-08-31 05:00:00', '2020-09-01 05:00:00'), ('2020-09-01 05:00:00', '2020-09-02 05:00:00'), ('2020-09-02 05:00:00', '2020-09-03 05:00:00'), ('2020-09-03 05:00:00', '2020-09-04 05:00:00'), ('2020-09-04 05:00:00', '2020-09-05 05:00:00'), ('2020-09-05 05:00:00



user05 ended
E:\data_mongodb\2020_e4\models\user08_model.pt
user08 started
2020-08-29 05:00:00
2020-09-28 05:00:00
[('2020-08-29 05:00:00', '2020-08-30 05:00:00'), ('2020-08-30 05:00:00', '2020-08-31 05:00:00'), ('2020-08-31 05:00:00', '2020-09-01 05:00:00'), ('2020-09-01 05:00:00', '2020-09-02 05:00:00'), ('2020-09-02 05:00:00', '2020-09-03 05:00:00'), ('2020-09-03 05:00:00', '2020-09-04 05:00:00'), ('2020-09-04 05:00:00', '2020-09-05 05:00:00'), ('2020-09-05 05:00:00', '2020-09-06 05:00:00'), ('2020-09-06 05:00:00', '2020-09-07 05:00:00'), ('2020-09-07 05:00:00', '2020-09-08 05:00:00'), ('2020-09-08 05:00:00', '2020-09-09 05:00:00'), ('2020-09-09 05:00:00', '2020-09-10 05:00:00'), ('2020-09-10 05:00:00', '2020-09-11 05:00:00'), ('2020-09-11 05:00:00', '2020-09-12 05:00:00'), ('2020-09-12 05:00:00', '2020-09-13 05:00:00'), ('2020-09-13 05:00:00', '2020-09-14 05:00:00'), ('2020-09-14 05:00:00', '2020-09-15 05:00:00'), ('2020-09-15 05:00:00', '2020-09-16 05:00:00'), ('2020-09-16 05:00:0

prediction,,,, (320000,)
prediction,,,, (384000,)
prediction,,,, (448000,)
prediction,,,, (512000,)
predicted 565023 windows
prediction complete in 0m 13s
user09 ended
E:\data_mongodb\2020_e4\models\user10_model.pt
user10 started
2020-08-29 05:00:00
2020-09-28 05:00:00
[('2020-08-29 05:00:00', '2020-08-30 05:00:00'), ('2020-08-30 05:00:00', '2020-08-31 05:00:00'), ('2020-08-31 05:00:00', '2020-09-01 05:00:00'), ('2020-09-01 05:00:00', '2020-09-02 05:00:00'), ('2020-09-02 05:00:00', '2020-09-03 05:00:00'), ('2020-09-03 05:00:00', '2020-09-04 05:00:00'), ('2020-09-04 05:00:00', '2020-09-05 05:00:00'), ('2020-09-05 05:00:00', '2020-09-06 05:00:00'), ('2020-09-06 05:00:00', '2020-09-07 05:00:00'), ('2020-09-07 05:00:00', '2020-09-08 05:00:00'), ('2020-09-08 05:00:00', '2020-09-09 05:00:00'), ('2020-09-09 05:00:00', '2020-09-10 05:00:00'), ('2020-09-10 05:00:00', '2020-09-11 05:00:00'), ('2020-09-11 05:00:00', '2020-09-12 05:00:00'), ('2020-09-12 05:00:00', '2020-09-13 05:00:00'), ('2020-09

stack_size (21378, 80, 3) 2020-08-31 05:00:00 2020-09-01 05:00:00
stack_size (42626, 80, 3) 2020-09-01 05:00:00 2020-09-02 05:00:00
stack_size (66433, 80, 3) 2020-09-04 05:00:00 2020-09-05 05:00:00
stack_size (83693, 80, 3) 2020-09-05 05:00:00 2020-09-06 05:00:00
stack_size (105894, 80, 3) 2020-09-06 05:00:00 2020-09-07 05:00:00
stack_size (125219, 80, 3) 2020-09-07 05:00:00 2020-09-08 05:00:00
stack_size (142797, 80, 3) 2020-09-08 05:00:00 2020-09-09 05:00:00
stack_size (159174, 80, 3) 2020-09-09 05:00:00 2020-09-10 05:00:00
stack_size (180890, 80, 3) 2020-09-10 05:00:00 2020-09-11 05:00:00
stack_size (197390, 80, 3) 2020-09-11 05:00:00 2020-09-12 05:00:00
stack_size (214056, 80, 3) 2020-09-12 05:00:00 2020-09-13 05:00:00
stack_size (229510, 80, 3) 2020-09-13 05:00:00 2020-09-14 05:00:00
stack_size (250735, 80, 3) 2020-09-15 05:00:00 2020-09-16 05:00:00
stack_size (271699, 80, 3) 2020-09-16 05:00:00 2020-09-17 05:00:00
stack_size (291907, 80, 3) 2020-09-17 05:00:00 2020-09-18 05:00:00

stack_size (387527, 80, 3) 2020-09-18 05:00:00 2020-09-19 05:00:00
stack_size (409536, 80, 3) 2020-09-19 05:00:00 2020-09-20 05:00:00
stack_size (431053, 80, 3) 2020-09-20 05:00:00 2020-09-21 05:00:00
stack_size (453172, 80, 3) 2020-09-21 05:00:00 2020-09-22 05:00:00
stack_size (471743, 80, 3) 2020-09-22 05:00:00 2020-09-23 05:00:00
stack_size (490496, 80, 3) 2020-09-23 05:00:00 2020-09-24 05:00:00
stack_size (512371, 80, 3) 2020-09-24 05:00:00 2020-09-25 05:00:00
stack_size (533381, 80, 3) 2020-09-25 05:00:00 2020-09-26 05:00:00
stack_size (550775, 80, 3) 2020-09-26 05:00:00 2020-09-27 05:00:00
stack_size (572591, 80, 3) 2020-09-27 05:00:00 2020-09-28 05:00:00
getting sensor_data is completed, num_of_windows (572591, 80, 3)
getting sensor_data time: 33m 28s
data preprocessing time: 0m 2s
prediction,,,, (64000,)
prediction,,,, (128000,)
prediction,,,, (192000,)
prediction,,,, (256000,)
prediction,,,, (320000,)
prediction,,,, (384000,)
prediction,,,, (448000,)
prediction,,,, (512000,)
p

stack_size (423796, 80, 3) 2020-09-21 05:00:00 2020-09-22 05:00:00
stack_size (445204, 80, 3) 2020-09-22 05:00:00 2020-09-23 05:00:00
stack_size (463788, 80, 3) 2020-09-23 05:00:00 2020-09-24 05:00:00
stack_size (483585, 80, 3) 2020-09-24 05:00:00 2020-09-25 05:00:00
stack_size (507432, 80, 3) 2020-09-25 05:00:00 2020-09-26 05:00:00
stack_size (525450, 80, 3) 2020-09-26 05:00:00 2020-09-27 05:00:00
stack_size (544089, 80, 3) 2020-09-27 05:00:00 2020-09-28 05:00:00
getting sensor_data is completed, num_of_windows (544089, 80, 3)
getting sensor_data time: 38m 40s
data preprocessing time: 0m 2s
prediction,,,, (64000,)
prediction,,,, (128000,)
prediction,,,, (192000,)
prediction,,,, (256000,)
prediction,,,, (320000,)
prediction,,,, (384000,)
prediction,,,, (448000,)
prediction,,,, (512000,)
predicted 544089 windows
prediction complete in 0m 12s
user24 ended
E:\data_mongodb\2020_e4\models\user25_model.pt
user25 started
2020-08-30 05:00:00
2020-09-28 05:00:00
[('2020-08-30 05:00:00', '2020-0

data preprocessing time: 0m 2s
prediction,,,, (64000,)
prediction,,,, (128000,)
prediction,,,, (192000,)
prediction,,,, (256000,)
prediction,,,, (320000,)
prediction,,,, (384000,)
prediction,,,, (448000,)
prediction,,,, (512000,)
predicted 572822 windows
prediction complete in 0m 13s
user26 ended
E:\data_mongodb\2020_e4\models\user27_model.pt
user27 started
2020-09-03 05:00:00
2020-10-07 05:00:00
[('2020-09-03 05:00:00', '2020-09-04 05:00:00'), ('2020-09-04 05:00:00', '2020-09-05 05:00:00'), ('2020-09-05 05:00:00', '2020-09-06 05:00:00'), ('2020-09-06 05:00:00', '2020-09-07 05:00:00'), ('2020-09-07 05:00:00', '2020-09-08 05:00:00'), ('2020-09-08 05:00:00', '2020-09-09 05:00:00'), ('2020-09-09 05:00:00', '2020-09-10 05:00:00'), ('2020-09-10 05:00:00', '2020-09-11 05:00:00'), ('2020-09-11 05:00:00', '2020-09-12 05:00:00'), ('2020-09-12 05:00:00', '2020-09-13 05:00:00'), ('2020-09-13 05:00:00', '2020-09-14 05:00:00'), ('2020-09-14 05:00:00', '2020-09-15 05:00:00'), ('2020-09-15 05:00:00',

data preprocessing time: 0m 2s
prediction,,,, (64000,)
prediction,,,, (128000,)
prediction,,,, (192000,)
prediction,,,, (256000,)
prediction,,,, (320000,)
prediction,,,, (384000,)
prediction,,,, (448000,)
prediction,,,, (512000,)
predicted 565322 windows
prediction complete in 0m 13s
user28 ended
E:\data_mongodb\2020_e4\models\user29_model.pt
user29 started
2020-08-30 05:00:00
2020-09-28 05:00:00
[('2020-08-30 05:00:00', '2020-08-31 05:00:00'), ('2020-08-31 05:00:00', '2020-09-01 05:00:00'), ('2020-09-01 05:00:00', '2020-09-02 05:00:00'), ('2020-09-02 05:00:00', '2020-09-03 05:00:00'), ('2020-09-03 05:00:00', '2020-09-04 05:00:00'), ('2020-09-04 05:00:00', '2020-09-05 05:00:00'), ('2020-09-05 05:00:00', '2020-09-06 05:00:00'), ('2020-09-06 05:00:00', '2020-09-07 05:00:00'), ('2020-09-07 05:00:00', '2020-09-08 05:00:00'), ('2020-09-08 05:00:00', '2020-09-09 05:00:00'), ('2020-09-09 05:00:00', '2020-09-10 05:00:00'), ('2020-09-10 05:00:00', '2020-09-11 05:00:00'), ('2020-09-11 05:00:00',

stack_size (442557, 80, 3) 2020-09-22 05:00:00 2020-09-23 05:00:00
stack_size (460308, 80, 3) 2020-09-23 05:00:00 2020-09-24 05:00:00
stack_size (466392, 80, 3) 2020-09-23 05:00:00 2020-09-24 05:00:00
stack_size (481764, 80, 3) 2020-09-24 05:00:00 2020-09-25 05:00:00
stack_size (500868, 80, 3) 2020-09-25 05:00:00 2020-09-26 05:00:00
stack_size (522239, 80, 3) 2020-09-26 05:00:00 2020-09-27 05:00:00
stack_size (541551, 80, 3) 2020-09-27 05:00:00 2020-09-28 05:00:00
getting sensor_data is completed, num_of_windows (541551, 80, 3)
getting sensor_data time: 39m 25s
data preprocessing time: 0m 2s
prediction,,,, (64000,)
prediction,,,, (128000,)
prediction,,,, (192000,)
prediction,,,, (256000,)
prediction,,,, (320000,)
prediction,,,, (384000,)
prediction,,,, (448000,)
prediction,,,, (512000,)
predicted 541551 windows
prediction complete in 0m 12s
user30 ended
