# 00_preprocessing

# Download example data from GoogleDrive

In [1]:
import requests

def download_file_from_google_drive(id, destination):
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params = { 'id' : id }, stream = True)
    token = get_confirm_token(response)

    if token:
        params = { 'id' : id, 'confirm' : token }
        response = session.get(URL, params = params, stream = True)

    save_response_content(response, destination)    

def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)

- 다운로드

In [9]:
file_id = '1z3deyvkth9yGrFxD2ML1OJd_YTBi9C0j'
destination = 'SleepAIDATA.zip'
download_file_from_google_drive(file_id, destination)

- 압축 풀기

In [11]:
!unzip SleepAIDATA.zip -d DATA/

Archive:  SleepAIDATA.zip
   creating: flow/
   creating: flow/S2020-EM-01-0001_video_01/
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00000.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00001.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00002.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00003.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00004.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00005.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00006.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00007.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00008.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00009.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00010.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00011.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00012.jpg  
  inflating: flow/S2020-EM-01-0001_video_01/flow_x_00013.jpg  
  inflating: flow/S2020-EM-0

# Data Check

In [23]:
import shutil
from glob import glob
import os
import numpy as np
from tqdm import tqdm
from PIL import Image
import pandas as pd
import random
from matplotlib import pyplot as plt
import json

In [21]:
DATA_DIR = './DATA/'
case_lst = glob(DATA_DIR+'*')

## annotation json file

In [112]:
case_dir = case_lst[0]
json_lst = []
for path in glob(case_dir+'/*.json'):
    with open(path, 'r') as j:
        jsn = json.loads(j.read())
        json_lst.append(jsn)
#         print("Keys: %s" %contents.keys())


In [132]:
# 1 Epoch : 30초
# Sleep Staging: (Wake) Wake (Light Sleep) N1 N2 (Deep Sleep) N3 REM
# Tasl: 각 Epoch의 수면단게(Wake, Light Sleep, Deep Sleep) 분류

case_json = json_lst[0]
print(len(case_json['Event']))
print(case_json['Event'][0])

2422
{'Event_Number': 0, 'Event_Label': 'Wake', 'Start_Time': '2020/03/03 22:06:02.000', 'End_Time': '2020/03/03 22:06:32.000', 'Start_Epoch': 1, 'End_Epoch': 2, 'Duration(second)': 30.0}


In [256]:
value = 'N1'
value.isin(['N1', 'N2'])

AttributeError: 'str' object has no attribute 'isin'

In [261]:
import pandas as pd

event_df = pd.DataFrame.from_dict(case_json['Event'])
# print(set(event_df['Event_Label']))
event_df_oi = event_df[event_df['Event_Label'].isin(['Wake', 'N1', 'N2', 'N3', 'REM'])]
event_df_oi['FPS'] = [fps for i in range(len(event_df_oi))]
event_df_oi['Case'] = [case_json['Case_Info']['Case_Number'] for i in range(len(event_df_oi))]\

lst = ['Light Sleep' if value in ['N1', 'N2'] else value for value in event_df_oi['Event_Label']]
lst = ['Deep Sleep' if value == 'REM' else value for value in lst]
event_df_oi['Event_Label'] = lst

event_df_oi.head()
event_df_oi['Event_Label'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Light Sleep    536
Deep Sleep     284
Wake           169
Name: Event_Label, dtype: int64

In [263]:
# !mkdir ./PDATA

# os.makedirs('./PDATA/case1/')

event_df.to_csv('./PDATA/case1/annot.csv', index = False)
event_df_oi.to_csv('./PDATA/case1/p_annot.csv', index = False)

## Images

In [264]:
FLOW_DIR = './DATA/case1/flow/S2020-EM-01-0001_video_01'
RGB_DIR = './DATA/case1/rgb/S2020-EM-01-0001_video_01'
flow_lst = glob(FLOW_DIR+'/*.jpg')
rgb_lst = glob(RGB_DIR+'/*.jpg')
flow_lst.sort()
rgb_lst.sort()

In [265]:
fps = case_json['Video_Info'][0]['Frame_Rate']
print(f"Frame per second: {fps}")

def frame2epoch(frameidx): # image frame number (type 'int') to corresponding start epoch (type 'int')
    return int(frameidx//(30*fps)+1)

def epoch2frame(startepoch): # start epoch (type 'int') to corresponding image frame numbers (type 'list')
    startframe = (startepoch-1)*fps*30
    return np.arange(startframe,startframe+fps*30,1).astype('int').tolist()

Frame per second: 5.0


In [266]:
!rm -rf {'./DATA/case1/flowx/'+FLOW_DIR.split('/')[-1]}
!rm -rf {'./DATA/case1/flowy/'+FLOW_DIR.split('/')[-1]}

os.makedirs('./DATA/case1/flowx/'+FLOW_DIR.split('/')[-1], exist_ok=True)
os.makedirs('./DATA/case1/flowy/'+FLOW_DIR.split('/')[-1], exist_ok=True)

flowx_lst = flow_lst[:(len(flow_lst)//2)]
flowy_lst = flow_lst[len(flow_lst)//2:]

for img in flowx_lst:
    shutil.copy(img, './DATA/case1/flowx/'+FLOW_DIR.split('/')[-1])
    
for img in flowy_lst:
    shutil.copy(img, './DATA/case1/flowy/'+FLOW_DIR.split('/')[-1])