In [67]:
import os
import sys
import json
import glob
import shutil
import numpy as np
import pandas as pd

from itertools import chain

In [2]:
def grep_recur(base_path, pattern="*.*"):
    """Recursively collect files matching ``pattern`` under ``base_path``.

    Matches found directly in ``base_path`` come first, followed by the
    matches of each sub-directory (depth-first).
    """
    nested_matches = []
    for child_dir in grep_dirs(base_path):
        nested_matches.extend(grep_recur(child_dir, pattern))
    return grep_files(base_path, pattern) + nested_matches

def grep_files(base_path, pattern="*.*"):
    """Return the entries directly inside ``base_path`` whose name matches ``pattern``.

    Args:
        base_path: Directory to look in. It is taken literally, so glob
            metacharacters in it (``[``, ``*``, ``?``) are escaped.
        pattern: Glob pattern applied to the entry names. The default
            ``"*.*"`` only matches names that contain a dot.

    Returns:
        A list of ``"<base_path>/<name>"`` strings; empty when nothing
        matches or ``base_path`` does not exist.
    """
    # Only `pattern` may carry wildcards; without escaping, a directory such
    # as "run[1]" would be read as a character class and never match itself.
    return glob.glob("{}/{}".format(glob.escape(base_path), pattern))

def grep_dirs(base_path):
    """Return the paths of the immediate sub-directories of ``base_path``."""
    sub_dirs = []
    for entry_name in os.listdir(base_path):
        candidate = os.path.join(base_path, entry_name)
        if os.path.isdir(candidate):
            sub_dirs.append(candidate)
    return sub_dirs

In [3]:
# Dataset root; every device.json found anywhere underneath it is collected.
search_dir_path = "/ds/vc-one/"
device_jsons = grep_recur(base_path=search_dir_path, pattern="device.json")

In [4]:
# Report how many device.json files were found and show one example path.
print("device.json 개수: {}".format(len(device_jsons)))
# Indexing [0] raises IndexError when the search found nothing, so only show
# the sample when there is one.
if device_jsons:
    print("device.json 샘플 경로: {}".format(device_jsons[0]))

device.json 개수: 1513
device.json 샘플 경로: /ds/vc-one/Partial/3UWN2HHPUZOAQYU0VQ7VT9TBCS9NSA/device.json


In [79]:
# Load every device.json into `device_hist` as (path_id, metadata) pairs.
# `path_id` is the profile directory relative to the dataset root,
# e.g. "Partial/3UWN2HHPUZOAQYU0VQ7VT9TBCS9NSA".
device_hist = []
for js_path in device_jsons:
    # Paths containing "VisualMember" are left out of the statistics.
    if "VisualMember" in js_path:
        continue
    # Derive the id from the path structure relative to `search_dir_path`
    # rather than with str.replace() on a second copy of the root literal,
    # which would also rewrite matching text elsewhere in the path.
    path_id = os.path.relpath(os.path.dirname(js_path), search_dir_path)
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(js_path, encoding="utf-8") as json_data:
        device_hist.append((path_id, json.load(json_data)))

In [83]:
# Number of device entries kept in `device_hist`, i.e. after the loader
# skipped every path containing "VisualMember".
print("내부 멤버 제거 후 개수 : {}".format(len(device_hist)))

내부 멤버 제거 후 개수 : 1496


# 전체 제조사 & 모델

In [84]:
# Display the metadata dict (second tuple element) of the first loaded device
# as a sample of what a device.json contains.
print("디바이스 정보 샘플")
device_hist[0][1]

디바이스 정보 샘플


{'board': 'msm8937',
 'density': '2.0',
 'device': 'land',
 'display': 'MMB29M',
 'dpi': '320',
 'fov-horizontal': '1.1780972450961724',
 'fov-vertical': '1.0995574287564276',
 'heightPixels': '1280',
 'manufacturer': 'Xiaomi',
 'model': 'Redmi 3S',
 'screenX': '2.440950870513916',
 'screenY': '4.330718040466309',
 'sdk-version': '23',
 'version': '6.0.1',
 'widthPixels': '720',
 'xdpi': '294.967',
 'ydpi': '295.563'}

In [85]:
# Flatten each (path_id, metadata) pair into a record holding the profile uid
# and the lower-cased manufacturer / model names.
man_to_models = [
    {
        "uid": profile_uid,
        "manu": device_info["manufacturer"].lower(),
        "model": device_info["model"].lower(),
    }
    for profile_uid, device_info in device_hist
]

In [86]:
# Peek at the first flattened record (keys: uid, manu, model).
man_to_models[0]

{'manu': 'xiaomi',
 'model': 'redmi 3s',
 'uid': 'Partial/3UWN2HHPUZOAQYU0VQ7VT9TBCS9NSA'}

In [87]:
# One row per device profile, built from the uid / manu / model records.
df = pd.DataFrame(man_to_models)
# Summary per column: count, number of unique values, most frequent value.
df.describe()

Unnamed: 0,manu,model,uid
count,1496,1496,1496
unique,23,220,1496
top,samsung,iphone 6s,crowdworks/1S5GKuDc6iKwNDE0
freq,724,89,1


In [88]:
# Distinct manufacturers and models, in order of first appearance.
# Computed from the frame rather than hard-coded so the values always stay
# in sync with the data that was actually loaded.
manu_uniq = df['manu'].unique()
models = df['model'].unique()

print(manu_uniq)
# Number of device profiles per manufacturer.
print(df.manu.value_counts())

['xiaomi' 'google' 'lge' 'apple' 'samsung' 'huawei' 'sony' 'pantech'
 'foxconn' 'tcl' 'lyf' 'yulong' 'oppo' 'hmd global' 'asus' 'motorola'
 'vivo' 'advan' 'oneplus' 'gionee' 'lenovo' 'coolpad' 'tecno']
samsung       724
apple         552
lge           122
xiaomi         24
huawei         18
motorola       13
oneplus         5
coolpad         4
lyf             4
hmd global      4
google          3
lenovo          3
tcl             3
pantech         3
asus            2
tecno           2
oppo            2
gionee          2
vivo            2
yulong          1
advan           1
foxconn         1
sony            1
Name: manu, dtype: int64


# apple 핸드폰 (총 552 개)

In [89]:
# Keep only the rows whose manufacturer is apple, then summarise them.
is_apple = df['manu'] == "apple"
df_apple = df.loc[is_apple]
df_apple.describe()

Unnamed: 0,manu,model,uid
count,552,552,552
unique,1,19,552
top,apple,iphone 6s,crowdworks/1AwC8GbHJ3sgJlFQ
freq,552,89,1


In [90]:
# List the uid ("<group>/<id>") of every apple profile.
df_apple.uid.unique()

array(['Partial/39LOEL67OTOL3AMHZ9HB34V4W4D38N',
       'crowdworks/1ZvyGZ9sPFwiWIWu', 'crowdworks/11pFCzZC8WucxuyG',
       'crowdworks/1QSoImRBavJvJtU8', 'crowdworks/18MhOdkTVA8civGy',
       'crowdworks/19Vj1zDa489WEL0y', 'crowdworks/11r9vCZNTOFBbdLc',
       'crowdworks/1gxSusAKXAjarw4e', 'crowdworks/20Mackl8CD1C4xge',
       'crowdworks/11XwwoJQYPRdJfm4', 'crowdworks/1zyJmxzyMoXaagr2',
       'crowdworks/18Le5Z1W8V6wgCxs', 'crowdworks/1749p5SmNpci18ds',
       'crowdworks/18BbMAgR7oFFXF7A', 'crowdworks/1D1SwCElo5zRdRMe',
       'crowdworks/11rehTXSyCIef0CW', 'crowdworks/1R5P85Y3ohOLuRcG',
       'crowdworks/1aQLSrKEvWClh6UC', 'crowdworks/17anrhadjuQreDlQ',
       'crowdworks/1GW6JTF9qZ6YlrdI', 'crowdworks/1tY2hIi5rSppmGSe',
       'crowdworks/16cDsoeBAXXxZ6oK', 'crowdworks/20Fz8plaKz67WhI8',
       'crowdworks/1lzFgDKc7Gqu03EG', 'crowdworks/1AsJbftWXcHOk8KO',
       'crowdworks/1tyA1wTwk0t8E92m', 'crowdworks/1zkJl2JbCyuOV2EC',
       'crowdworks/13oRO21rMDqTdoSO', 'crowdworks/19WE

In [62]:
# Number of apple profiles per model.
# NOTE(review): this cell's execution count is lower than that of the cell
# that builds df_apple, so the stored output may be stale — re-run to confirm.
df_apple.model.value_counts()

iphone 6s                 89
iphone 7                  85
iphone 8                  81
iphone x                  65
iphone 8 plus             59
iphone 6                  45
iphone 7 plus             42
iphone se                 21
iphone 6s plus            18
iphone11,2                18
iphone11,6                 8
iphone 6 plus              8
iphone11,8                 7
iphone 5s                  6
ipad 6                     5
ipad air 2                 3
ipad pro 2 (10.5-inch)     1
ipad air                   1
ipod touch 6               1
Name: model, dtype: int64

# 파일 이동 

<br>

- from: /ds/vc-one/ 
- to: /ds/archived/vc-one-apple 

In [101]:
# Move every apple profile directory from /ds/vc-one/<group>/<id> to
# /ds/archived/vc-one-apple/<id>.
# The move is destructive, so it only runs when APPLY_MOVE is set to True;
# with the default False this cell changes nothing on disk.
APPLY_MOVE = False

for row_uid in df_apple['uid']:
    uid = row_uid.split('/')[1]
    path_from = "/ds/vc-one/{}".format(row_uid)
    path_to = "/ds/archived/vc-one-apple/{}".format(uid)
    if not APPLY_MOVE:
        continue
    # shutil.move() nests the source inside a destination directory that
    # already exists, so an already-archived profile is skipped instead.
    if os.path.exists(path_to):
        print("*** skipped {} (already exists: {}) ***".format(path_from, path_to))
        continue
    try:
        shutil.move(path_from, path_to)
        print("*** moved {} \t {} ***".format(path_from, path_to))
    except Exception as e:
        # Best effort: report the failure and carry on with the next profile.
        print(e)

# directory flatten

In [124]:
# Report archived profile directories that still contain sub-directories
# ("folded" profiles).
base_path = "/ds/archived/vc-one-apple"
profile_dirs = grep_dirs(base_path)
# The loop variable must not be named `pd`: that rebinds the pandas alias and
# breaks every later `pd.` call in this kernel.
for profile_dir in profile_dirs:
    folded = grep_dirs(profile_dir)

    if len(folded) != 0:
        print("*** folded 폴더 발견 {} ***".format(profile_dir))

In [119]:
# Profiles to flatten, written as "<group>/<id>"; the loop below uses only the
# part after the "/" to build the directory path.
folded_dirs = ['Freezed/31HQ4X3T3TTK2YRFJDRGGHPM5VPLSG', 'Freezed/3GU1KF0O4JKBCXPB0JYS79O66KOPB7', 'Freezed/3SB5N7Y3O4N4FXT2ZKBGTJEVEOOG0Q', 'Freezed/3H7XDTSHKDAXNNUNSR410ENZFBTWGY', 'Freezed/3ZOTGHDK5JUS8JUCW5788YTG032OSE', 'Freezed/3ZPPDN2SLWFORDLOTL4LMRUC4AZE9B', 'Freezed/34S6N1K2ZW2VCPDZ8JFHC3IKWK8LHV', 'Freezed/32RIADZISTNO4QKUDLSZU0NBJRYS4Y', 'Freezed/3TU5ZICBREKDABSJXKT7GCOF38B8Q0', 'Freezed/3DY4FPOOA27BXO3WISTHDGISA9IRVR', 'Freezed/35GCEFQ6I670N6N48ZHOGOS8SRN3ZK', 'Freezed/3SLE99ER0OW5Q1RHNSWUY7QKGGBBZO', 'Freezed/3P59JYT76M3FGCIMY7E1BQMZ1ECT2O', 'Freezed/32RIADZISTNO4QKUDLSZU0NBJ9R4S3', 'Freezed/3UWN2HHPUZOAQYU0VQ7VT9TBAQPSNP', 'Freezed/3DY4FPOOA27BXO3WISTHDGISAR8VRL', 'Freezed/3RUIQRXJBC7XYLWPZ4UF4VBRD1HLL2', 'Freezed/37Z929RLGAR8LBZKSMT728Z3Z9GTSY', 'Freezed/3H0W84IWBLLUVDHAN247DCU6AW5REB', 'Freezed/3L0KT67Y8FZ42XYE2SG6APRIEATSYF', 'Freezed/3LYA37P8IR6A16SVNRTKBS27CDPBK5', 'Freezed/3H7Z272LX8QNP6BNRWNMLCKBIDQPLG', 'Freezed/32AT8R96GMSNLOE9SRJH74L5IQ8SUF', 'Freezed/30H4UDGLT317VOT8R8O1LPZ0IJKMPL', 
               'Freezed/3RSDURM96B53SETW1XW1L8N3DL6EYQ', "Freezed/3TPZPLC3M1V692LYLQJMPQQUIRX3PE"]

# Flatten each listed profile: move the files found in its "1", "2" and "3"
# sub-directories up into the profile directory, then remove the emptied
# sub-directory.
for dn in folded_dirs:
    print("*** target: {} ***".format(dn))
    uid = dn.split('/')[1]
    path = "/ds/archived/vc-one-apple/{}".format(uid)

    for i in [1, 2, 3]:
        sub_path = "{}/{}".format(path, i)
        # Returns [] when the sub-directory does not exist.
        files = grep_files(sub_path)

        for f in files:
            path_to = "{}/{}".format(path, os.path.basename(f))
            # shutil.move() can replace an existing file (or nest into an
            # existing directory), so never move onto a name already taken.
            if os.path.exists(path_to):
                print("*** skipped {} (already exists: {}) ***".format(f, path_to))
                continue
            shutil.move(f, path_to)

        if len(files) > 0:
            # grep_files() only matches "*.*", so entries can remain here
            # (skipped collisions, names without a dot); os.rmdir() would
            # raise on a non-empty directory.
            if os.listdir(sub_path):
                print("*** not empty, kept {} ***".format(sub_path))
            else:
                os.rmdir(sub_path)
                print("*** flattend {} ***".format(path))

*** target: Freezed/31HQ4X3T3TTK2YRFJDRGGHPM5VPLSG ***
*** target: Freezed/3GU1KF0O4JKBCXPB0JYS79O66KOPB7 ***
*** target: Freezed/3SB5N7Y3O4N4FXT2ZKBGTJEVEOOG0Q ***
*** target: Freezed/3H7XDTSHKDAXNNUNSR410ENZFBTWGY ***
*** target: Freezed/3ZOTGHDK5JUS8JUCW5788YTG032OSE ***
*** target: Freezed/3ZPPDN2SLWFORDLOTL4LMRUC4AZE9B ***
*** target: Freezed/34S6N1K2ZW2VCPDZ8JFHC3IKWK8LHV ***
*** target: Freezed/32RIADZISTNO4QKUDLSZU0NBJRYS4Y ***
*** target: Freezed/3TU5ZICBREKDABSJXKT7GCOF38B8Q0 ***
*** target: Freezed/3DY4FPOOA27BXO3WISTHDGISA9IRVR ***
*** target: Freezed/35GCEFQ6I670N6N48ZHOGOS8SRN3ZK ***
*** target: Freezed/3SLE99ER0OW5Q1RHNSWUY7QKGGBBZO ***
*** target: Freezed/3P59JYT76M3FGCIMY7E1BQMZ1ECT2O ***
*** target: Freezed/32RIADZISTNO4QKUDLSZU0NBJ9R4S3 ***
*** target: Freezed/3UWN2HHPUZOAQYU0VQ7VT9TBAQPSNP ***
*** target: Freezed/3DY4FPOOA27BXO3WISTHDGISAR8VRL ***
*** target: Freezed/3RUIQRXJBC7XYLWPZ4UF4VBRD1HLL2 ***
*** target: Freezed/37Z929RLGAR8LBZKSMT728Z3Z9GTSY ***
*** target

# 파일명 표준화

- eye-record-2-0-0.sensor.meta
- eye-record-2-0-1.meta
- eye-record-2-0-1.mp4

<br>

- record-2-0-0.json
- record-2-0-0.mp4

In [146]:
# Standardise recording file names under base_path:
#   "eye-record-..."  ->  "record-..."
#   "....meta"        ->  "....json"
# Renaming is destructive, so it only runs when APPLY_RENAME is set to True;
# with the default False this cell changes nothing on disk.
APPLY_RENAME = False

base_path = "/ds/archived/vc-one-apple/_partial"
files = grep_recur(base_path)

for f in files:
    dir_name, file_name = os.path.split(f)
    # Work on the file name only: a substring test/replace on the whole path
    # would also rewrite matching directory components.
    if 'eye-record-' in file_name:
        new_name = file_name.replace("eye-record-", "record-")
        # Only a trailing ".meta" is an extension; leave ".meta" elsewhere in
        # the name untouched.
        if new_name.endswith(".meta"):
            new_name = new_name[:-len(".meta")] + ".json"
        new_path = os.path.join(dir_name, new_name)
        if APPLY_RENAME:
            shutil.move(f, new_path)

# apple 수행도 체크

In [158]:
# Move every profile directory that contains "record-3-0-0.mp4" from
# partial/ to completed/.
# NOTE(review): presumably that file marks a finished session — confirm.
base_path = "/ds/archived/vc-one-apple/partial"
base_mv_path = "/ds/archived/vc-one-apple/completed"
profile_dirs = grep_dirs(base_path)

for pdp in profile_dirs:
    # Ask the filesystem directly instead of comparing against the exact
    # string format of a glob listing.
    if not os.path.isfile(os.path.join(pdp, "record-3-0-0.mp4")):
        continue

    uid = os.path.basename(pdp)
    path_to = "{}/{}".format(base_mv_path, uid)
    # shutil.move() nests the source inside a destination directory that
    # already exists, which would create a folded profile; skip instead.
    if os.path.exists(path_to):
        print("*** skipped {} (already exists: {}) ***".format(pdp, path_to))
        continue
    # Make sure the target root exists so the move is a plain rename.
    os.makedirs(base_mv_path, exist_ok=True)
    shutil.move(pdp, path_to)
    print("*** from {} \t\t to {} ***".format(pdp, path_to))