# loss 발생 원인 탐색

In [1]:
import sys
import math
import faiss
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import display

%matplotlib inline

# number print option

In [2]:
# (disabled) would print arrays in full instead of summarizing them.
# NOTE(review): recent NumPy versions reportedly reject threshold=np.nan;
# use sys.maxsize if this is ever re-enabled — confirm.
# np.set_printoptions(threshold=np.nan)
# Print NumPy floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)
# Render pandas floats in a 20-character field with thousands separators
# and two decimals.
pd.options.display.float_format = '{:20,.2f}'.format

# origin meta loader

In [6]:
sys.path.append("../../")
%run ../../ai/common/imports.py
%run ../../ai/dataset/everyone/npz/meta_loader.py

In [7]:
# Build the metadata loader over the processed npz directory and load it.
# The second argument is presumably a glob-style block selector — confirm
# against MetaLoader. The loaded object is used below through `ar.archive`
# and `ar.get_all`.
ar = MetaLoader("/home/chy/archive-data/processed/everyone-npz/", "*")
ar.load()

load npz blocks num: 2912
loading blocks[0] ...
loading blocks[100] ...
loading blocks[200] ...
loading blocks[300] ...
loading blocks[400] ...
loading blocks[500] ...
loading blocks[600] ...
loading blocks[700] ...
loading blocks[800] ...
loading blocks[900] ...
loading blocks[1000] ...
loading blocks[1100] ...
loading blocks[1200] ...
loading blocks[1300] ...
loading blocks[1400] ...
loading blocks[1500] ...
loading blocks[1600] ...
loading blocks[1700] ...
loading blocks[1800] ...
loading blocks[1900] ...
loading blocks[2000] ...
loading blocks[2100] ...
loading blocks[2200] ...
loading blocks[2300] ...
loading blocks[2400] ...
loading blocks[2500] ...
loading blocks[2600] ...
loading blocks[2700] ...
loading blocks[2800] ...
loading blocks[2900] ...
total item-indexes: 1490937



# show frame image

In [None]:
def show_items(items, figsize=(25,25)):
    """Plot the frame image of every item, one subplot per row.

    Args:
        items: Mapping of uid -> item. Each item must expose ``device``,
            ``orientation`` and ``image_frame``. ``image_frame`` is indexed
            on three axes and its third axis is reversed before display
            (presumably BGR -> RGB — confirm against the loader).
        figsize: Size of the single figure that holds all subplots.
    """
    # Kept from the original: this changes the session-wide default figure
    # size, so it also affects figures created after this call.
    matplotlib.rcParams['figure.figsize'] = [4, 4]
    fig = plt.figure(figsize=figsize)
    fig.subplots_adjust(hspace=0.4, wspace=0.4)
    nrows, ncols = len(items), 1

    for i, uid in enumerate(items):
        item = items[uid]
        frame = item.image_frame[:, :, ::-1]

        ax = fig.add_subplot(nrows, ncols, i + 1)
        # Draw on the axes we just created instead of relying on pyplot's
        # implicit "current axes" state.
        ax.imshow(frame)
        # format() accepts both str and numeric orientation values, whereas
        # string concatenation raised TypeError for non-str values.
        ax.set_title("{} / so:{}".format(item.device, item.orientation))
        ax.set_aspect('equal')
        ax.axis('off')

    plt.show()

# Search Query

In [20]:
# Fetch a single item by uid and show the path of its source frame image.
# The query takes the uid as an int, while the result is keyed by the uid
# as a string.
x = ar.get_all([1213238])
x['1213238'].path_frame

'/home/chy/archive-data/datasets/eyes/everyone-origin/01713/frames/02275.jpg'

# select attr origin ds

In [9]:
# Flatten every archive record into a plain dict holding only the attributes
# needed for analysis: uid, orientation (so), camera-space target rounded to
# two decimals (cx, cy) and the pad flag.
origin_ds = []
for _key, record in ar.archive.items():
    origin_ds.append(dict(
        uid=int(record.uid),
        so=int(record.orientation),
        cx=round(float(record.camera_x), 2),
        cy=round(float(record.camera_y), 2),
        pad=bool(record.is_pad()),
    ))

# helpers ODF

In [None]:
def split_orientation_wise(df):
    """Split *df* by its ``so`` column into a list indexed by orientation.

    The returned list has five entries: index 0 is an empty-list placeholder,
    and ``result[i]`` for i in 1..4 holds the rows of *df* where ``so == i``.
    """
    per_orientation = [[]]
    per_orientation.extend(df[df.so == code] for code in range(1, 5))
    return per_orientation

def describe_df(df_sos, selector=None, exclude=('uid', 'epoch', 'so')):
    """Display ``describe()`` statistics for each orientation frame.

    Args:
        df_sos: Sequence indexed by orientation; entries 1..4 must be
            DataFrames (entry 0 is ignored).
        selector: Optional callable applied to each frame before the
            statistics are computed (e.g. to filter rows).
        exclude: Column names removed before describing. Names that are not
            present in a frame are skipped.
    """
    # A tuple passed straight to drop() would be read as one (MultiIndex)
    # label, so normalise to a list first.
    excluded = list(exclude)
    for i in range(1, 5):
        t = df_sos[i]
        if selector is not None:
            t = selector(t)
        # errors='ignore': frames without one of the excluded columns (the
        # origin frame has no 'epoch') must not raise KeyError.
        t = t.drop(excluded, axis=1, errors='ignore')
        display(t.describe(include="all"))
        
def hist_df(df_sos, bins=128, figsize=(10,5), rng=(-10, 10), selector=None):
    """Draw per-column histograms for each orientation frame.

    Args:
        df_sos: Sequence indexed by orientation; entries 1..4 are plotted.
        bins: Number of histogram bins.
        figsize: Figure size handed to ``DataFrame.hist``.
        rng: Value range handed to ``DataFrame.hist`` as ``range``.
        selector: Optional callable applied to each frame before plotting.
    """
    for orientation in (1, 2, 3, 4):
        frame = df_sos[orientation]
        frame = frame if selector is None else selector(frame)
        frame.hist(bins=bins, figsize=figsize, range=rng)

***
# origin dataset to data-frame (ODF)
***

In [None]:
# Turn the list of per-item dicts into a DataFrame (columns: uid, so, cx,
# cy, pad) and split it by orientation; odf_sos[i] holds so == i for i in
# 1..4.
odf = pd.DataFrame(origin_ds)
odf_sos = split_orientation_wise(odf)

# visualize ODF

In [None]:
# The origin frame only has uid/so/cx/cy/pad columns, so exclude just the
# identifier columns it actually contains; asking drop() to remove a missing
# 'epoch' column raises KeyError.
describe_df(odf_sos, exclude=['uid', 'so'])

In [None]:
# Histogram the origin frame per orientation with the default bins and the
# default value range (-10, 10).
hist_df(odf_sos)

*** 
# load training data frame (TDF)
***

In [None]:
# Load the training history CSV (the "training data frame", TDF).
tdf = pd.read_csv('/home/chy/dev-bench/auto-history/8c-48f-p-2-5-8-dyn-fc-512-128-1.auto')
# astype() returns a new DataFrame, so rebind the frame. The original
# assigned that whole frame to the single `epoch` column, which is an error.
tdf = tdf.astype({"epoch": int})

# compute metrics

In [None]:
# Euclidean distance between the predicted point (px, py) and the target
# point (cx, cy) for every row, then split the frame by orientation.
predicted_xy = tdf[['px', 'py']].values
target_xy = tdf[['cx', 'cy']].values
tdf['dist'] = np.linalg.norm(predicted_xy - target_xy, axis=1)
tdf_sos = split_orientation_wise(tdf)

# visualize TDF

In [None]:
# Statistics per orientation, restricted to rows of epoch 0.
describe_df(tdf_sos, selector=lambda frame: frame[frame.epoch == 0])

In [None]:
# Statistics per orientation, restricted to rows of epoch 1.
describe_df(tdf_sos, selector=lambda frame: frame[frame.epoch == 1])

In [None]:
# Statistics per orientation, restricted to rows of epoch 2.
describe_df(tdf_sos, selector=lambda frame: frame[frame.epoch == 2])

In [None]:
def sel_cam_epoch(t, epoch):
    """Return the px, py, cx, cy columns of the rows of *t* in *epoch*."""
    in_epoch = t.epoch == epoch
    return t.loc[in_epoch, ['px', 'py', 'cx', 'cy']]

def hist_tdf(epoch):
    """Histogram px, py, cx, cy of the training frame for one epoch.

    One set of histograms is drawn per orientation, using 500 bins over the
    value range (-25, 25).
    """
    def pick_epoch(frame):
        return sel_cam_epoch(frame, epoch)

    hist_df(tdf_sos, bins=500, rng=(-25, 25), selector=pick_epoch)

In [None]:
# Prediction/target histograms per orientation for epoch 0.
hist_tdf(0)

In [None]:
# Prediction/target histograms per orientation for epoch 1.
hist_tdf(1)

In [None]:
# Prediction/target histograms per orientation for epoch 2.
hist_tdf(2)

# Find RULE ML Debugging

- shuffle feature[n] 

# SO1: upper >> mid case

In [None]:
# SO1 rows with -6.5 < py < -5.5 and -1.5 < cy < 0, i.e. samples whose
# target y is near the top but whose predicted y falls near the middle.
# NOTE(review): `df_so1` is not defined in any visible cell; presumably the
# SO1 slice of the training frame — confirm before running.
bdf = df_so1[(-6.5<df_so1.py) & (df_so1.py<-5.5) & (-1.5<df_so1.cy) & (df_so1.cy<0)]
bdf.describe()

In [None]:
# Take the uids in slice 10:40 of the filtered rows, fetch the matching
# items from the archive and plot their frames.
bad_so1 = bdf.uid[10:40].values.tolist()
bad_so1 = ar.get_all(bad_so1)
show_items(bad_so1, figsize=(100,100))

In [None]:
""" original SO1 y 장축 가운데 응시 """
# (EN) Original dataset, SO1: samples gazing at the middle of the long (y)
# axis, selected as -7 < cy < -5.
# NOTE(review): `dfo` is not defined in any visible cell; presumably an
# earlier name for the orientation-split origin frames. Note that
# split_orientation_wise() stores SO1 at index 1, not 0 — confirm.
dfso1 = dfo[0]
gdf_c = dfso1[(-7<dfso1.cy) & (dfso1.cy<-5)]
gdf_c.describe()

In [None]:
# Take the uids in slice 30:40 of the mid-gaze rows, fetch the matching
# items from the archive and plot their frames.
good_so1 = gdf_c.uid[30:40].values.tolist()
good_so1 = ar.get_all(good_so1)
show_items(good_so1, figsize=(100,100))

In [None]:
""" original SO1 y 장축 상단 응시 """
# (EN) Original dataset, SO1: samples gazing at the upper end of the long
# (y) axis, selected as -1.5 < cy < 0.
# NOTE(review): `dfo` is not defined in any visible cell; presumably an
# earlier name for the orientation-split origin frames. Note that
# split_orientation_wise() stores SO1 at index 1, not 0 — confirm.
orig_so1_upper_df = dfo[0]
orig_so1_upper_df = orig_so1_upper_df[(-1.5<orig_so1_upper_df.cy) & (orig_so1_upper_df.cy<0)]
orig_so1_upper_df.describe()

In [None]:
# Take the uids in slice 80:100 of the upper-gaze rows, fetch the matching
# items from the archive and plot their frames.
origin_upper = orig_so1_upper_df.uid[80:100].values.tolist()
origin_upper = ar.get_all(origin_upper)
show_items(origin_upper, figsize=(100,100))

# 예측값과 실제값의 편차 분포

- 왜 +-5를 기준으로 피크를 가질까? 
   - -10을 -6으로, -1을 -6으로 예측하기 때문
   - 원 데이터 평균 -6.47
- 만약 cy가 uniform dist하면 양상이 달라질까? 
- 또한, 원 데이터에서 -10, -8, -7, -6, -1.5, -1 지점의 데이터가 많다
- 상대적으로 이 피크의 추이가 py의 추이와 유사해 보인다
- SO1 Y구간은 (장축) (-1.4, -26)