# AKPD Tests

In [None]:
import csv
import gzip
import importlib
import json
import logging
import multiprocessing
import multiprocessing.dummy
from pathlib import Path
from pprint import pprint as pp
import time

# Log line layout: timestamp, level, message. INFO level is enabled so the
# logging.info progress lines emitted by the cells below are shown.
FORMAT = '%(asctime)-15s %(levelname)-5s %(message)s'
logging.basicConfig(format=FORMAT, level=logging.INFO)

import cv2
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Use the fully qualified option key. The bare 'max_columns' pattern is
# ambiguous in pandas versions that also define 'styler.render.max_columns'
# and raises OptionError there.
pd.set_option('display.max_columns', 50)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
%matplotlib inline

In [None]:
import weight as W
# Reload so the kernel uses the current on-disk version of the module.
importlib.reload(W)

In [None]:
from auto_keypoints import predict
# Reload so the kernel uses the current on-disk version of the module.
importlib.reload(predict)

# Predictor instance shared by the prediction cells below (referenced as the
# global `model` inside run_row_iter and run_row_tb).
model = predict.Predictor()

## AKPD Request Test

In [None]:
# Load the sample AKPD request body. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open("tests/resources/akpd_local.json") as _fp:
    body = json.load(_fp)

In [None]:
# One predict.Item per entry of the request body. Only the crop offsets and
# the sensor pixel counts are copied out of the metadata dicts.
items = [
    predict.Item(
        Id=entry["id"],
        left_crop_url=entry["leftCropUrl"],
        right_crop_url=entry["rightCropUrl"],
        lcm={key: entry["leftCropMetadata"][key] for key in ("x_coord", "y_coord")},
        rcm={key: entry["rightCropMetadata"][key] for key in ("x_coord", "y_coord")},
        cm={key: entry["cameraMetadata"][key] for key in ("pixelCountWidth", "pixelCountHeight")},
    )
    for entry in body
]
items

In [None]:
# Load the images for every item (presumably fills item.l_img / item.r_img,
# which later cells read), then run the predictor on the whole batch.
predict.load_images(items)
logging.info(f"loaded images: {len(items)}")
result = model.predict(items)
# Display the first left-crop keypoint of the first item as a sanity check.
result[0]["leftCrop"][0]

### Code Util

In [None]:
def akpd_vector(ann):
    """Flatten an annotation into a coordinate vector and an hm_max vector.

    Args:
        ann: dict with 'leftCrop' and 'rightCrop' lists of 8 keypoint dicts
            each; every keypoint has 'xFrame' and 'yFrame' and optionally
            'hm_max'.

    Returns:
        ``(coords, hm_max)`` as NumPy arrays. ``coords`` holds the 16 x values
        (left crop first, then right crop) followed by the 16 y values;
        ``hm_max`` holds the 16 'hm_max' values, 0 where the key is missing.
    """
    left, right = ann['leftCrop'], ann['rightCrop']
    assert len(left) == len(right) == 8
    keypoints = left + right
    # All x coordinates first, then all y coordinates.
    coords = [kp[axis] for axis in ('xFrame', 'yFrame') for kp in keypoints]
    confidences = [kp.get('hm_max', 0) for kp in keypoints]
    return np.array(coords), np.array(confidences)


def akpd_distance(a1, a2):
    """Return the Euclidean distance between the keypoint vectors of two annotations.

    Both arguments are annotation dicts accepted by ``akpd_vector``. Only the
    coordinate vectors are compared; the hm_max values are ignored.
    """
    coords_a, _ = akpd_vector(a1)
    coords_b, _ = akpd_vector(a2)
    delta = coords_a - coords_b
    return np.sqrt(delta.dot(delta))


## Loop input file

In [None]:
# Earliest time.time() value at which run_row emits its next progress log
# line; 0 makes the first processed row log immediately.
next_log_time = 0

def variance_of_laplacian(img):
    """Return the variance of the Laplacian of an image array.

    The image is converted from BGR to grayscale first. run_row stores the
    result per crop as ``llv`` / ``rlv``.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    laplacian = cv2.Laplacian(grayscale, cv2.CV_64F)
    return laplacian.var()


def run_row_iter(item, camera_metadata):
    """Run keypoint prediction and weight estimation for a single item.

    Args:
        item: a ``predict.Item``; its images are loaded here before prediction.
        camera_metadata: passed through to ``W.weight_estimator.predict``.

    Returns:
        ``(result, weight, length, kf)`` where ``result`` is the first (only)
        prediction returned by the global ``model`` and the other three values
        come from the weight estimator.
    """
    batch = [item]
    predict.load_images(batch)
    result = model.predict(batch)[0]
    weight, length, kf = W.weight_estimator.predict(annotation=result, camera_metadata=camera_metadata)
    # The original had an unreachable d_weight computation after this return
    # that referenced an undefined `row`; run_row computes d_weight itself.
    return (result, weight, length, kf)
    
    
def run_row(args):
    """Re-run AKPD and weight estimation for one CSV row and collect comparison metrics.

    Args:
        args: ``(_id, row)`` pair. ``row`` is a mapping with the CSV columns
            read below: ``pen_id``, ``pair_id``, the JSON-encoded
            ``left_crop_metadata`` / ``right_crop_metadata`` /
            ``camera_metadata`` / ``annotation`` columns, ``akpd_score``,
            ``estimated_weight_g`` and ``estimated_k_factor``.

    Returns:
        The prediction dict from ``run_row_iter``, extended in place with the
        crop geometry, the original values (keys ending in ``0``) and the
        derived metrics.

    NOTE(review): ``variance_of_laplacian`` is called with image arrays here.
    A later cell redefines that name to take a file path, so this function
    only works while the array-based definition is the active one.
    """
    _id, row = args
    basedir    = Path(f".data/{row['pen_id']}/{row['pair_id']}/")
    left_path  = basedir / "left.jpg"
    right_path = basedir / "right.jpg"
    lcm        = json.loads(row["left_crop_metadata"])
    rcm        = json.loads(row["right_crop_metadata"])
    cm         = json.loads(row["camera_metadata"])
    annotation = json.loads(row["annotation"])

    item = predict.Item(
        Id             = str(_id),
        left_crop_url  = str(left_path),
        right_crop_url = str(right_path),
        lcm            = {"x_coord": lcm["x_coord"], "y_coord": lcm["y_coord"]},
        rcm            = {"x_coord": rcm["x_coord"], "y_coord": rcm["y_coord"]},
        cm             = {"pixelCountWidth": cm["pixelCountWidth"], "pixelCountHeight": cm["pixelCountHeight"]},
        _down_sample   = 0.95,
    )

    camera_metadata = W.CameraMetadata(
        focal_length        = cm['focalLength'      ],
        focal_length_pixel  = cm['focalLengthPixel' ],
        baseline_m          = cm['baseline'         ],
        pixel_count_width   = cm['pixelCountWidth'  ],
        pixel_count_height  = cm['pixelCountHeight' ],
        image_sensor_width  = cm['imageSensorWidth' ],
        image_sensor_height = cm['imageSensorHeight'],
    )

    # Repeat the prediction at NUM_ITER down-sample factors: 1.0 on the last
    # iteration, 0.06 lower for each earlier one. With NUM_ITER = 1 this is a
    # single run at factor 1.0.
    akpd_score_vec = []
    weight_vec = []
    NUM_ITER = 1
    for i in range(NUM_ITER):
        item._down_sample = 1 - (NUM_ITER - i - 1) * 0.06
        result, weight, length, kf = run_row_iter(item, camera_metadata)
        akpd_score_vec.append(result['probability'])
        weight_vec.append(weight)

    # Metrics below use the result of the last iteration.
    akpd_dist = akpd_distance(annotation, result)
    x, hm = akpd_vector(result)

    # Relative weight difference against the original estimate, in percent.
    try:
        d_weight = (float(row["estimated_weight_g"]) - weight) / (weight or 1e-9) * 100
        d_weight = np.abs(d_weight)
    except Exception:
        # Best effort: the exact failure mode is unknown (presumably an
        # unparsable CSV value or a missing weight; TODO confirm). Catch
        # Exception rather than everything so Ctrl-C still interrupts the run.
        logging.exception(f"Error weight={weight}")
        d_weight = 0

    # Statistics over the iterations whose AKPD score passed the 0.5 threshold.
    weights = [w for w, s in zip(weight_vec, akpd_score_vec) if s >= 0.5]
    if weights:
        akpd_score_rate = len(weights) / NUM_ITER
        weight_std = np.std(weights)
    else:
        akpd_score_rate = 0
        weight_std = 0

    llv = variance_of_laplacian(item.l_img)
    rlv = variance_of_laplacian(item.r_img)

    # LOGGING: at most one progress line every LOG_INTERVAL seconds.
    global next_log_time
    t = time.time()
    if t > next_log_time:
        next_log_time = t + LOG_INTERVAL
        good = '*' if float(row["akpd_score"]) >= 0.5 else ' '
        logging.info("{:04d} {:40s} dKP={}; dw={:0.2f}%; score={:0.3f}{}; akpd_score_rate={:0.1f}; weight_std={:6.1f}; hm={:0.4f}, {:0.4f}".format(
            _id,
            row['pair_id'],
            akpd_dist,
            d_weight,
            float(row["akpd_score"]),
            good,
            akpd_score_rate,
            weight_std,
            hm.mean(), hm.min())
        )

    result['pair_id'            ] = row['pair_id']
    result['basedir'            ] = str(basedir)
    result['lx_coord'           ] = lcm["x_coord"]
    result['ly_coord'           ] = lcm["y_coord"]
    result['l_width'            ] = lcm["width"]
    result['l_height'           ] = lcm["height"]
    result['rx_coord'           ] = rcm["x_coord"]
    result['ry_coord'           ] = rcm["y_coord"]
    result['r_width'            ] = rcm["width"]
    result['r_height'           ] = rcm["height"]
    result['pixelCountWidth'    ] = cm["pixelCountWidth"]
    result['pixelCountHeight'   ] = cm["pixelCountHeight"]
    result['estimated_weight_g' ] = weight
    result['estimated_k_factor' ] = kf
    result['annotation0'        ] = row["annotation"]
    result['akpd_score0'        ] = row["akpd_score"]
    result['estimated_weight_g0'] = row["estimated_weight_g"]
    result['estimated_k_factor0'] = row["estimated_k_factor"]
    result['akpd_dist'          ] = akpd_dist
    result['hm'                 ] = list(hm)
    result['hm_mean'            ] = hm.mean()
    result['hm_min'             ] = hm.min()
    result['akpd_score_vec'     ] = akpd_score_vec
    result['weight_vec'         ] = weight_vec
    result['akpd_score_rate'    ] = akpd_score_rate
    result['weight_std'         ] = weight_std
    result['llv'                ] = llv
    result['rlv'                ] = rlv

    return result

In [None]:
# Seconds between progress log lines emitted by run_row.
LOG_INTERVAL = 10
# Default worker count and row count for load_test.
WORKERS = 3
N = 10000

def load_test(n=N, workers=WORKERS):
    """Run ``run_row`` over the first ``n`` rows of the input CSV and save the results.

    Args:
        n: maximum number of rows to process; fewer are processed when the
            file is shorter.
        workers: size of the thread pool (``multiprocessing.dummy``); 0 runs
            the rows sequentially in the calling thread.

    Returns:
        The list of result dicts, in input order. The same rows are written to
        ``.data/2020-12-20_biomass.re-akpd.csv``.
    """
    filename = '.data/2020-12-20_biomass.csv.gz'
    with gzip.open(filename, 'rt', encoding='utf-8') as fp:
        start_time = time.time()
        reader = csv.DictReader(fp)

        logging.info(f"N={n} workers={workers}")

        # zip() caps the run at n rows and gives every row a sequential id.
        if workers == 0:
            results = list(map(run_row, zip(range(n), reader)))
        else:
            with multiprocessing.dummy.Pool(workers) as pool:
                results = pool.map(run_row, zip(range(n), reader), chunksize=4)

    elapsed_time = time.time() - start_time
    # Report the rows actually processed (the file may hold fewer than n) and
    # avoid dividing by zero when nothing was processed.
    processed = len(results)
    print(f"Processed {processed} rows with workers={workers} avg={elapsed_time/max(processed, 1):0.3f}")

    # newline='' is what the csv module expects for files it writes; without
    # it, platforms that translate newlines get an extra '\r' per row.
    with open('.data/2020-12-20_biomass.re-akpd.csv', 'w', newline='') as fp:
        # NOTE(review): 'modelVerion' is kept as spelled; presumably it matches
        # the key emitted by the predictor. Confirm before renaming.
        fields = "id,pair_id,basedir,lx_coord,ly_coord,l_width,l_height,rx_coord,ry_coord,r_width,r_height,pixelCountWidth,pixelCountHeight," \
            "estimated_weight_g,estimated_k_factor,"\
            "annotation0,akpd_score0,estimated_weight_g0,estimated_k_factor0,"\
            "akpd_dist,hm,hm_mean,hm_min,weight_vec,akpd_score_vec,akpd_score_rate,weight_std,llv,rlv," \
            "leftCrop,rightCrop,probability,modelVerion,scorerVersion".split(",")
        writer = csv.DictWriter(fp, fields)
        writer.writeheader()
        for result in results:
            # Serialise the keypoint lists on a copy so the returned results
            # keep their list form.
            r1 = result.copy()
            r1['leftCrop'] = json.dumps(r1['leftCrop'])
            r1['rightCrop'] = json.dumps(r1['rightCrop'])
            writer.writerow(r1)
        print(f"Written {len(results)} rows to {fp.name}")

    return results

#results = load_test(10000,workers=3)
# Shorter run: at most 200 rows on a pool of 3 worker threads.
results = load_test(200,workers=3)

In [None]:
# NOTE(review): `r0` is not defined in any cell visible in this notebook, and
# run_row stores the original annotation under 'annotation0', not
# 'orig_annotation'. Confirm where r0 comes from before re-running.
a0 = json.loads(r0[0]['orig_annotation'])

In [None]:
# Element-wise difference between the keypoint vector of r0[0] and that of the
# annotation a0.
x0,_=akpd_vector(a0) 
x1,_=akpd_vector(r0[0])
x1-x0

## hm analysis

In [None]:
# Keypoint names used as column labels for the 8 per-crop hm columns below.
# NOTE(review): the order presumably matches the predictor's keypoint order; confirm.
KP= ["TAIL_NOTCH", "ADIPOSE_FIN", "UPPER_LIP", "ANAL_FIN", "PELVIC_FIN", "EYE", "PECTORAL_FIN", "DORSAL_FIN"]

In [None]:
# Stack the per-row 'hm' lists (hm_max per keypoint, left crop then right
# crop, as built by akpd_vector) into one array.
hma = np.array([r['hm'] for r in results])
hma.shape

In [None]:
# Number of result rows.
len(results)

In [None]:
# Split each row's hm vector into left-crop (first 8) and right-crop
# (remaining) columns, one column per keypoint, tagged with the side.
hm1 = pd.DataFrame(hma[:,0:8],columns=KP)
hm1['side'] = 'left'
hm2 = pd.DataFrame(hma[:,8:],columns=KP)
hm2['side'] = 'right'
hm3 = pd.concat([hm1, hm2])

# Keypoint names ordered by median hm value, highest first. The median is
# taken over the keypoint columns only: the original took it over the whole
# frame, which dropped (or, in newer pandas, failed on) the string 'side'
# column, and then read the order from the row index instead of the names.
sorted_kp = list(hm3[KP].median(axis=0).sort_values(ascending=False).index)
# Reorder the keypoint columns but keep 'side', which melt() needs below.
hm3 = hm3[sorted_kp + ['side']]

# Long format for seaborn: columns 'side', 'variable' (keypoint), 'value'.
hmm = hm3.melt(id_vars=['side'])
hmm

In [None]:
# Column labels ordered by median value, highest first.
hm3.median(axis=0).sort_values(ascending=False).index

In [None]:
# Split violin plot of the hm values per keypoint: one half per crop side,
# keypoints in sorted_kp order.
fig, ax = plt.subplots(figsize=(15,10))
sns.violinplot(x='variable', y='value', hue='side', split=True, data=hmm, order=sorted_kp, bw=.05, cut=0, ax=ax)

In [None]:
# Same plot as the previous cell (identical code).
fig, ax = plt.subplots(figsize=(15,10))
sns.violinplot(x='variable', y='value', hue='side', split=True, data=hmm, order=sorted_kp, bw=.05, cut=0, ax=ax)

### Keypoint (KP) visualization

In [None]:
# One colour per keypoint, taken from the 'Accent' qualitative colormap.
# plt.get_cmap replaces plt.cm.get_cmap, which is deprecated and removed in
# recent Matplotlib releases. The earlier Set3/Pastel1/Pastel2 assignments
# were overwritten immediately and have been dropped.
cmap = plt.get_cmap('Accent')
colors = np.array(cmap.colors[:8])

def _enhance(image, clip_limit=5):
    """Return a contrast-enhanced copy of a BGR image.

    CLAHE (8x8 tiles, ``clip_limit`` as clip limit) is applied to the
    lightness channel in LAB space; the A and B channels are left untouched
    and the result is converted back to BGR.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8))
    lab_equalized = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(lab_equalized, cv2.COLOR_LAB2BGR)

def parse_kps(ann):
    """Decode the keypoints of both crops from JSON-encoded annotation fields.

    Args:
        ann: mapping whose 'leftCrop' and 'rightCrop' values are JSON strings
            encoding lists of keypoint dicts with 'xCrop', 'yCrop' and
            optionally 'hm_max'.

    Returns:
        4-tuple ``(left_xy, left_hm, right_xy, right_hm)``. The ``*_xy``
        arrays hold one ``[xCrop, yCrop]`` row per keypoint; the ``*_hm``
        arrays hold the 'hm_max' values, 0.0 where the key is missing.
    """
    # Decode both sides before extracting anything, left first.
    decoded = [json.loads(ann[side]) for side in ('leftCrop', 'rightCrop')]
    parts = []
    for points in decoded:
        parts.append(np.array([[p['xCrop'], p['yCrop']] for p in points]))
        parts.append(np.array([p.get('hm_max', 0.0) for p in points]))
    return tuple(parts)

def show_img(ax, image_path, pts, hm):
    """Draw an enhanced crop image with its keypoints on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        image_path: ``pathlib.Path`` of the image file.
        pts: array of keypoint ``[x, y]`` rows (8 rows, matching ``colors``).
        hm: array of per-keypoint hm_max values; lower values give larger and
            more transparent markers.

    NOTE(review): cv2.imdecode yields BGR channel order while imshow treats
    arrays as RGB, so red and blue appear swapped. Confirm this is acceptable.
    """
    encoded = np.frombuffer(image_path.read_bytes(), dtype=np.uint8)
    picture = _enhance(cv2.imdecode(encoded, cv2.IMREAD_COLOR))
    ax.imshow(picture, alpha=0.6)

    # Marker area shrinks and opacity grows as hm approaches 1.
    marker_size = 2000*(1-hm)**2 + 8
    opacity = np.clip(hm**1.5, 0.2, 1).reshape((-1,1))
    marker_rgba = np.hstack([colors, opacity])
    ax.scatter(x=pts[:,0], y=pts[:,1], marker='o', c=marker_rgba, s=marker_size)

    # Polyline through the keypoints in a fixed visiting order (presumably
    # tracing the body outline given the KP ordering; TODO confirm).
    path = pts[[0,1,7,2,5,6,4,3]]
    ax.plot(path[:,0], path[:,1], c='orange', ls="-", lw=1.1, alpha=0.99)
    
    
def render_row(df, idx, figsize=(36,12)):
    """Plot the left and right crops of one data-frame row with their keypoints.

    Args:
        df: data frame with 'leftCrop', 'rightCrop', 'basedir' and
            'akpd_score0' columns.
        idx: positional row index (``df.iloc``).
        figsize: Matplotlib figure size.

    Returns:
        ``(ax1, ax2)``: the axes holding the left and right crop.
    """
    record = df.iloc[idx]
    left_pts, left_hm, right_pts, right_hm = parse_kps(record)
    crop_dir = Path(record['basedir'])
    fig, axes = plt.subplots(figsize=figsize, ncols=2, nrows=1, constrained_layout=False)
    ax1, ax2 = axes
    show_img(ax1, crop_dir/'left.jpg', left_pts, left_hm)
    show_img(ax2, crop_dir/'right.jpg', right_pts, right_hm)
    fig.suptitle(f"akpd_score={record['akpd_score0']}", y=0.7)
    return ax1, ax2

In [None]:
# NOTE: df_score is created in the following cell; run that cell first.
ax1, ax2 = render_row(df_score, 5381, figsize=(16,8))

In [None]:
# Rows ordered by the original AKPD score, lowest first. `df` must already be
# loaded by one of the pd.read_csv cells further down.
df_score = df.sort_values(by='akpd_score0')

In [None]:
#ax1, ax2 = render_row(results0, 284)
# Render the row at position 914 of df.
ax1, ax2 = render_row(df, 914)

In [None]:
# 106 -  3D

In [None]:
# Parse entry 12 and keep its left-crop keypoint coordinates.
# NOTE(review): parse_kps json-decodes ann['leftCrop'], so the entry must hold
# JSON strings. Confirm `results` holds such rows here.
pk=parse_kps(results[12])
lc=pk[0]

In [None]:
# x coordinates of the left-crop keypoints in the order used for the outline polyline.
lc[[0,1,7,2,5,6,4,3]][:,0]

In [None]:
# Scratch check of candidate alpha / marker-size mappings for a few hm values.
# NOTE: this rebinds the global name `hm`.
hm=np.array([.01, .1, .5 ,.8, .99])
print(np.clip(hm, 0.1, 1)) # alpha
print(1000*(1-hm)**2+3)
print(hm**.1)

In [None]:
# Scratch: build 10 red RGBA colours with per-point alpha.
# NOTE(review): `alphas` is not defined in any cell visible in this notebook.
rgba_colors = np.zeros((10,4))
# for red the first column needs to be one
rgba_colors[:,0] = 1.0
# the fourth column needs to be your alphas
rgba_colors[:, 3] = alphas

## Blurries

In [None]:
def render_pair(pair_id, figsize=(36,12)):
    """Show the enhanced left and right images of a pair side by side.

    Args:
        pair_id: pair identifier; surrounding whitespace is ignored. The pen id
            is the text before the first '-' without its first character.
        figsize: Matplotlib figure size.

    Returns:
        ``(ax1, ax2)``: the axes holding the left and right image.

    Raises:
        AssertionError: if ``.data/<pen_id>/<pair_id>`` does not exist.
    """
    pair = pair_id.strip()
    pen = pair.split('-', 1)[0][1:]
    pair_dir = Path(f'.data/{pen}/{pair}')
    assert pair_dir.exists()
    fig, axes = plt.subplots(figsize=figsize, ncols=2, nrows=1)
    ax1, ax2 = axes
    left_image = _enhance(cv2.imread(str(pair_dir/'left.jpg')))
    ax1.imshow(left_image)
    right_image = _enhance(cv2.imread(str(pair_dir/'right.jpg')))
    ax2.imshow(right_image)
    return ax1, ax2

def variance_of_laplacian(p):
    """Return the variance of the Laplacian of the enhanced image stored at ``p``.

    The file is read with cv2.imread, passed through ``_enhance`` and converted
    to grayscale before the Laplacian is taken.

    NOTE(review): this replaces the array-based ``variance_of_laplacian(img)``
    defined in an earlier cell, which ``run_row`` calls with image arrays.
    """
    enhanced = _enhance(cv2.imread(str(p)))
    grayscale = cv2.cvtColor(enhanced, cv2.COLOR_BGR2GRAY)
    laplacian = cv2.Laplacian(grayscale, cv2.CV_64F)
    return laplacian.var()

In [None]:
# Render one pair by id; render_pair strips the surrounding newlines. The
# trailing ';' suppresses the cell's output value.
render_pair("""
p198-t20201220_025809.455983-c1l4r
""");

In [None]:
# For the first 80 rows of the saved results, log the original score, minimum
# hm, weight std-dev and the Laplacian variance of the left and right images
# (path-based variance_of_laplacian).
with open(".data/2020-12-28_biomass.1000.weight_vec.csv") as fp:
    reader = csv.DictReader(fp)
    for i, row in zip(range(80),reader):
        basedir = Path(row['basedir']) 
        llv = variance_of_laplacian(basedir/'left.jpg')
        rlv = variance_of_laplacian(basedir/'right.jpg')
        logging.info("{:40s} s={:0.3f} hm={:0.4f} {:7.2f} {:-9.5f} {:-9.5f}".format(
            row['pair_id'],
            float(row['akpd_score0']),
            float(row['hm_min']),
            float(row['weight_std']),
            llv,
            rlv,
        ))

## Skip classifier

In [None]:
from image_quality.skip_classifier import SkipPredictor

# Skip classifier constructed from the local model file.
skip_pred = SkipPredictor(
    '.data/skip-classifier/model.pt',
    )


In [None]:
# Row-wise helpers: skip-classifier prediction for the left / right image
# found under row.basedir.
skip_left = lambda row: skip_pred.predict(cv2.imread(str(Path(row.basedir) / 'left.jpg')))
skip_right = lambda row: skip_pred.predict(cv2.imread(str(Path(row.basedir) / 'right.jpg')))

%time df['l_skip_score'] = df.apply(skip_left, axis=1)
%time df['r_skip_score'] = df.apply(skip_right, axis=1)

In [None]:
# Persist df including the skip-score columns (row index not written).
df.to_csv('.data/2020-12-30_biomass.re-akpd.skip_score.csv', index=False)

In [None]:
# Same helpers as defined in the earlier skip-score cell.
skip_left = lambda row: skip_pred.predict(cv2.imread(str(Path(row.basedir) / 'left.jpg')))
skip_right = lambda row: skip_pred.predict(cv2.imread(str(Path(row.basedir) / 'right.jpg')))

# Spot-check one row: print its id, original score and both skip predictions,
# then render the pair.
row=df.iloc[4]
print("{} {}".format(row.pair_id, row.akpd_score0))
print(skip_pred.predict(cv2.imread(str(Path(row.basedir) / 'left.jpg'))))
print(skip_pred.predict(cv2.imread(str(Path(row.basedir) / 'right.jpg'))))
render_pair(row.pair_id);

## AKPD on thumbnails

In [None]:
# Reload the data frame saved by the skip-score cell.
df = pd.read_csv('.data/2020-12-30_biomass.re-akpd.skip_score.csv')

In [None]:
# Reset the progress-log throttle so the next processed row logs immediately.
next_log_time = 0

def show_img_arr(ax, img, pts, hm, enhance_flag=False):
    """Draw an image array on ``ax``, optionally with keypoints.

    Args:
        ax: Matplotlib axes to draw on.
        img: image array to show.
        pts: array of keypoint ``[x, y]`` rows, or None to draw the image only.
        hm: array of per-keypoint hm_max values (unused when ``pts`` is None).
        enhance_flag: when true, the image is passed through ``_enhance`` first.
    """
    shown = _enhance(img) if enhance_flag else img
    ax.imshow(shown, alpha=0.999)

    if pts is None:
        return

    # Marker area shrinks and opacity grows as hm approaches 1.
    marker_size = 2000*(1-hm)**2 + 8
    opacity = np.clip(hm**1.5, 0.2, 1).reshape((-1,1))
    marker_rgba = np.hstack([colors, opacity])
    ax.scatter(x=pts[:,0], y=pts[:,1], marker='o', c=marker_rgba, s=marker_size)

    # Polyline through the keypoints in a fixed visiting order.
    path = pts[[0,1,7,2,5,6,4,3]]
    ax.plot(path[:,0], path[:,1], c='orange', ls="-", lw=1.1, alpha=0.9)
    
    
def render_row_tb(row, ann, l_img, r_img, figsize=(36,10)):
    """Plot a left/right image pair, with keypoints when an annotation is given.

    Args:
        row: record with ``akpd_score0``, ``l_crop_size`` and ``r_crop_size``.
        ann: annotation accepted by ``parse_kps`` with a 'probability' entry,
            or a falsy value to draw the images only.
        l_img, r_img: image arrays; only the right one is enhanced.
        figsize: Matplotlib figure size.

    Returns:
        ``(ax1, ax2)``: the axes holding the left and right image.

    NOTE(review): parse_kps json-decodes ann['leftCrop'] / ann['rightCrop'].
    Confirm the annotations passed here carry JSON strings.
    """
    fig, axes = plt.subplots(figsize=figsize, ncols=2, nrows=1)
    ax1, ax2 = axes
    if ann:
        left_pts, left_hm, right_pts, right_hm = parse_kps(ann)
    else:
        left_pts = left_hm = right_pts = right_hm = None
    show_img_arr(ax1, l_img, left_pts, left_hm)
    show_img_arr(ax2, r_img, right_pts, right_hm, enhance_flag=True)
    # -1 marks "no new prediction" in the title.
    if ann:
        akpd_score1 = ann['probability']
    else:
        akpd_score1 = -1
    fig.subplots_adjust(top=0.9)
    fig.suptitle(f"akpd_score 0->1\n\n{float(row.akpd_score0):0.3f}\n{akpd_score1:0.3f}\n\nsize = {row.l_crop_size:.0%}, {row.r_crop_size:.0%}", y=0.8)
    return ax1, ax2


def run_row_tb(args):
    """Run AKPD on crops cut out of the thumbnail images of one row.

    The thumbnails (``left_tb.jpg`` / ``right_tb.jpg`` under ``row.basedir``)
    are loaded, the crop rectangle is scaled from full-frame pixels to a
    512x512 grid, cut out, and resized back to the original crop size before
    prediction.

    Args:
        args: ``(_id, row)`` pair; ``row`` exposes the crop geometry,
            ``pixelCountWidth`` / ``pixelCountHeight``, ``basedir``,
            ``pair_id`` and ``akpd_score0`` as attributes.

    Returns:
        dict with ``pts``, ``tb_hm``, ``tb_hm_min``, ``tb_akpd_score`` and
        ``akpd_score0``, or None when processing the row raised (the error is
        logged and the batch continues).
    """
    global next_log_time
    _id = None  # keeps the error log usable even if unpacking `args` fails
    try:
        _id, row = args
        basedir    = Path(row.basedir)
        left_path  = basedir / "left_tb.jpg"
        right_path = basedir / "right_tb.jpg"
        item = predict.Item(
            Id             = str(_id),
            left_crop_url  = str(left_path),
            right_crop_url = str(right_path),
            lcm            = {"x_coord": row.lx_coord, "y_coord": row.ly_coord},
            rcm            = {"x_coord": row.rx_coord, "y_coord": row.ry_coord},
            cm             = {"pixelCountWidth": row.pixelCountWidth, "pixelCountHeight": row.pixelCountHeight},
            _down_sample   = 1,
        )
        items = [item]
        predict.load_images(items)

        # Scale factors from full-frame pixel coordinates to a 512x512 grid
        # (presumably the thumbnail size; TODO confirm).
        xscale = 512 / row.pixelCountWidth
        yscale = 512 / row.pixelCountHeight

        l_img0 = item.l_img
        r_img0 = item.r_img

        _xscale = lambda x: int(x*xscale)
        _yscale = lambda x: int(x*yscale)

        # Cut the left crop out of the thumbnail and resize it to the crop's
        # original width/height.
        xs0 = _xscale(row.lx_coord)
        xs1 = _xscale(row.lx_coord + row.l_width)
        ys0 = _yscale(row.ly_coord)
        ys1 = _yscale(row.ly_coord + row.l_height)
        item.l_img = l_img0[ys0:ys1, xs0:xs1, :]
        item.l_img = cv2.resize(item.l_img, (row.l_width, row.l_height))

        # Same for the right crop.
        xs0 = _xscale(row.rx_coord)
        xs1 = _xscale(row.rx_coord + row.r_width)
        ys0 = _yscale(row.ry_coord)
        ys1 = _yscale(row.ry_coord + row.r_height)
        item.r_img = r_img0[ys0:ys1, xs0:xs1, :]
        item.r_img = cv2.resize(item.r_img, (row.r_width, row.r_height))

        if RENDER:
            # Show the full thumbnails with the crop rectangles outlined.
            ax1, ax2 = render_row_tb(row, None, l_img0, r_img0)

            rect = patches.Rectangle((row.lx_coord*xscale, row.ly_coord*yscale), row.l_width*xscale, row.l_height*yscale,
                                     linewidth=1, edgecolor='y', facecolor="None")
            ax1.add_patch(rect)
            rect = patches.Rectangle((row.rx_coord*xscale, row.ry_coord*yscale), row.r_width*xscale, row.r_height*yscale,
                                     linewidth=1, edgecolor='y', facecolor="None")
            ax2.add_patch(rect)

        akpd_result = model.predict(items)[0]

        pts, hm = akpd_vector(akpd_result)
        result = dict(
            pts           = pts,
            tb_hm         = hm,
            tb_hm_min     = hm.min(),
            tb_akpd_score = akpd_result['probability'],
            akpd_score0   = row.akpd_score0
        )

        if RENDER:
            ax1, ax2 = render_row_tb(row, akpd_result, item.l_img, item.r_img)

        # LOGGING: at most one progress line every LOG_INTERVAL seconds.
        t = time.time()
        if t > next_log_time:
            next_log_time = t + LOG_INTERVAL
            logging.info("{:04d} {:40s} tb_score={:0.3f}; tb_hm_min={:0.4f}".format(
                _id,
                row.pair_id,
                result['tb_akpd_score'],
                result['tb_hm_min'],
            ))

        return result

    except Exception:
        # Best effort per row: log and carry on with the batch. Exception (not
        # a bare except) so Ctrl-C / SystemExit still stop the run.
        logging.exception(f"id={_id} something breaks")
        return None
        

In [None]:
# test run_row_tb on a single row with rendering enabled; the printed vector
# is the keypoint vector of the original annotation.
RENDER = 1
row = df.iloc[5]
print(akpd_vector(json.loads(row.annotation0))[0])
run_row_tb((-1, row))

In [None]:
# Batch settings: no rendering, progress log at most every 15 seconds.
RENDER = 0
LOG_INTERVAL = 15

def run_tb_akpd(n, workers):
    """Run ``run_row_tb`` over the first ``n`` rows of the global ``df``.

    Args:
        n: maximum number of rows to process.
        workers: size of the thread pool (``multiprocessing.dummy``); 0 runs
            the rows sequentially in the calling thread.

    Returns:
        List of ``run_row_tb`` results in row order.
    """
    started = time.time()
    logging.info(f"N={n} workers={workers}")

    # zip() caps the run at n rows and gives every row a sequential id.
    jobs = zip(range(n), df.itertuples(index=False))
    if workers == 0:
        results = list(map(run_row_tb, jobs))
    else:
        with multiprocessing.dummy.Pool(workers) as pool:
            results = pool.map(run_row_tb, jobs, chunksize=4)

    elapsed_time = time.time() - started
    print(f"Processed {n} rows with workers={workers} avg={elapsed_time/n:0.3f}")

    return results

# Thumbnail run over up to 10000 rows on 3 worker threads. This rebinds the
# global `results`.
results = run_tb_akpd(10000, workers=3)
len(results)

In [None]:
# Inspect the first result.
results[0]

In [None]:
# Copy the thumbnail metrics into df; pandas aligns the assignment on the index.
df1 = pd.DataFrame(results)
df['tb_hm'] = df1['tb_hm']
df['tb_hm_min'] = df1['tb_hm_min']
df['tb_akpd_score'] = df1['tb_akpd_score']

In [None]:
# Persist df including the thumbnail columns (row index not written).
df.to_csv('.data/2020-12-30_biomass.re-akpd.tb_score.csv', index=False)

In [None]:
def _to_jsonable(value):
    """json.dump fallback: convert NumPy arrays and scalars to plain Python values."""
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# run_row_tb results hold NumPy arrays ('pts', 'tb_hm'), which json cannot
# encode on its own. The context manager also makes sure the file is flushed
# and closed.
with open('tb.json', 'w') as _fp:
    json.dump(results, _fp, default=_to_jsonable)

## AKPD score

In [None]:
from operator import attrgetter, itemgetter

# Read the original AKPD score of every row of the input CSV.
akpd_scores = []
filename = '.data/2020-12-20_biomass.csv.gz'
with gzip.open(filename, 'rt', encoding='utf-8') as fp:
        reader = csv.DictReader(fp)
        for row in reader:
            akpd_scores.append(float(row['akpd_score']))

# Attach the scores to the results by position; zip stops at the shorter sequence.
for r, s in zip(results, akpd_scores):
    r['akpd_score'] = s

# Split at the 0.5 threshold.
results1 = [r for r in results if r['akpd_score'] >= 0.5]
results0 = [r for r in results if r['akpd_score'] < 0.5]

# results1 ascending and results0 descending: both lists start with the
# scores closest to the threshold.
results1.sort(key=itemgetter('akpd_score'))
results0.sort(key=itemgetter('akpd_score'), reverse=True)
print(f"results1={len(results1)}, results0={len(results0)}")

akpd_scores = np.array(akpd_scores)
akpd_scores


In [None]:
# Number of scores strictly above 0.5 (the split in the previous cell uses >= 0.5).
(akpd_scores > 0.5).sum()

In [None]:
# Histogram of the original AKPD scores in 30 bins.
fig, ax = plt.subplots()
ax.hist(akpd_scores, bins=30)
ax.set_title('akpd scores');

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
import sklearn.metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the data frame used for the classifier experiments below.
# NOTE(review): later cells read tb_* and *_skip_score columns; confirm this
# file contains them or load the matching CSV instead.
df = pd.read_csv('.data/2020-12-29_biomass.re-akpd.dimensions.csv')

In [None]:
# Left crop: centre position as a fraction of the frame size (assuming
# x_coord / y_coord are the crop's top-left corner; TODO confirm).
df['lx_off'] = (df.lx_coord + df.l_width / 2) / df.pixelCountWidth                                                     
df['ly_off'] = (df.ly_coord + df.l_height / 2) / df.pixelCountHeight 
# Flags: the crop touches the left / right / top / bottom edge of the frame.
df['l_trim_l'] = (df.lx_coord == 0).astype(int)
df['l_trim_r'] = (df.lx_coord + df.l_width + 1 >= df.pixelCountWidth).astype(int)
df['l_trim_t'] = (df.ly_coord == 0).astype(int)
df['l_trim_b'] = (df.ly_coord + df.l_height + 1 >= df.pixelCountHeight).astype(int)
# Crop area as a fraction of the frame area.
df['l_crop_size'] = ((df.l_width + 1) * (df.l_height + 1)) / (df.pixelCountWidth * df.pixelCountHeight)

# Right crop: same features.
df['rx_off'] = (df.rx_coord + df.r_width / 2) / df.pixelCountWidth                                                     
df['ry_off'] = (df.ry_coord + df.r_height / 2) / df.pixelCountHeight 
df['r_trim_l'] = (df.rx_coord == 0).astype(int)
df['r_trim_r'] = (df.rx_coord + df.r_width + 1 >= df.pixelCountWidth).astype(int)
df['r_trim_t'] = (df.ry_coord == 0).astype(int)
df['r_trim_b'] = (df.ry_coord + df.r_height + 1 >= df.pixelCountHeight).astype(int)
df['r_crop_size'] = ((df.r_width + 1) * (df.r_height + 1)) / (df.pixelCountWidth * df.pixelCountHeight)

In [None]:
# Binary target: original AKPD score at or above 0.5.
y = df.akpd_score0 >= 0.5
# 'hm' holds one JSON list per row; decode into a 2-D array.
HM = np.array(list(df.hm.apply(json.loads)))
# NOTE(review): tb_hm is used as-is. After a CSV round-trip the column would
# hold strings; confirm it contains numeric sequences here.
TB_HM = np.array(list(df.tb_hm))
TB_AKPD_OK = df.tb_akpd_score >= 0.5

In [None]:
# Scalar feature columns; the commented-out names are candidates that were
# tried and disabled.
X_misc = df[[
#'lx_off',
#'ly_off',
#'l_trim_l',
#'l_trim_r',
#'l_trim_t',
#'l_trim_b',
#'l_crop_size',
#'rx_off',
#'ry_off',
#'r_trim_l',
#'r_trim_r',
#'r_trim_t',
#'r_trim_b',
#'r_crop_size',    
#'llv',
#'rlv',
#'l_skip_score',
#'r_skip_score',
#'hm_min',
#'hm_mean',
'tb_hm_min',
'tb_akpd_score',
]].to_numpy()

# Alternative feature sets kept as experiment history. Each line overwrites
# the previous one, so only the LAST assignment to X takes effect.
X = HM
X = np.hstack([TB_AKPD_OK.astype(float).to_numpy().reshape(-1,1),X_misc])
X = TB_AKPD_OK.astype(float).to_numpy().reshape(-1,1)
X = TB_AKPD_OK.astype(float).to_numpy().reshape(-1,1)
X = TB_AKPD_OK.astype(float).to_numpy().reshape(-1,1)
X = np.hstack([TB_AKPD_OK.astype(float).to_numpy().reshape(-1,1),X_misc])
X = TB_HM
X = X_misc
X = np.hstack([TB_HM,X_misc])

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=437)
print((X_train.shape, X_test.shape, y_train.shape, y_test.shape))

# The scaler is fitted on the training part only.
scaler = StandardScaler().fit(X_train)

In [None]:
# Fit logistic regression on the standardized training features and report
# accuracy on both splits.
clf = LogisticRegression(random_state=0).fit(scaler.transform(X_train), y_train)
print(clf.coef_)
train_score = clf.score(scaler.transform(X_train), y_train)
test_score = clf.score(scaler.transform(X_test), y_test)
print(f"training score {train_score}")
print(f"test score {test_score}")
# confusion_matrix takes (y_true, y_pred): rows are true classes, columns are
# predictions. The original passed them the other way round, which transposes
# the matrix.
sklearn.metrics.confusion_matrix(y_test, clf.predict(scaler.transform(X_test)))

In [None]:
def plot_precision_recall_curve(clf, X_test, y_test, title):
    """Plot the precision-recall curve of a fitted binary classifier.

    Args:
        clf: fitted classifier exposing ``predict`` and ``predict_proba``.
        X_test: feature matrix in the same form the classifier was trained on.
        y_test: true binary labels.
        title: plot title.

    Returns:
        ``(recall, precision, thresholds)`` as computed by
        ``sklearn.metrics.precision_recall_curve``.
    """
    # Probability of the positive class only.
    positive_probs = clf.predict_proba(X_test)[:, 1]
    predicted = clf.predict(X_test)
    precision, recall, thresholds = sklearn.metrics.precision_recall_curve(y_test, positive_probs)
    f1 = sklearn.metrics.f1_score(y_test, predicted)
    pr_auc = sklearn.metrics.auc(recall, precision)

    # Baseline: the share of positive labels.
    baseline = len(y_test[y_test==1]) / len(y_test)
    plt.plot([0, 1], [baseline, baseline], linestyle='--', label='No Skill')
    plt.plot(recall, precision, marker='.', label='Logistic')
    plt.ylabel('Precision')
    plt.xlabel('Recall\n\n' + 'f1=%.5f auc=%.5f' % (f1, pr_auc))
    plt.title(title)
    plt.legend()
    return recall, precision, thresholds

# The classifier was fitted on scaler-transformed features, so the test
# features must go through the same scaler before prediction.
lr_recall, lr_precision, _ = plot_precision_recall_curve(clf, scaler.transform(X_test), y_test, 'AKPD score on Thumbnail + min heatmap')

### AUC

In [None]:
# Predicted probability of the positive class for the test split.
y_pred = clf.predict_proba(scaler.transform(X_test))[:,1]

In [None]:
# ROC curve of the classifier on the test split. roc_curve returns
# (fpr, tpr, thresholds); the original called precision_recall_curve, whose
# (precision, recall, thresholds) output was then plotted as if it were an ROC.
# NOTE: plot_roc is defined in a later cell; run that cell first.
fpr, tpr, thresholds = sklearn.metrics.roc_curve(y_test, y_pred)
auc = sklearn.metrics.roc_auc_score(y_test, y_pred)
plot_roc(fpr, tpr, auc, plt.gca())

In [None]:
# Class probabilities for the first 10 test rows. The features go through the
# scaler because the classifier was fitted on standardized inputs.
clf.predict_proba(scaler.transform(X_test[0:10]))
#sklearn.metrics.confusion_matrix(clf.predict(scaler.transform(X_test)),y_test)

In [None]:
# NOTE(review): this passes the feature matrix where labels are expected. It
# presumably dates from the experiment where X was the single boolean
# TB_AKPD_OK column; confirm before re-running with the current X.
sklearn.metrics.confusion_matrix(X_test,y_test)

In [None]:
# Sum of the first feature column of the test split.
X_test[:,0].sum()

In [None]:
# Coefficients in groups of 8 / 8 / rest, matching X = [TB_HM | X_misc] when
# TB_HM has 16 columns (left then right keypoints).
print(clf.coef_[0,:8])
print(clf.coef_[0,8:16])
print(clf.coef_[0,16:])

In [None]:
def plot_roc(fpr, tpr, auc, ax):
    """Draw an ROC curve and the chance diagonal on ``ax``.

    Args:
        fpr: false-positive rates (x values).
        tpr: true-positive rates (y values).
        auc: area under the curve, shown in the legend with two decimals.
        ax: Matplotlib axes to draw on.
    """
    line_width = 2
    auc_label = 'AUC = %0.2f' % auc
    ax.plot(fpr, tpr, color='darkorange', lw=line_width, label=auc_label)
    # Chance diagonal for reference.
    ax.plot([0, 1], [0, 1], color='navy', lw=line_width, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate (skiprate)')
    ax.set_ylabel('Recall (KPI)')
    ax.set_title('ROC Curve', size=20)
    ax.legend(loc="lower right")

In [None]:
# ROC of the right-image skip score against the target over the whole frame.
# The score is negated so that a lower skip score ranks as more likely y=True.
score = -df.r_skip_score
fpr, tpr, thresholds = sklearn.metrics.roc_curve(y, score)
auc = sklearn.metrics.roc_auc_score(y, score)
plot_roc(fpr, tpr, auc, plt.gca())

## profiler

In [None]:
import cProfile
import pstats
from pstats import SortKey

In [None]:
# Profile a sequential (workers=0) 200-row run, store the stats in 'restats'
# and print the 30 entries with the highest cumulative time.
cProfile.run("load_test(200,0)", "restats")
p = pstats.Stats('restats')
p.sort_stats(SortKey.CUMULATIVE).print_stats(30)

In [None]:
# Same profile, ordered by time spent inside each function itself.
p.sort_stats(SortKey.TIME).print_stats(30)

```
Orig
2020-12-24 18:32:06,106 INFO  N=500 WORKERS=6
2020-12-24 18:32:07,924 INFO  000 p67-t20201220_000004.170682-c1l2r        l0=0.653 r1=0.6793
2020-12-24 18:32:18,476 INFO  050 p193-t20201220_000416.820198-c1l2r       l0=0.728 r1=0.6188
2020-12-24 18:32:35,205 INFO  100 p86-t20201220_000900.746319-c1l2r        l0=0.699 r1=0.7934
2020-12-24 18:32:43,916 INFO  150 p86-t20201220_001215.793702-c1l2r        l0=0.612 r1=0.0114
2020-12-24 18:32:59,820 INFO  200 p138-t20201220_001653.210747-c1l2r       l0=0.630 r1=0.1509
2020-12-24 18:33:12,730 INFO  250 p4-t20201220_002520.027040-c1l3r         l0=0.465 r1=0.0393
2020-12-24 18:33:24,633 INFO  300 p85-t20201220_003124.881337-c1l2r        l0=0.008 r1=0.0094
2020-12-24 18:33:37,326 INFO  350 p85-t20201220_003625.595207-c1l2r        l0=0.687 r1=0.8297
2020-12-24 18:33:49,364 INFO  400 p85-t20201220_004024.373046-c1l2r        l0=0.648 r1=0.9228
2020-12-24 18:34:01,708 INFO  450 p86-t20201220_004418.193731-c1l2r        l0=0.690 r1=0.7138

Pair
2020-12-24 18:46:31,496 INFO  N=500 WORKERS=1
2020-12-24 18:46:32,058 INFO  000 p67-t20201220_000004.170682-c1l2r        l0=0.653 r1=0.6793
2020-12-24 18:46:56,986 INFO  050 p193-t20201220_000416.820198-c1l2r       l0=0.728 r1=0.6188
2020-12-24 18:47:25,557 INFO  100 p86-t20201220_000900.746319-c1l2r        l0=0.699 r1=0.7934
2020-12-24 18:47:51,579 INFO  150 p86-t20201220_001215.793702-c1l2r        l0=0.612 r1=0.0114
2020-12-24 18:48:17,300 INFO  200 p138-t20201220_001653.210747-c1l2r       l0=0.630 r1=0.1509
2020-12-24 18:48:42,029 INFO  250 p4-t20201220_002520.027040-c1l3r         l0=0.465 r1=0.0393
2020-12-24 18:49:08,200 INFO  300 p85-t20201220_003124.881337-c1l2r        l0=0.008 r1=0.0094
2020-12-24 18:49:34,432 INFO  350 p85-t20201220_003625.595207-c1l2r        l0=0.687 r1=0.8297
2020-12-24 18:50:00,873 INFO  400 p85-t20201220_004024.373046-c1l2r        l0=0.648 r1=0.9228
2020-12-24 18:50:24,729 INFO  450 p86-t20201220_004418.193731-c1l2r        l0=0.690 r1=0.7138
```

### Left and right separate

- Processed 500 rows with workers=1 avg=0.560 -> 500
- Processed 500 rows with workers=2 avg=0.330 -> 500
- Processed 500 rows with workers=3 avg=0.275 -> 500
- Processed 500 rows with workers=4 avg=0.271 -> 500
- Processed 500 rows with workers=5 avg=0.270 -> 500
- Processed 500 rows with workers=6 avg=0.269 -> 500

### Input as pair

- Processed 500 rows with workers=1 avg=0.519 -> 500
- Processed 500 rows with workers=2 avg=0.303 -> 500
- Processed 500 rows with workers=3 avg=0.266 -> 500
- Processed 500 rows with workers=4 avg=0.257 -> 500
- Processed 500 rows with workers=5 avg=0.259 -> 500
- Processed 500 rows with workers=6 avg=0.251 -> 500


In [None]:
# NOTE(review): `left_path` is only assigned inside functions in the visible
# cells, and `row` here must be a record with a 'left_crop_url' field; confirm
# which earlier state this cell relied on.
left_path, row['left_crop_url']

In [None]:
# Part of the URL after the first '.amazonaws.com' (empty string if absent).
row['left_crop_url'].partition('.amazonaws.com')[2]

## Look at internals

In [None]:
# Manual forward pass for the left image of the first item, using the
# predictor's own graph, session and config.
self = model
l_input = self.model.get_tensor_by_name(self.config['input_name'])
l_output = self.model.get_tensor_by_name(self.config['output_name'])
item = items[0]
#self.process(item.l_img, self._sess, item.lcm['x_coord'], item.lcm['y_coord'], l_input, l_output)
img = item.l_img
height, width, _ = img.shape
img0 = self.prepare(item.l_img)
# Batch of one prepared image.
imgs = np.stack([img0])
predict_heatmap = self._sess.run(l_output, feed_dict = {l_input: imgs})
# Drop the size-1 dimensions of the network output.
final_stage_heatmap = predict_heatmap.squeeze()

In [None]:
# Shapes of the input batch, the raw output and the squeezed output.
print(imgs.shape)
predict_heatmap.shape, final_stage_heatmap.shape

In [None]:
# Bind the names used by the heat-map inspection cells below.
heatmaps, img_height, img_width, x_offset, y_offset = \
  final_stage_heatmap, height, width, item.lcm['x_coord'], item.lcm['y_coord']


In [None]:
# Index of the maximum of `hm`, flat and unravelled to array coordinates.
# NOTE: the single-channel `hm` is assigned in a cell further down; run that
# cell first.
maxindex = hm.argmax()
np.unravel_index(hm.argmax(), hm.shape)

In [None]:
# All positions where hm attains its maximum.
np.where(hm == hm.max())

In [None]:
# Locate the keypoint for channel c: resize that channel's heat map to the
# image size and take the mean position of its maxima as [x, y].
c=6
hm = cv2.resize(heatmaps[..., c], (img_width, img_height))
hm_max = np.where(hm == hm.max())
# np.where returns (rows, cols), so index 1 is x and index 0 is y.
coordinates = [int(np.mean(hm_max[1])), int(np.mean(hm_max[0]))]
keypoint_x = int(coordinates[0])
keypoint_y = int(coordinates[1])
coordinates

In [None]:
# Peak value of the resized heat map.
hm.max()

In [None]:
# Heat-map shape and the positions of its maxima.
hm.shape, hm_max

In [None]:
# Show the raw heat maps of channels 0, 1, 2 and 6 side by side.
fog, ax = plt.subplots(ncols=4, figsize=(15,5))
ax[0].imshow(heatmaps[..., 0])
ax[1].imshow(heatmaps[..., 1])
ax[2].imshow(heatmaps[..., 2])
ax[3].imshow(heatmaps[..., 6])

In [None]:
# Original image next to the prepared network input (shifted by +0.5 for
# display); the third axes is left empty.
fog, ax = plt.subplots(ncols=3, figsize=(15,5))
ax[0].imshow(img)
ax[1].imshow(img0[0,...] + 0.5)


## TensorRT

In [None]:
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt
# Alias so the predictor's internals can be pasted and run as-is.
self = model

In [None]:
# Rebuild the items from the request body (one predict.Item per entry, keeping
# only the crop offsets and sensor pixel counts), then load their images.
items = [
    predict.Item(
        Id=entry["id"],
        left_crop_url=entry["leftCropUrl"],
        right_crop_url=entry["rightCropUrl"],
        lcm={key: entry["leftCropMetadata"][key] for key in ("x_coord", "y_coord")},
        rcm={key: entry["rightCropMetadata"][key] for key in ("x_coord", "y_coord")},
        cm={key: entry["cameraMetadata"][key] for key in ("pixelCountWidth", "pixelCountHeight")},
    )
    for entry in body
]
item = items[0]
predict.load_images(items)
logging.info(f"loaded images: {len(items)}")

In [None]:
# Read the frozen graph from disk.
with tf.gfile.GFile(".data/model.pb", "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# Convert it with TensorRT in FP16 precision, keeping the named output tensor.
converted_graph_def = trt.create_inference_graph(
    input_graph_def = graph_def,
    outputs = ["stage_3/mid_conv7/BiasAdd:0"],
    precision_mode='FP16',
#    precision_mode="INT8",
#    use_calibration=True,
)


# Short aliases: gd0 = original graph, gd1 = converted graph.
gd0 = graph_def
gd1 = converted_graph_def

In [None]:
# Import the converted graph into a fresh tf.Graph and make the predictor use
# it with a new session.
with tf.Graph().as_default() as graph:
    tf.import_graph_def(gd1,
                        input_map=None,
                        return_elements=None,
                        name="",
                        op_dict=None,
                        producer_op_list=None)
self._model = graph
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
config.gpu_options.per_process_gpu_memory_fraction = 1/2
# NOTE(review): the graph is stored as `_model` but read back as `model`;
# presumably `model` is a property over `_model`. Confirm in the predictor.
self._sess = tf.Session(graph=self.model, config=config)

In [None]:
# Predict with the swapped-in graph and show the first left-crop keypoint.
result = model.predict(items)
result[0]["leftCrop"][0]

In [None]:
# Same prediction again (identical code to the previous cell).
result = model.predict(items)
result[0]["leftCrop"][0]

In [None]:
# NOTE(review): this only creates the initializer op; it is never passed to
# session.run, so it does not appear to have any effect here. Confirm.
tf.global_variables_initializer()
l_input = self._model.get_tensor_by_name(self.config['input_name'])
l_output = self._model.get_tensor_by_name(self.config['output_name'])
# Batch of two: the prepared left and right image of `item`.
img0 = self.prepare(item.l_img, 1)
img1 = self.prepare(item.r_img, 1)
imgs = np.stack([img0, img1])

In [None]:
# One forward pass on the two-image batch.
H = self._sess.run(l_output, feed_dict = {l_input: imgs})

In [None]:
%timeit H = self._sess.run(l_output, feed_dict = {l_input: imgs})

In [None]:
# Scratch arithmetic: starting figure (its meaning is not stated in the notebook).
26000

In [None]:
# Scratch arithmetic: 4% of 170000.
170000*.04

In [None]:
# Subtract the previous cell's output (`_` in IPython) from 26000.
# NOTE(review): earlier cells assign to `_`, which may stop IPython from
# updating it. Confirm `_` holds the intended value.
26000-_

In [None]:
# Previous cell's output as a fraction of 170000.
_/170000