## 1. Imports and Paths

In [1]:
# Playing with labeled image data
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import json

from helper_functions import HarrisKeypointDetector, SimpleFeatureDescriptor, \
    ORB_SIFT_FeatureDescriptor, extract_color_moments, hu_moments

In [2]:
# Define directories
base_dir = '/Users/jetcalz07/Desktop/MIDS/W281_CV/logo_detection_w281/'
image_dir = base_dir + 'data/images/'
bbox_dir = base_dir + 'data/yolofinalset/obj_train_data/'
preproc_dir = base_dir + 'data/preprocessed/'
da_path = preproc_dir + 'da/'
bb_path = preproc_dir + 'bb/'

## 2. Parse Splits
Goal: For each split, get a list of image paths that we can load and loop through later

In [3]:
# Step 1: Get a list of BASE image paths to use when sorting the bbox images
split_file = preproc_dir + 'train_test_split_img.json'
with open(split_file) as f:
    splits = json.load(f)

# Step 2: Get the augmented data maps aligned with the base image splits
bb_map_file = preproc_dir + 'preproc_map_bb.json'
bb_map = pd.read_json(bb_map_file).T
bb_map.index = bb_map.index.set_names(['img_path'])
bb_map.reset_index(level=0, inplace=True)
bb_map['image_name'] = bb_map['image_source'].apply(lambda x: x.split('/')[-1])

# Step 3: get bb image list fro val and test 
val_df = bb_map.loc[bb_map['image_name'].isin(list(splits['X_val'])), ].copy()
val_df['split'] = 'Val'
#val_bbs = bb_map.loc[bb_map['image_name'].isin(list(splits['X_val'])), 'index'].values
test_df = bb_map.loc[bb_map['image_name'].isin(list(splits['X_test'])), ].copy()
test_df['split'] = 'Test'
#test_bbs = bb_map.loc[bb_map['image_name'].isin(list(splits['X_test'])), 'index'].values
print(f"Val bb count: {len(val_df)}")
print(f"Test bb count: {len(test_df)}")

# Step 4: get da image list for train
da_map_file = preproc_dir + 'preproc_map_da.json'
da_map = pd.read_json(da_map_file).T
da_map.index = da_map.index.set_names(['img_path'])
da_map.reset_index(level=0, inplace=True)
da_map['image_name'] = da_map['image_source'].apply(lambda x: x.split('/')[-1])
train_df = da_map.loc[da_map['image_name'].isin(list(splits['X_train'])), ].copy()
train_df['split'] = 'Train'
#train_bbs = da_map.loc[da_map['image_name'].isin(list(splits['X_train'])), 'index'].values
print(f"Train da_bb count: {len(train_df)}")

# Step 5: Combine the train, test, val dfs to extract features in same loop
loop_df = pd.concat([train_df, val_df, test_df], axis=0)

# To-DO: Check agains the size of the directory
train_count = ''
print(f"Checking train bb count: {len(train_count)}")

Val bb count: 361
Test bb count: 434
Train da_bb count: 10789
Checking train bb count: 0


In [4]:
# REMOVE AFTER LOADING ALL PREPROCESSED LOCALLY
paths_list = os.listdir(da_path)
paths_list.extend(os.listdir(bb_path))
#loop_df = train_df.loc[train_df['img_path'].isin(os.listdir(da_path)), ].copy()
loop_df = loop_df.loc[loop_df['img_path'].isin(paths_list), ].copy()
print(len(loop_df))

27


## 3. Extract Features
Plan: Loop through each train, val, test list and extract features

In [5]:
## Initialize empty df for extracted features
features = pd.DataFrame({
    'image_path': pd.Series(dtype='str'),
    'image_source': pd.Series(dtype='str'),
    'bbox_source': pd.Series(dtype='str'),
    'class': pd.Series(dtype='str'),
    'class_code':  pd.Series(dtype='int'),
    'split': pd.Series(dtype='str'),
    'simple_feature': pd.Series(dtype='float32'),
    'harris_orb_feature': pd.Series(dtype='float32'),
    'orb_kp_feature': pd.Series(dtype='float32'),
    'harris_sift_feature': pd.Series(dtype='float32'),
    'sift_kp_feature': pd.Series(dtype='float32'),
    })

In [10]:
## Train Loop
for idx, row in loop_df.iterrows():
    # Step 1: load rgb and gray images
    im_path = da_path if row['split']=='Train' else bb_path
    bbox_rgb = plt.imread(im_path + row['img_path']) # read img
    bbox_gray = cv2.cvtColor(bbox_rgb, cv2.COLOR_RGB2GRAY) # convert to gray
    bbox_hsv = cv2.cvtColor(bbox_rgb, cv2.COLOR_RGB2HSV) # convert to HSV
    bbox_ycrcb = cv2.cvtColor(bbox_rgb, cv2.COLOR_RGB2YCR_CB) # convert to YCRCB

    # Step 2: extract gray features
    harris_kp = HarrisKeypointDetector(bbox_gray, n=2, w=3, k=0.04, p=0.9, verbose=False)
    simple_feature = SimpleFeatureDescriptor(bbox_gray, harris_kp, w=5)
    harris_orb_feature = ORB_SIFT_FeatureDescriptor(bbox_gray, use_orb = True, use_harris=True, nfeatures=10, harris_kp=harris_kp)
    orb_kp_feature = ORB_SIFT_FeatureDescriptor(bbox_gray, use_orb = True, use_harris=False, nfeatures=10, harris_kp=None)
    harris_sift_feature = ORB_SIFT_FeatureDescriptor(bbox_gray, use_orb = False, use_harris=True, nfeatures=10, harris_kp=harris_kp)
    sift_kp_feature = ORB_SIFT_FeatureDescriptor(bbox_gray, use_orb = False, use_harris=False, nfeatures=10, harris_kp=None)
    hu_mom = hu_moments(bbox_gray)

    # Step 3: extract color features
    cm_rgb_mean, cm_rgb_var, cm_rgb_skew = extract_color_moments(bbox_rgb)
    cm_hsv_mean, cm_hsv_var, cm_hsv_skew = extract_color_moments(bbox_hsv)
    cm_ycrcb_mean, cm_ycrcb_var, cm_ycrcb_skew = extract_color_moments(bbox_ycrcb)

    # Append new cols to df
    row_features = {
        'image_path': row['img_path'],
        'image_source': row['image_name'],
        'bbox_source': row['bbox_source'],
        'class': row['class'],
        'class_code':  row['class_code'],
        'split': row['split'],
        'simple_feature': simple_feature,
        'harris_orb_feature': harris_orb_feature,
        'orb_kp_feature': orb_kp_feature,
        'harris_sift_feature': harris_sift_feature,
        'sift_kp_feature': sift_kp_feature,
        'hu_moments': hu_mom,
        'cm_rgb_mean': cm_rgb_mean,
        'cm_rgb_var': cm_rgb_var,
        'cm_rgb_skew': cm_rgb_skew,
        'cm_hsv_mean': cm_hsv_mean,
        'cm_hsv_var': cm_hsv_var,
        'cm_hsv_skew': cm_hsv_skew,
        'cm_ycrcb_mean': cm_ycrcb_mean,
        'cm_ycrcb_var': cm_ycrcb_var,
        'cm_ycrcb_skew': cm_ycrcb_skew,
        }
    features = features.append(row_features, ignore_index=True)

In [13]:
features.head()

Unnamed: 0,image_path,image_source,bbox_source,class,class_code,split,simple_feature,harris_orb_feature,orb_kp_feature,harris_sift_feature,...,cm_hsv_mean,cm_hsv_skew,cm_hsv_var,cm_rgb_mean,cm_rgb_skew,cm_rgb_var,cm_ycrcb_mean,cm_ycrcb_skew,cm_ycrcb_var,hu_moments
0,007762_00_bb_cn_01_da.jpg,007762.jpg,/yolofinalset/obj_train_data/007762.txt,ups,9,Train,"[[235.0, 182.0, 99.0, 48.0, 53.0, 114.0, 70.0,...","[[244, 162, 136, 184, 172, 169, 64, 143, 210, ...","[[49, 159, 241, 69, 151, 31, 143, 175, 94, 165...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",...,"[66.843575, 85.198685, 143.00562]","[44.03637158581885, 49.20866960703483, 44.2360...","[57.06471769083804, 46.907796934682, 76.568763...","[135.63927, 122.2642, 105.42857]","[48.24468383904273, 48.04740744366385, 13.1115...","[82.20428470613402, 74.3073346848345, 49.55403...","[124.3398, 136.05602, 117.33165]","[44.38581399502851, 6.44269098949583, -15.5547...","[72.83247996367594, 10.740798977334148, 20.812...","[[2.5834853971628804, 6.615548235842453, 9.466..."
1,007762_00_bb_cn_05_da.jpg,007762.jpg,/yolofinalset/obj_train_data/007762.txt,ups,9,Train,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[22, 41, 174, 222, 25, 240, 27, 65, 19, 204, ...","[[2, 248, 61, 97, 75, 237, 185, 238, 170, 123,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",...,"[82.56174, 92.85656, 158.00989]","[-25.99312418546794, 81.45861440615242, -25.83...","[39.446372444848116, 86.88629193410557, 51.226...","[120.641594, 133.52646, 148.39737]","[-50.70741625313586, -46.98765201668591, 9.530...","[78.02708047850764, 70.62955915588626, 48.2108...","[131.37062, 120.343285, 137.60922]","[-43.794253221996826, -8.010189954720355, 16.5...","[69.14655348405229, 10.620294995117195, 20.557...","[[2.6073822893201055, 7.149638409360605, 9.658..."
2,007729_00_bb_cn_01_da.jpg,007729.jpg,/yolofinalset/obj_train_data/007729.txt,ups,9,Train,"[[63.0, 53.0, 75.0, 96.0, 110.0, 58.0, 83.0, 1...","[[39, 13, 115, 75, 247, 251, 163, 226, 93, 65,...","[[35, 106, 40, 27, 13, 189, 176, 105, 76, 84, ...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",...,"[62.035408, 56.532536, 157.0879]","[36.87219120023084, 56.832957298563976, -48.44...","[44.27939601868092, 48.96045638016493, 68.2989...","[145.76274, 148.06302, 141.07286]","[-48.517627111803044, -52.93045473484804, -48....","[76.42337801090564, 74.77819920845438, 60.6660...","[146.58325, 127.43336, 124.88072]","[-50.47412173930094, -6.3120285618586305, 9.17...","[73.22324878399756, 6.792664825752828, 12.4913...","[[2.3634456162413886, 5.454849019796574, 8.628..."
3,007729_00_bb_cn_02_da.jpg,007729.jpg,/yolofinalset/obj_train_data/007729.txt,ups,9,Train,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[[115, 129, 105, 72, 213, 86, 121, 73, 81, 85,...","[[47, 187, 224, 18, 217, 149, 190, 223, 155, 1...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",...,"[66.32119, 61.66391, 148.88284]","[34.74491694555355, 62.48506479113207, -42.721...","[45.57819607035312, 55.713400023671745, 66.792...","[135.51367, 138.64299, 135.56056]","[-35.82414157630003, -46.88085501059015, -47.2...","[74.79896171066349, 74.22227353736001, 62.0698...","[137.35065, 126.69788, 126.97758]","[-42.896590309430074, -6.318302700403681, 10.0...","[72.49549691099462, 7.376898758379183, 11.9489...","[[2.3235575299771076, 5.520672566232386, 9.171..."
4,007729_00_bb_cn_03_da.jpg,007729.jpg,/yolofinalset/obj_train_data/007729.txt,ups,9,Train,"[[12.0, 24.0, 95.0, 134.0, 135.0, 13.0, 38.0, ...","[[89, 171, 134, 152, 138, 181, 72, 87, 167, 17...","[[120, 36, 252, 229, 165, 110, 121, 155, 63, 5...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",...,"[85.911896, 79.140465, 120.54265]","[-38.112002165415646, 51.36336313593811, 53.74...","[49.45696599409768, 56.00929732265917, 70.7372...","[105.897064, 103.45844, 109.378944]","[53.40329075219501, 58.71783903289473, 54.6285...","[77.65830522253243, 76.10557126035138, 62.8079...","[104.86863, 128.7419, 130.55351]","[55.83948422423258, 6.8074874158826875, -11.12...","[74.59465173747877, 7.207752979912637, 12.0429...","[[2.3444672579562935, 5.435019293793551, 8.743..."


In [None]:
#features.to_csv()