In [None]:
import sys
import cv2
import math
import os
import numpy as np
import pandas as pd
import random
from sklearn import feature_extraction
import matplotlib.pyplot as plt
%matplotlib inline

## Preprocessing Images

In [None]:
# SSIM lives in `skimage.metrics.structural_similarity` in current scikit-image;
# the old `skimage.measure.compare_ssim` alias has been removed.  Bind whichever
# one is installed to the name `compare_ssim`, which the cells below call.
try:
    from skimage.metrics import structural_similarity as compare_ssim
except ImportError:
    from skimage.measure import compare_ssim
import glob

img_list = glob.glob('mini_images/final_positives_diff_5/*')
transition_path = 'mini_images/transitions_renamed/'

# One row per sample: [flattened SSIM difference map | flattened current image].
diff_data = []

for img_name in img_list:
    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread returns None instead of raising; fail with the offending path.
        raise IOError('could not read image: ' + img_name)
    img = img[:,:,0]

    # The first '_'-separated token of the file name is an integer index; the
    # reference image is the file in `transition_path` whose index is 5 lower.
    img_name_list = img_name.split('/')[-1].split('.')[0].split('_')
    img_name_list[0] = str(int(img_name_list[0])-5)
    img_prev_name = transition_path + '_'.join(img_name_list) + '.jpg'

    img_prev = cv2.imread(img_prev_name)
    if img_prev is None:
        raise IOError('could not read reference image: ' + img_prev_name)
    img_prev = img_prev[:,:,0]

    (score, diff) = compare_ssim(img, img_prev, full=True)
    # NOTE(review): local SSIM values can be negative, in which case this uint8
    # cast wraps around -- left as-is because the classifiers are fit on it.
    diff = (diff * 255).astype("uint8")
    diff_data.append(np.concatenate((diff.flatten(), img.flatten()), axis=0))

# Every sample collected so far is a positive (label 1).  Built in one shot
# rather than with a per-image np.append, which copies the array each time.
labels = np.ones(len(diff_data))

In [None]:
img_list = glob.glob('mini_images/final_negatives/*')
transition_path = 'mini_images/transitions_renamed/'
transition_list = glob.glob('mini_images/transitions_renamed/*')
# Set copy for O(1) membership tests inside the loop.
transition_set = set(transition_list)

# Collect the new rows locally and commit them at the end, so `diff_data` and
# `labels` cannot get out of step if an image fails half-way through.
negative_rows = []

for img_name in img_list:
    # Reference image: same file name with the leading integer index lowered by 5.
    img_name_list = img_name.split('/')[-1].split('.')[0].split('_')
    img_name_list[0] = str(int(img_name_list[0])-5)
    img_prev_name = transition_path + '_'.join(img_name_list) + '.jpg'
    if img_prev_name not in transition_set:
        # No reference frame on disk for this negative: skip it.
        continue

    img = cv2.imread(img_name)
    if img is None:
        # cv2.imread returns None instead of raising; fail with the offending path.
        raise IOError('could not read image: ' + img_name)
    img = img[:,:,0]
    img_prev = cv2.imread(img_prev_name)[:,:,0]

    (score, diff) = compare_ssim(img, img_prev, full=True)
    diff = (diff * 255).astype("uint8")
    negative_rows.append(np.concatenate((diff.flatten(), img.flatten()), axis=0))

# Append the negatives (label 0) after whatever samples were collected earlier.
# NOTE: re-running this cell appends the same negatives again.
diff_data.extend(negative_rows)
labels = np.append(labels, np.zeros(len(negative_rows)))

### Normalize Data

In [None]:
# Min-max rescale every feature row to the 0..255 range.
# The loop originally iterated over `total_data`, a name this notebook never
# defines (NameError on a fresh kernel); `diff_data` is the feature list built
# by the preprocessing cells.  NOTE(review): confirm this is the intended input.
norm_data = []

for row in diff_data:
    img_norm = cv2.normalize(row, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX).flatten()
    norm_data.append(img_norm)

### Histogram

In [None]:
def compute_hist(img):
    """Return the 256-bin histogram of channel 0 of `img` as a flat array.

    Bins cover the value range [0, 256); no mask is applied.
    """
    n_bins = [256]
    value_range = [0, 256]
    histogram = cv2.calcHist([img], [0], None, n_bins, value_range)
    return histogram.flatten()

In [None]:
def compute_hist_multiple(img, split=6400):
    """Histogram the two halves of a concatenated feature row separately.

    `img` is a flat array made of two segments laid end to end; elsewhere in
    this notebook it is a flattened SSIM difference map followed by the
    flattened image patch.

    Parameters
    ----------
    img : 1-D array
        The concatenated row.
    split : int, optional
        Index at which the second segment starts.  Defaults to 6400, i.e. one
        80x80 patch, which is the value that used to be hard-coded here.

    Returns
    -------
    1-D array of length 512: the 256-bin histogram of `img[:split]` followed
    by the 256-bin histogram of `img[split:]`.
    """
    segments = (img[:split], img[split:])
    return np.concatenate(
        [cv2.calcHist([segment], [0], None, [256], [0, 256]).flatten()
         for segment in segments],
        axis=0)

In [None]:
# One 512-value histogram feature vector per sample row in `diff_data`.
hist_data = [compute_hist_multiple(sample) for sample in diff_data]

### Classification

In [None]:
# Classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Hold out 30% of the samples for evaluation.  The split is seeded (like the
# random forest below) so the metrics in the following cells are reproducible
# across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    hist_data, labels, test_size=0.3, random_state=0)

clf_xgb = XGBClassifier(max_depth=3, learning_rate=0.2)
clf_rf = RandomForestClassifier(n_estimators=100, max_depth=10, min_samples_split=5, random_state=0, class_weight='balanced')

# Convert the list of feature rows to a 2-D array once and reuse it for both fits.
X_train_matrix = np.array(X_train)
clf_xgb.fit(X_train_matrix, y_train)
clf_rf.fit(X_train_matrix, y_train)

In [None]:
# Score the held-out rows with both models.
X_test_matrix = np.array(X_test)
pred_xgb = clf_xgb.predict(X_test_matrix)
pred_rf = clf_rf.predict(X_test_matrix)
# Element-wise product of the two 0/1 prediction vectors: a sample is
# positive only when both classifiers say so.
pred = pred_xgb * pred_rf

In [None]:
# Fraction of test samples XGBoost predicts positive, then the true positive
# fraction.  Parenthesised print runs on both Python 2 and Python 3.
print(sum(pred_xgb)*1.0/len(pred_xgb))
print(sum(y_test)*1.0/len(y_test))

In [None]:
# Fraction of test samples the random forest predicts positive, then the true
# positive fraction.  Parenthesised print runs on both Python 2 and Python 3.
print(sum(pred_rf)*1.0/len(pred_rf))
print(sum(y_test)*1.0/len(y_test))

In [None]:
# Accuracy of the combined prediction on the held-out set.
# Parenthesised print runs on both Python 2 and Python 3.
print(sum(pred == y_test)*1.0/len(y_test))

In [None]:
from sklearn.metrics import confusion_matrix

# Pin the label order so the matrix is always 2x2 (rows = true class,
# columns = predicted class), even if one class is absent from the test split
# or from the predictions; otherwise cm[1, ...] below raises IndexError.
cm = confusion_matrix(y_test, pred, labels=[0, 1])
print(cm)

# Share of true positives that the combined classifier labelled negative.
true_positive_total = cm[1,0] + cm[1,1]
if true_positive_total > 0:
    print('missed waggles %s' % (cm[1,0]*1.0/true_positive_total))
else:
    print('missed waggles n/a (no positive samples in y_test)')

## Batch Processing

In [None]:
# Build classifier inputs from the video: on every 10th frame, slide a
# (2*cut_size) x (2*cut_size) window over the frame with a stride of
# cut_size // 2 and compute, for each window, the same 512-value feature used
# for training (histogram of the SSIM difference map + histogram of the patch).

cap = cv2.VideoCapture('videos/newvid_out3.MP4')
frame_num = 0
cut_size = 40
# Rolling buffer of the most recent grayscale frames, oldest first.  It is
# trimmed to five entries after every frame, so while frame N is being
# processed index 0 holds frame N-5.  (Trimming only on processed frames, as
# this cell used to, made index 0 ten to fifteen frames old, which does not
# match the 5-index offset used when building the training pairs.)
last_five_frames = []
this_w_pred = {}
prev_w_pred = {}


# One row per window: [frame_num, cnt_x, cnt_y, 512 histogram features].
test_data_list = []


try:
    for frame_num in range(0,3000):
        ret, img = cap.read()
        if ret:
            print(frame_num)
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            if frame_num % 10 == 0 and frame_num > 0:
                # Integer stride: `cut_size / 2` is a float on Python 3 and range() rejects it.
                stride = cut_size // 2
                img_prev = last_five_frames[0]
                for cnt_x in range(cut_size, img_gray.shape[1]-cut_size, stride):
                    for cnt_y in range(cut_size, img_gray.shape[0]-cut_size, stride):
                        img_square = img_gray[cnt_y-cut_size:cnt_y+cut_size, cnt_x-cut_size:cnt_x+cut_size]
                        img_prev_square = img_prev[cnt_y-cut_size:cnt_y+cut_size, cnt_x-cut_size:cnt_x+cut_size]
                        (score, diff) = compare_ssim(img_square, img_prev_square, full=True)
                        diff = (diff * 255).astype("uint8")
                        test_input = compute_hist_multiple(np.concatenate((diff.flatten(), img_square.flatten()), axis=0)).reshape(1,512)
                        test_data = np.concatenate((np.array([[frame_num, cnt_x, cnt_y]]),test_input), 1)
                        test_data_list.append(test_data[0])
                prev_w_pred = this_w_pred
            last_five_frames.append(img_gray)
            last_five_frames = last_five_frames[-5:]
finally:
    # Free the decoder handle even if feature extraction fails part-way.
    cap.release()


In [None]:
# Three bookkeeping columns followed by the 512 histogram features, which are
# labelled with the integers 0..511.  list(range(...)) is needed on Python 3,
# where a range object cannot be concatenated to a list.
columns = ['frame_num','x', 'y'] + list(range(512))
test_data_df = pd.DataFrame(test_data_list, columns=columns)

In [None]:
# Integer-labelled feature columns 0..511.
train_cols = list(range(512))
# `.values` replaces `DataFrame.as_matrix()`, which has been removed from
# pandas; extract the feature matrix once and feed it to both models.
batch_features = test_data_df[train_cols].values
# Product of the two 0/1 prediction vectors: positive only if both models agree.
preds = clf_xgb.predict(batch_features)*clf_rf.predict(batch_features)
test_data_df['preds'] = preds

In [None]:
# Re-read the first 200 frames of the video, draw a filled red dot at the
# centre of every window predicted positive on that frame, and write the
# frames (resized to 960x540) to batch_test.avi.
cap = cv2.VideoCapture('videos/newvid_out3.MP4')
fourcc = cv2.VideoWriter_fourcc(*'XVID')
video = cv2.VideoWriter('batch_test.avi',fourcc,30.0,(960,540))

# Filter on the prediction once instead of re-scanning the whole frame table
# for every video frame.
positive_rows = test_data_df[test_data_df['preds'] == 1.0]

try:
    for frame_num in range(0,200):
        ret, img = cap.read()
        if ret:
            for idx, row in positive_rows[positive_rows['frame_num'] == frame_num].iterrows():
                cv2.circle(img,(int(row['x']),int(row['y'])),5,(0,0,255),-1)
            video.write(cv2.resize(img, (960, 540)))
finally:
    # Always finalise the output file and free the decoder, even on error.
    video.release()
    cap.release()