## Load necessary modules

In [1]:
import os

import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")

#Setting logging to logfile and console
import time
import datetime
import logging

# Build a per-run log file name from the current wall-clock time
# (month-day-hour-minute, so runs started within the same minute share a file).
ts = time.time()
timestamp = datetime.datetime.fromtimestamp(ts).strftime('%m-%d-%H-%M')
logfile = 'data/logs/retinanet-{}.log'.format(timestamp)

# basicConfig opens the log file right away and raises if its directory is
# missing, so make sure the directory exists first. The isdir check (rather
# than makedirs(exist_ok=True)) keeps this working on Python 2 as well.
log_dir = os.path.dirname(logfile)
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)

logging.basicConfig(filename=logfile,level=logging.INFO)

def log_print(*args):
    """Echo a message to stdout and record the same text at INFO level.

    Every positional argument is converted with ``str()`` and the pieces are
    joined with single spaces, mirroring how ``print`` would lay them out.
    """
    message = " ".join(str(piece) for piece in args)
    print(message)
    logging.info(message)
    

def get_session():
    """Build a TensorFlow session whose GPU options have ``allow_growth`` enabled.

    Relies on ``tf`` being bound in the notebook namespace by the time this
    function is called.
    """
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    session = tf.Session(config=session_config)
    return session

# Announce (on stdout and in the log itself) which log file this run writes to.
log_print("Created log {}.".format(logfile))

Created log data/logs/retinanet-08-29-18-15.log.


## Configurable Params

In [6]:
class Args:
    """Plain attribute container holding every tunable setting of this notebook.

    An instance is handed to ``create_generators`` and ``create_callbacks`` and
    is read directly by the cells below. Each setting is defined exactly once;
    earlier revisions assigned ``epochs``, ``multi_gpu`` and ``gpu`` twice, with
    the later assignment silently winning. The values kept here are the ones
    that were effective.
    """

    # Paths
    local_train_dir = os.path.join(os.path.dirname((os.getcwd())), 's3_training_data')
    annotations="data/annotations.csv" ## Path to CSV file containing annotation for training
    classes="data/classmap.csv"  ## Path to CSV containing class label mapping
    val_annotations="data/val_annotations.csv"

    # Network
    backbone = "resnet50"
    steps = 10000 #steps per epoch
    snapshot_path = 'snapshots'
    snapshot_period = 25 #how many epochs between snapshots

    # Hyperparameters
    batch_size= 4
    epochs = 50

    # GPU
    multi_gpu = 1 # Number of GPUs for parallel processing
    # ID of GPU as reported by nvidia-smi. This used to be assigned twice
    # (None, then 0); 0 was the effective value and is kept.
    gpu = 0

    # Training variables
    update_csv_annotations = False
    folder_names = ["NYR-BOS_22m12s-22m30s", "PHI-PIT_6m-8m"]
    s3_bucket_name = "bsivisiondata"
    # Starting point for training; set to None to start without a snapshot.
    snapshot= "snapshots/resnet50_csv_49.h5"
    starting_epoch = 50
    imagenet_weights = True

    # keras-retinanet specific
    evaluation = True
    dataset_type = "csv"
    snapshots = True #if true save snapshots
    weights = None
    random_transform = None
    image_min_side = 800
    image_max_side = 1333
    tensorboard_dir = None
    freeze_backbone = False
    
# Single shared settings object; the remaining cells read their configuration from it.
args = Args()
log_print("Args loaded")

Args loaded


## Load Generators

In [3]:
# Download S3 dataset
from utils.file_manager import FileManager
# Take the bucket name from Args instead of repeating the literal here, so this
# cell and the CSV-generation cell always address the same bucket.
fm = FileManager(args.s3_bucket_name)
# NOTE(review): the empty prefix presumably selects the whole bucket - confirm against FileManager.
fm.download_dir('', local=args.local_train_dir)
log_print("S3 dataset downloaded into {}".format(args.local_train_dir))

Downloaded 0/120 files from BUF-EDM_1h53m26s-1h53m46s/annotations/
Redownloaded 0 files
Downloaded 0/120 files from BUF-EDM_1h53m26s-1h53m46s/frames/
Redownloaded 0 files
Downloaded 0/0 files from BUF-EDM_1h53m26s-1h53m46s/
Redownloaded 0 files
Downloaded 0/150 files from DET-NSH_0h10m12s-0h10m17s/annotations/
Redownloaded 0 files
Downloaded 0/149 files from DET-NSH_0h10m12s-0h10m17s/frames/
Redownloaded 0 files
Downloaded 0/0 files from DET-NSH_0h10m12s-0h10m17s/
Redownloaded 0 files
Downloaded 0/597 files from DET-NSH_0h7m45s-0h8m5s/annotations/
Redownloaded 0 files
Downloaded 0/600 files from DET-NSH_0h7m45s-0h8m5s/frames/
Redownloaded 0 files
Downloaded 0/0 files from DET-NSH_0h7m45s-0h8m5s/
Redownloaded 0 files
Downloaded 0/184 files from NYR-BOS_22m12s-22m30s/annotations/
Redownloaded 0 files
Downloaded 0/285 files from NYR-BOS_22m12s-22m30s/frames/
Redownloaded 0 files
Downloaded 0/1 files from NYR-BOS_22m12s-22m30s/
Redownloaded 0 files
Downloaded 0/162 files from PHI-PIT_6m-8m

In [4]:
# Creates CSV file with photo annotations; filepath,x1,y1,x2,y2,class
# The CSV is only regenerated when args.update_csv_annotations is set; the
# generator module is imported lazily so it is not loaded otherwise.
if args.update_csv_annotations:
    from keras_retinanet.gen_csv_from_annotations import write_data_to_CSV
    write_data_to_CSV(args.folder_names, args.s3_bucket_name, args.local_train_dir)
# Logged whether or not the CSV was regenerated in this run.
log_print("CSV Ready")

CSV Ready


In [5]:
from keras_retinanet.bin.train import create_generators


# create the generators
# create_generators reads its settings from args and returns a
# (train, validation) pair; validation_generator is checked against None in the
# callbacks cell, so it may be absent.
train_generator, validation_generator = create_generators(args)

log_print("Done.")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Done.


## Load Model

In [7]:
from keras_retinanet.bin.train import create_models, check_keras_version
from keras_retinanet import models
from keras_retinanet.models.retinanet import retinanet_bbox
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf

# create object that stores backbone information
backbone = models.backbone(args.backbone)

# make sure keras is the minimum required version
check_keras_version()


# optionally choose specific GPU
# Compare against None so that GPU id 0 is honoured (a plain truthiness test
# skips it), and convert to str because os.environ only accepts string values.
if args.gpu is not None:
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
set_session(get_session())

# create the model
if args.snapshot is not None:
    # Resume: the snapshot is used as both the base and the training model, and
    # the prediction model is derived from it.
    log_print("RESUMING TRAINING: from snapshot {}".format(args.snapshot))
    model            = models.load_model(args.snapshot, backbone_name=args.backbone)
    training_model   = model
    prediction_model = retinanet_bbox(model=model)
else:
    weights = args.weights
    # default to imagenet if nothing else is specified
    if weights is None and args.imagenet_weights:
        weights = backbone.download_imagenet()
        log_print("FRESH TRAINING: loading imagenet")

    # Build the models on every fresh-training path. This call used to sit
    # inside the imagenet branch, so explicit weights (or imagenet_weights set
    # to False) left model/training_model/prediction_model undefined.
    model, training_model, prediction_model = create_models(
        backbone_retinanet=backbone.retinanet,
        num_classes=train_generator.num_classes(),
        weights=weights,
        multi_gpu=args.multi_gpu,
        freeze_backbone=args.freeze_backbone
    )

log_print("Model and weights initialized")


RESUMING TRAINING: from snapshot snapshots/resnet50_csv_49.h5
Model and weights initialized


## Load Callbacks

In [7]:
from keras_retinanet.bin.train import create_callbacks

# this lets the generator compute backbone layer shapes using the actual backbone model
if 'vgg' in args.backbone or 'densenet' in args.backbone:
    # None of the three names used below was imported anywhere in the notebook,
    # so this branch raised NameError as soon as a vgg/densenet backbone was
    # selected. Import them here, where they are needed.
    import functools
    # NOTE(review): assumes keras_retinanet.bin.train exposes these two helpers
    # at module level (the module create_callbacks is imported from) - confirm
    # the import path for this checkout.
    from keras_retinanet.bin.train import anchor_targets_bbox, make_shapes_callback

    compute_anchor_targets = functools.partial(anchor_targets_bbox, shapes_callback=make_shapes_callback(model))
    train_generator.compute_anchor_targets = compute_anchor_targets
    if validation_generator is not None:
        validation_generator.compute_anchor_targets = compute_anchor_targets

# create the callbacks
callbacks = create_callbacks(
    model,
    training_model,
    prediction_model,
    validation_generator,
    args,
)
log_print("Callbacks created.")

Callbacks created.




## Start Training

In [None]:
# Whole batches per epoch. Floor division keeps this an integer on Python 3 as
# well (plain "/" yields a float there); clamp to 1 so a dataset smaller than
# one batch still runs a step.
steps_per_epoch = max(1, train_generator.size() // args.batch_size)
print("Total samples: {}".format(train_generator.size()))
print("Batch size: {}".format(args.batch_size))
print("Steps per epoch: {}".format(steps_per_epoch))
print("Epochs: {}".format(args.epochs))
print("Multi-GPU: {}".format(args.multi_gpu))
import tensorflow as tf
# This session is never used for training (Keras uses the one registered via
# set_session in the "Load Model" cell); it is presumably created only so that
# TensorFlow logs its device placement. Close it right away instead of leaking it.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
sess.close()

train_history = training_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=args.epochs,
    verbose=1,
    callbacks=callbacks,
)

log_print("Finished {} epochs".format(args.epochs))

Total samples: 345
Batch size: 4
Steps per epoch: 86
Epochs: 50
Multi-GPU: 1
Epoch 1/50
('624 instances of class', 'hockey_player', 'with average precision: 0.2306')
('136 instances of class', 'hockey_faceoff_dot', 'with average precision: 0.2235')
('87 instances of class', 'hockey_goalie', 'with average precision: 0.2138')
('80 instances of class', 'hockey_net', 'with average precision: 0.2200')
('102 instances of class', 'hockey_score_bug', 'with average precision: 0.0000')
('70 instances of class', 'hockey_ref', 'with average precision: 0.2523')
('25 instances of class', 'hockey_blue_line', 'with average precision: 0.1266')
('21 instances of class', 'hockey_middle_line', 'with average precision: 0.0000')
('98 instances of class', 'hockey_end_line', 'with average precision: 0.2621')
('23 instances of class', 'hockey_puck', 'with average precision: 0.0000')
mAP: 0.1529

Epoch 00001: saving model to snapshots/resnet50_csv_01.h5
Epoch 2/50
('624 instances of class', 'hockey_player', 'wi

Epoch 10/50
('624 instances of class', 'hockey_player', 'with average precision: 0.2254')
('136 instances of class', 'hockey_faceoff_dot', 'with average precision: 0.2205')
('87 instances of class', 'hockey_goalie', 'with average precision: 0.2326')
('80 instances of class', 'hockey_net', 'with average precision: 0.2125')
('102 instances of class', 'hockey_score_bug', 'with average precision: 0.0000')
('70 instances of class', 'hockey_ref', 'with average precision: 0.2650')
('25 instances of class', 'hockey_blue_line', 'with average precision: 0.1296')
('21 instances of class', 'hockey_middle_line', 'with average precision: 0.0019')
('98 instances of class', 'hockey_end_line', 'with average precision: 0.2871')
('23 instances of class', 'hockey_puck', 'with average precision: 0.0000')
mAP: 0.1575

Epoch 00010: saving model to snapshots/resnet50_csv_10.h5
Epoch 11/50
('624 instances of class', 'hockey_player', 'with average precision: 0.2353')
('136 instances of class', 'hockey_faceoff_d

Epoch 19/50
('624 instances of class', 'hockey_player', 'with average precision: 0.2288')
('136 instances of class', 'hockey_faceoff_dot', 'with average precision: 0.2350')
('87 instances of class', 'hockey_goalie', 'with average precision: 0.1955')
('80 instances of class', 'hockey_net', 'with average precision: 0.2247')
('102 instances of class', 'hockey_score_bug', 'with average precision: 0.0000')
('70 instances of class', 'hockey_ref', 'with average precision: 0.2636')
('25 instances of class', 'hockey_blue_line', 'with average precision: 0.1553')
('21 instances of class', 'hockey_middle_line', 'with average precision: 0.0730')
('98 instances of class', 'hockey_end_line', 'with average precision: 0.2838')
('23 instances of class', 'hockey_puck', 'with average precision: 0.0037')
mAP: 0.1663

Epoch 00019: saving model to snapshots/resnet50_csv_19.h5
Epoch 20/50
('624 instances of class', 'hockey_player', 'with average precision: 0.2274')
('136 instances of class', 'hockey_faceoff_d

Epoch 28/50
('624 instances of class', 'hockey_player', 'with average precision: 0.2230')
('136 instances of class', 'hockey_faceoff_dot', 'with average precision: 0.2377')
('87 instances of class', 'hockey_goalie', 'with average precision: 0.2172')
('80 instances of class', 'hockey_net', 'with average precision: 0.2143')
('102 instances of class', 'hockey_score_bug', 'with average precision: 0.0000')
('70 instances of class', 'hockey_ref', 'with average precision: 0.2587')
('25 instances of class', 'hockey_blue_line', 'with average precision: 0.1686')
('21 instances of class', 'hockey_middle_line', 'with average precision: 0.0758')
('98 instances of class', 'hockey_end_line', 'with average precision: 0.2773')
('23 instances of class', 'hockey_puck', 'with average precision: 0.0120')
mAP: 0.1685

Epoch 00028: saving model to snapshots/resnet50_csv_28.h5
Epoch 29/50
('624 instances of class', 'hockey_player', 'with average precision: 0.2230')
('136 instances of class', 'hockey_faceoff_d

## Analysis

In [31]:
from keras_retinanet.retinanet_utils.image import read_image_bgr, preprocess_image, resize_image
from keras_retinanet.retinanet_utils.visualization import draw_box, draw_caption
from keras_retinanet.retinanet_utils.colors import label_color
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

#load classmap
import csv
# Each classmap row is "<class name>,<integer label>"; invert it into a
# label -> name lookup for captioning detections.
with open(args.classes, 'r') as classmap_file:
    labels_to_names = {
        int(class_label): class_name
        for class_name, class_label in csv.reader(classmap_file)
    }
        
# # load image
# image = read_image_bgr('hockey_images/Rangers_vs_Bruins_Oct_26_2016_39922.png')

# # copy to draw on
# draw = image.copy()
# draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

# # preprocess image for network
# image = preprocess_image(image)
# image, scale = resize_image(image)

# # process image
# start = time.time()
# # print prediction_model.predict_on_batch(np.expand_dims(image, axis=0))
# boxes, scores, labels = prediction_model.predict_on_batch(np.expand_dims(image, axis=0))
# print("processing time: ", time.time() - start)

# # correct for image scale
# boxes /= scale

# # visualize detections
# for box, score, label in zip(boxes[0], scores[0], labels[0]):
#     # scores are sorted so we can break
#     if score < 0.5:
#         break
        
#     color = label_color(label)
    
#     b = box.astype(int)
#     draw_box(draw, b, color=color)
    
#     caption = "{} {:.3f}".format(labels_to_names[label], score)
#     draw_caption(draw, b, caption)
    
# plt.figure(figsize=(15, 15))
# plt.axis('off')
# plt.imshow(draw)
# plt.show()

# Run the detector over a short stretch of a video and write an annotated copy.
input_video_path = 'data/videos/BUF-EDM_10-16-16_13m-13m15s.mp4'
output_video_path = 'data/output_videos/marked_BUF-EDM_10-16-16_13m-13m15s.mp4'
skip_length = 0 #seconds of video to skip
capture_length = 2 #seconds of video to analyze

video_capture = cv2.VideoCapture(input_video_path)
video_capture.set(cv2.CAP_PROP_POS_MSEC, float(skip_length*1000.0))

fps = video_capture.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Output frames are always resized to 1280x720 before being written (see below).
video_output = cv2.VideoWriter(output_video_path, fourcc, fps, (1280, 720))

total_frame_count = int(capture_length * fps)
print('Marking {} frames over {} seconds'.format(total_frame_count, capture_length))
total_time = time.time()
frame_count = 0
avg_frame_time = 0
# try/finally guarantees the capture and writer handles are released even if
# prediction or drawing raises part-way through the video.
try:
    while True:
        # grab the current frame
        (grabbed, frame) = video_capture.read()

        # if we are viewing a video and we did not grab a frame,
        # then we have reached the end of the video
        if not grabbed:
            print('EOF (end of film)')
            break
        # if we've watched to the capture length, exit
        if video_capture.get(cv2.CAP_PROP_POS_MSEC) / 1000 >= skip_length + capture_length:
            print('Reached capture length')
            break

        st = time.time()

        # copy to draw on
        draw = frame.copy()

        # preprocess image for network
        # Resize the *preprocessed* image; resizing `frame` again would throw
        # the preprocessing away and feed the network the raw frame.
        image = preprocess_image(frame)
        image, scale = resize_image(image)

        boxes, scores, labels = prediction_model.predict_on_batch(np.expand_dims(image, axis=0))

        # correct for image scale: boxes are in resized-image coordinates,
        # while `draw` still has the original frame size
        boxes /= scale

        # visualize detections
        for box, score, label in zip(boxes[0], scores[0], labels[0]):
            # scores are sorted so we can break
            if score < 0.5:
                break

            color = label_color(label)

            b = box.astype(int)
            draw_box(draw, b, color=color)

            caption = "{} {:.3f}".format(labels_to_names[label], score)
            draw_caption(draw, b, caption)

        frame_count +=1
        print('Frame count = {} / {}'.format(frame_count, total_frame_count))
        # Measure once so the running average and the ETA use the same sample.
        frame_time = time.time() - st
        avg_frame_time = frame_time*0.8 + 0.2*avg_frame_time
        eta = frame_time * (total_frame_count - frame_count)
        # Floor division keeps the minutes an integer on Python 3 as well.
        print('ETA: {} min, {:.1f}s'.format(int(eta) // 60, eta%60))

        #write to video file
        # `draw` is a copy of the frame as read by VideoCapture and was never
        # colour-converted, so it is written as-is; the previous RGB->BGR
        # conversion result was computed but never used and has been dropped.
        resized = cv2.resize(draw, (1280, 720))
        video_output.write(resized)

    log_print('[INFO] elapsed time (total): {:.2f}'.format(time.time() - total_time))
    # print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))
finally:
    video_capture.release()
    video_output.release()



Marking 59 frames over 2 seconds
Frame count = 1 / 59
ETA: 0 min, 23.2s
Frame count = 2 / 59
ETA: 0 min, 18.8s
Frame count = 3 / 59
ETA: 0 min, 17.7s
Frame count = 4 / 59
ETA: 0 min, 17.2s
Frame count = 5 / 59
ETA: 0 min, 16.2s
Frame count = 6 / 59
ETA: 0 min, 16.2s
Frame count = 7 / 59
ETA: 0 min, 15.9s
Frame count = 8 / 59
ETA: 0 min, 15.1s
Frame count = 9 / 59
ETA: 0 min, 15.0s
Frame count = 10 / 59
ETA: 0 min, 14.7s
Frame count = 11 / 59
ETA: 0 min, 14.5s
Frame count = 12 / 59
ETA: 0 min, 14.1s
Frame count = 13 / 59
ETA: 0 min, 13.6s
Frame count = 14 / 59
ETA: 0 min, 13.5s
Frame count = 15 / 59
ETA: 0 min, 13.3s
Frame count = 16 / 59
ETA: 0 min, 12.9s
Frame count = 17 / 59
ETA: 0 min, 12.7s
Frame count = 18 / 59
ETA: 0 min, 12.3s
Frame count = 19 / 59
ETA: 0 min, 12.3s
Frame count = 20 / 59
ETA: 0 min, 11.8s
Frame count = 21 / 59
ETA: 0 min, 11.3s
Frame count = 22 / 59
ETA: 0 min, 11.1s
Frame count = 23 / 59
ETA: 0 min, 10.9s
Frame count = 24 / 59
ETA: 0 min, 10.6s
Frame count = 25