In [1]:
import collections
import math
import torch
import os, tarfile, json
import time, datetime
from io import StringIO
import numpy as np
import sagemaker
from sagemaker.pytorch import estimator, PyTorchModel, PyTorchPredictor, PyTorch
from sagemaker.utils import name_from_base
import boto3
from types import SimpleNamespace

# SageMaker session object and the IAM execution role returned for this environment.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()
# Hard-coded S3 bucket name; replace with your own bucket, or use
# sagemaker_session.default_bucket() instead.
bucket = 'privisaa-bucket-virginia'
# NOTE: `prefix` is reassigned in a later cell before the training channel is built.
prefix = 'detectron2'
# boto3 client for the 'runtime.sagemaker' service.
runtime_client = boto3.client('runtime.sagemaker')

In [2]:
#!git clone https://github.com/facebookresearch/detectron2.git
#!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'


Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-k95ep6eg
Building wheels for collected packages: detectron2
  Running setup.py bdist_wheel for detectron2 ... [?25lerror
  Complete output from command /home/ec2-user/anaconda3/envs/pytorch_p36/bin/python -u -c "import setuptools, tokenize;__file__='/tmp/pip-req-build-k95ep6eg/setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" bdist_wheel -d /tmp/pip-wheel-h95_ipta --python-tag cp36:
  running bdist_wheel
  running build
  running build_py
  creating build
  creating build/lib.linux-x86_64-3.6
  creating build/lib.linux-x86_64-3.6/detectron2
  copying detectron2/__init__.py -> build/lib.linux-x86_64-3.6/detectron2
  creating build/lib.linux-x86_64-3.6/detectron2/modeling
  copying detectron2/modeling/poolers.py -> build/lib.linux-x86_64-3.6/detectron2/

Need to determine whether detectron2 requires a custom container or whether it can simply be passed to a prebuilt one (the original note breaks off at this point).
Conclusion so far: a custom container is probably needed.
All of the dataset registration code can be specified inside train_net.py.


In [2]:
%%sh

# Build the detectron2 training image locally and push it to this account's ECR registry.
# Abort on the first failing command so that a failed `docker build` can never be
# followed by tagging and pushing a stale image.
set -e

# The name of our algorithm (also used as the ECR repository name)
algorithm_name=detectron2-train

# Make the `train` and `serve` scripts in the build context executable
# (presumably the container entry points -- confirm against the Dockerfile).
chmod +x train
chmod +x serve

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-east-1 if none defined).
# `aws configure get` exits non-zero when the value is unset, so `|| true` keeps
# `set -e` from aborting before the default below can be applied.
region=$(aws configure get region || true)
region=${region:-us-east-1}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
# The region is passed explicitly so the repository is looked up/created in the same
# region that the login and push below target.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly.
# NOTE: `aws ecr get-login` only exists in AWS CLI v1; with CLI v2 use
#   aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${account}.dkr.ecr.${region}.amazonaws.com"
$(aws ecr get-login --region "${region}" --no-include-email)

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

# NOTE: the push has failed before with "not authorized to perform ecr:InitiateLayerUpload";
# the calling role needs ECR push permissions for this step to succeed.
docker push "${fullname}"

Login Succeeded
Sending build context to Docker daemon  338.2MB
Step 1/24 : FROM nvidia/cuda:10.1-cudnn7-devel
 ---> b4879c167fc1
Step 2/24 : ENV DEBIAN_FRONTEND noninteractive
 ---> Using cache
 ---> 9de2c1f4a99e
Step 3/24 : RUN apt-get update && apt-get install -y 	python3-opencv ca-certificates python3-dev git wget nginx sudo  	cmake ninja-build protobuf-compiler libprotobuf-dev &&   rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> b1206fa2be0f
Step 4/24 : RUN ln -sv /usr/bin/python3 /usr/bin/python
 ---> Using cache
 ---> 1ae8892ae85e
Step 5/24 : WORKDIR /home/appuser
 ---> Using cache
 ---> 51aeac52ec3b
Step 6/24 : ENV PATH="/home/appuser/.local/bin:${PATH}"
 ---> Using cache
 ---> 3a319894ea04
Step 7/24 : RUN wget https://bootstrap.pypa.io/get-pip.py && 	python3 get-pip.py && 	rm get-pip.py
 ---> Using cache
 ---> c2bc708ec81f
Step 8/24 : RUN pip install tensorboard cython boto3  gevent flask pathlib gunicorn tqdm  requests six ipdb h5py html2text nltk progressbar onnxruntime 

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



In [2]:
%%sh
#!/bin/bash

# Download COCO 2017 (train/val images + annotations) and a pretrained backbone into a
# timestamped staging directory, then upload the whole directory to S3.
# NOTE: `set -e` does not catch a failure on the left-hand side of the `| awk` pipelines
# below; only the exit status of `awk` is checked there.
set -e

S3_BUCKET='privisaa-bucket-virginia'
S3_PREFIX="mask-rcnn/sagemaker/input"

# Stage directory must be on EBS volume with 100 GB available space
STAGE_DIR="$HOME/SageMaker/coco-2017-$(date +"%Y-%m-%d-%H-%M-%S")"

echo "Create stage directory: $STAGE_DIR"
mkdir -p "$STAGE_DIR"

# Download one COCO image archive ($1 = archive name without .zip), extract it into the
# stage directory, and delete the archive. unzip's per-file output is collapsed to one
# "=" per 1000 files so the notebook is not flooded.
# wget runs with -nv because its default progress meter writes a large amount of output,
# which is the likely cause of the notebook's "IOPub data rate exceeded" message.
fetch_images() {
    name=$1
    wget -nv -O "$STAGE_DIR/$name.zip" "http://images.cocodataset.org/zips/$name.zip"
    echo "Extracting $STAGE_DIR/$name.zip"
    unzip -o "$STAGE_DIR/$name.zip" -d "$STAGE_DIR" | awk 'BEGIN {ORS="="} {if(NR%1000==0)print "="}'
    echo "Done."
    rm "$STAGE_DIR/$name.zip"
}

fetch_images train2017
fetch_images val2017

# The annotation archive only holds a handful of files, so its unzip output is shown as-is.
wget -nv -O "$STAGE_DIR/annotations_trainval2017.zip" http://images.cocodataset.org/annotations/annotations_trainval2017.zip
unzip -o "$STAGE_DIR/annotations_trainval2017.zip" -d "$STAGE_DIR"
rm "$STAGE_DIR/annotations_trainval2017.zip"

mkdir -p "$STAGE_DIR/pretrained-models"
wget -nv -O "$STAGE_DIR/pretrained-models/ImageNet-R50-AlignPadding.npz" http://models.tensorpack.com/FasterRCNN/ImageNet-R50-AlignPadding.npz

# Upload everything under the stage directory; progress is collapsed to one "=" per 100 files.
echo "`date`: Uploading extracted files to s3://$S3_BUCKET/$S3_PREFIX/train [ eta 12 minutes ]"
aws s3 cp --recursive "$STAGE_DIR" "s3://$S3_BUCKET/$S3_PREFIX/train" | awk 'BEGIN {ORS="="} {if(NR%100==0)print "="}'
echo "Done."

# Cleanup is intentionally left disabled so the staged copy stays on disk.
# echo "Delete stage directory: $STAGE_DIR"
# rm -rf $STAGE_DIR
# echo "Success."


Create stage directory: /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38
Extracting /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38/train2017.zip
Extracting /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38/val2017.zip
Archive:  /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38/annotations_trainval2017.zip
  inflating: /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38/annotations/instances_train2017.json  
  inflating: /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38/annotations/instances_val2017.json  
  inflating: /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38/annotations/captions_train2017.json  
  inflating: /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38/annotations/captions_val2017.json  
  inflating: /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38/annotations/person_keypoints_train2017.json  
  inflating: /home/ec2-user/SageMaker/coco-2017-2020-06-10-15-35-38/annotations/person_keypoints_val2017.json  
Wed Jun 10 15:43:33 U

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [10]:
# Key prefix inside the S3 bucket under which the staged data was uploaded.
prefix = "mask-rcnn/sagemaker"

# Full S3 URI of the training data.
s3train = f's3://{bucket}/{prefix}/input/train'

# Describe the training channel: every object under the prefix, replicated in full.
# NOTE(review): the content type says tfrecord although the staging cell uploads
# COCO images/JSON -- confirm whether this value matters to the container.
train = sagemaker.session.s3_input(
    s3train,
    s3_data_type='S3Prefix',
    distribution='FullyReplicated',
    content_type='application/tfrecord',
)

# Channel name -> input definition, as passed to the estimator's fit().
data_channels = dict(train=train)

In [11]:
# Hyperparameters handed to the training entry point.
# Entries left disabled in earlier experiments: train_batch_size=32,
# learning_rate=3e-5, save_to_s3=bucket.
hyperparameters = {'num_gpus': 8}

# Estimator settings collected in one mapping so they can be reviewed at a glance.
estimator_settings = dict(
    role=role,
    train_instance_count=1,
    train_instance_type='ml.p3dn.24xlarge',  # 'local' was noted as an alternative to try
    entry_point='detectron2/tools/train_net.py',
    # Custom training image in ECR (account and region are hard-coded here).
    image_name='209419068016.dkr.ecr.us-east-1.amazonaws.com/detectron2-train',
    framework_version='1.4.0',
    hyperparameters=hyperparameters,
)

# Instantiate the estimator; nothing is launched until fit() is called.
torch_model = PyTorch(**estimator_settings)


Most recent training error:
FileNotFoundError: [Errno 2] No such file or directory: 'datasets/coco/annotations/instances_val2017.json'
#033[32m[07/11 20:54:10 d2.engine.defaults]: #033[0mModel:

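Next step: create the dataset JSON before building the container and send it to the location the training job reads from (the original note stops before naming the destination).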

In [12]:
# Start the training job, passing the channel mapping defined in `data_channels`.
torch_model.fit(inputs=data_channels)

2020-07-11 20:26:12 Starting - Starting the training job...
2020-07-11 20:26:15 Starting - Launching requested ML instances......
2020-07-11 20:27:29 Starting - Preparing the instances for training......
2020-07-11 20:28:24 Downloading - Downloading input data.............................................................................................................................................
2020-07-11 20:51:56 Training - Downloading the training image...........[34mFailed to load OpenCL runtime[0m
[34mFailed to load OpenCL runtime[0m
[34mFailed to load OpenCL runtime[0m
[34mFailed to load OpenCL runtime[0m
[34mFailed to load OpenCL runtime[0m
[34mFailed to load OpenCL runtime[0m
[34mFailed to load OpenCL runtime[0m
[34mFailed to load OpenCL runtime[0m
[34mCommand Line Args: Namespace(config_file='configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml', dist_url='tcp://127.0.0.1:49152', eval_only=False, local_rank=4, machine_rank=4, num_gpus=1, num_ma

In [1]:
%pylab inline
from detectron2.engine.defaults import DefaultPredictor, DefaultTrainer
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.config.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2 import model_zoo
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.structures import BoxMode

from PIL import Image
import itertools
import torchvision
import torch
from glob import glob
from tqdm import tqdm
import time, os, json
import pandas as pd
import boto3
import matplotlib.patches as patches

# boto3 client for the 'rekognition' service.
rek_client = boto3.client('rekognition')

# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def setup_cfg(config_file, opts, confidence_threshold):
    """Build a frozen config with a single score threshold applied to all model heads.

    Args:
        config_file: value passed to ``cfg.merge_from_file``.
        opts: value passed to ``cfg.merge_from_list`` (applied after the file,
            so it takes precedence over it).
        confidence_threshold: value written to the RetinaNet, ROI-heads and
            panoptic-FPN threshold keys.

    Returns:
        The config object obtained from ``get_cfg()`` after ``freeze()`` was called on it.
    """
    cfg = get_cfg()

    # Layer the overrides on top of the defaults: file first, then the explicit list.
    cfg.merge_from_file(config_file)
    cfg.merge_from_list(opts)

    # Apply the same threshold to every builtin model family.
    model = cfg.MODEL
    model.RETINANET.SCORE_THRESH_TEST = confidence_threshold
    model.ROI_HEADS.SCORE_THRESH_TEST = confidence_threshold
    model.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = confidence_threshold

    cfg.freeze()
    return cfg

Populating the interactive namespace from numpy and matplotlib


ModuleNotFoundError: No module named 'detectron2.engine'

In [None]:
# Folders of extracted video frames to include.
data_folders = [
    '57911_000361_Endzone_frames',
    '57911_000361_Sideline_frames',
    '57996_002717_Endzone_frames',
    '57996_002717_Sideline_frames',
    '58001_000893_Endzone_frames',
    '58001_000893_Sideline_frames',
]

# Every .jpg frame from every folder, sorted as one combined list.
img_files = sorted(
    frame_path
    for frame_dir in data_folders
    for frame_path in glob(f'/home/ubuntu/videos/{frame_dir}/*.jpg')
)

# Helmet images are sorted on their own and appended after all the frames.
# (Skipping the first two helmet images via helmet_files[2:] was tried and left disabled.)
helmet_files = sorted(glob('/home/ubuntu/videos/helmet_images/*.jpg'))
img_files += helmet_files

# Total number of images collected.
len(img_files)

In [6]:
import boto3
# boto3 client for the 'kinesisvideo' service.
kvs = boto3.client('kinesisvideo')

# Ask for the data endpoint to use with the GET_MEDIA API on this stream.
# The stream is identified by name; the commented-out StreamARN is the alternative identifier.
response = kvs.get_data_endpoint(
  StreamName='test-video-stream',
  #StreamARN='arn:aws:kinesisvideo:us-east-1:209419068016:stream/test-video-stream/1586209281595',
  APIName='GET_MEDIA'
)

# Other APIName values noted for reference:
# 'PUT_MEDIA'|'LIST_FRAGMENTS'|'GET_MEDIA_FOR_FRAGMENT_LIST'|'GET_HLS_STREAMING_SESSION_URL'|'GET_DASH_STREAMING_SESSION_URL'|'GET_CLIP'

In [7]:
# Display the raw get_data_endpoint response; the endpoint URL is under 'DataEndpoint'.
response

{'ResponseMetadata': {'RequestId': 'd2e93bcc-3d26-48ff-bf07-00b5d983e909',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd2e93bcc-3d26-48ff-bf07-00b5d983e909',
   'content-type': 'application/json',
   'content-length': '74',
   'date': 'Wed, 15 Jul 2020 02:17:19 GMT'},
  'RetryAttempts': 0},
 'DataEndpoint': 'https://s-1e415f8b.kinesisvideo.us-east-1.amazonaws.com'}

In [None]:
import flask