# Extract, Score and Store - Local Videos & CustomVision.ai Model

This notebook extracts frames from local videos, scores each frame against a CustomVision.ai model endpoint to separate frames that contain fish from frames that do not, and stores the scored frames in Azure Blob Storage containers.

Requirements:
* Videos in .mp4 format stored locally on your computer drive
* A CustomVision.ai model trained to identify fish species (Iteration Published)
* An Azure Storage account to store scored frames in


In [14]:
from PIL import Image, ImageDraw, ImageFont
import cv2, io, json, logging, os, sys, tempfile,uuid
import numpy as np
import xlsxwriter 
import matplotlib.pyplot as plt
%matplotlib inline
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateEntry, Region
from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient
from matplotlib.patches import Rectangle
from datetime import datetime, timedelta
from azure.storage.blob import ResourceTypes, AccountSasPermissions, AccessPolicy, ContainerSasPermissions,generate_container_sas,BlobServiceClient, BlobClient, ContainerClient


## CustomVision.ai Credentials

Provide the access details for the Custom Vision project and model that will be used to filter the images into YesFish/NoFish. Set the local file path where the videos are stored, then enter the connection string of the Azure storage account that will store the filtered images.

The dictionary can be adjusted for your classes to allow class-specific thresholding, e.g. only keeping predictions with more than 50% confidence. Some classes perform better than others, so an adjustable per-class threshold reduces the number of false positives and gives more accurate filtering. The per-class Average Precision values can be used as a guide when choosing threshold values.



In [17]:
# Endpoint of the Custom Vision cognitive services account (passed to the
# prediction client), e.g. https://myaccount.cognitiveservices.azure.com/
endpoint = ""

# Replace with a valid key
prediction_key = ""  # Key found in the prediction URL from CustomVision Performance Page
projectID = ""  # Found in the CustomVision project settings page

# Replace with a valid iteration
iteration = "Iteration1"  # Name of the published iteration; must be the iteration the prediction key is used with

# Replace with a valid file path: the folder that directly contains the .mp4 files.
# Every backslash must be escaped, and the value must NOT end with a backslash:
# a trailing `\"` escapes the closing quote (a syntax error), and the processing
# loop adds the separator between this folder and each file name itself.
# get_file_info() reads the five folder names above each video as
# year \ billabong type \ site \ transect \ location.
path = "C:\\  \\  "

# Storage connection string format "DefaultEndpointsProtocol=https;AccountName=audiostores2tg********"
connect_str = "DefaultEndpointsProtocol=htt*****"

# Per-species confidence thresholds in percent. A prediction is kept only when
# its probability (as a percentage) is strictly greater than the value listed
# for its tag. The keys must match the tag names returned by the model exactly.
thresholding_values = {
    "Ambassis agrammus": 50,
    "Ambassis macleayi": 50,
    "Amniataba percoides": 50,
    "Craterocephalus sturcusmuscarum": 50,
    "Denariusa bandata": 50,
    "Glossamia aprion": 50,
    "Lates calcarifer": 50,
    "Melanotaenia nigrans": 50,
    "Melanotaenia splendida inornata": 50,
    "Neoarius": 50,
    "Neosilurus": 50,
    "Other": 50,
    "Scleropages jardinii": 50,
    "Strongylura krefftii": 50,
    "Sycomistes butleri": 50,
    "Toxotes chatareus": 50,
}


In [16]:
def get_file_info(file_path):
    """Derive survey metadata from the components of a video file path.

    The path is split on ``os.sep``. The last component (without its
    extension) is the video name, and the five components before it are read,
    from nearest to farthest, as location, transect, site, billabong type and
    year.

    Args:
        file_path: path of the video, using ``os.sep`` as separator.

    Returns:
        dict with the keys ``video_name``, ``location_name``,
        ``transect_name``, ``site_name``, ``billabong_type`` and ``year``.

    Raises:
        IndexError: if the path has fewer than six components.
    """
    segments = file_path.split(os.sep)

    stem, _extension = os.path.splitext(segments[-1])
    details = {'video_name': stem}

    # Walk up the folder hierarchy: segments[-2] is the folder holding the
    # video, segments[-6] the outermost folder that is used.
    ancestor_keys = ('location_name', 'transect_name', 'site_name', 'billabong_type', 'year')
    for depth, key in enumerate(ancestor_keys, start=2):
        details[key] = segments[-depth]

    return details


In [4]:
def create_azure_storage_container(container_name):
    """Return a client for ``container_name``, creating the container if needed.

    The client is built from the module-level ``connect_str``, so the function
    does not depend on any other global client object being defined first.

    Args:
        container_name: name of the blob container to look up or create.

    Returns:
        The ``ContainerClient`` bound to ``container_name``.

    Raises:
        Any error raised by the storage SDK (for example authentication or
        network failures) propagates to the caller instead of being treated
        as "container does not exist".
    """
    container_client = ContainerClient.from_connection_string(connect_str, container_name)

    # Ask the service explicitly whether the container exists rather than
    # treating every exception from a property lookup as "missing".
    if container_client.exists():
        print(f"Container {container_name} already exists.")
    else:
        container_client.create_container()
        print(f"Creating container {container_name}.")

    return container_client


In [5]:
def generate_container_sastoken(container_client):
    """Create, print and return a read-only SAS token for a container.

    The token is signed with the account key held by the client's credential
    and expires 730 hours after the moment it is generated.

    Args:
        container_client: client of the container the token grants access to.

    Returns:
        The SAS token string.
    """
    valid_until = datetime.utcnow() + timedelta(hours=730)
    read_only = ContainerSasPermissions(read=True)

    token = generate_container_sas(
        container_client.account_name,
        container_client.container_name,
        account_key=container_client.credential.account_key,
        permission=read_only,
        expiry=valid_until,
    )

    # The leading "?" lets the printed value be appended directly to a URL.
    print('SAS token for the storage container ?{0}'.format(token))
    return token
    

In [6]:
def compute_show_predictions(results, np_image, frame_count, fps, min_probability, debug=False):
    """Collect the predictions of one frame that exceed their species threshold.

    Each prediction's probability (as a percentage) is compared with the
    threshold listed for its tag in the module-level ``thresholding_values``
    dictionary. Tags missing from the dictionary trigger a warning and use
    ``min_probability`` instead. Only predictions strictly above their
    threshold are kept.

    Args:
        results: object whose ``predictions`` attribute is iterable; each
            item provides ``tag_name`` and ``probability`` (0-1).
        np_image: not used by this function.
        frame_count: index of the frame within the video.
        fps: frames per second, used to convert the index to a time.
        min_probability: fallback threshold in percent.
        debug: not used by this function.

    Returns:
        dict mapping tag name to a list of ``{'time', 'probability'}`` dicts,
        where ``time`` is ``frame_count / fps`` rounded to 3 decimals and
        ``probability`` is the percentage rounded to 2 decimals. Empty when
        nothing passed its threshold.
    """
    detections_by_species = {}

    for item in results.predictions:
        tag = item.tag_name

        if tag in thresholding_values:
            cutoff = thresholding_values[tag]
        else:
            print ("WARNING: The Species name is not in threshold dictionary, probability is set to default")
            cutoff = min_probability

        # Guard clause: skip anything that is not strictly above the cutoff.
        if not (item.probability*100) > cutoff:
            continue

        record = {'time': round(frame_count/fps, 3), 'probability': round(item.probability*100, 2)}
        detections_by_species.setdefault(tag, []).append(record)

    return detections_by_species

## Adjust Starting Time

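To adjust the starting time for frame extraction from your video, change the value of `starting_time` (in seconds) inside `custom_vision_predictor` below. The default is `starting_time = 0`, i.e. extraction starts at the beginning of the video.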

In [12]:
def custom_vision_predictor(blob_service_client, file_path, prediction_key, endpoint, projectID,thresholding_values, iteration, predictions_per_sec=1, min_probability=50, debug=False):
    """Sample frames from one video, score them and upload them to blob storage.

    Frames are sampled at roughly ``predictions_per_sec`` per second of video,
    encoded as JPEG and sent to the Custom Vision prediction endpoint. A frame
    with at least one prediction above its threshold is uploaded to a
    container named after the video; every other sampled frame is uploaded to
    the shared ``nofish`` container under a per-video prefix.

    Args:
        blob_service_client: client used to obtain blob clients for uploads.
        file_path: path of the video; ``get_file_info`` derives year,
            billabong type, site, transect, location and video name from it.
        prediction_key: Custom Vision prediction key.
        endpoint: Custom Vision endpoint URL.
        projectID: Custom Vision project id.
        thresholding_values: per-species thresholds. Kept for interface
            compatibility; it is not read in this function.
            NOTE(review): ``compute_show_predictions`` looks thresholds up in
            the module-level dictionary of the same name - confirm that the
            two are meant to be the same object.
        iteration: name of the published iteration to score against.
        predictions_per_sec: number of frames to score per second of video.
        min_probability: threshold in percent passed on for tags that have no
            entry in the thresholds dictionary.
        debug: when true, print frame-rate information and upload progress.

    Raises:
        ValueError: if ``predictions_per_sec`` is not positive.
        OSError: if the video cannot be opened or reports no frame rate.
    """
    if predictions_per_sec <= 0:
        raise ValueError(f"predictions_per_sec must be positive, got {predictions_per_sec}")

    # Getting parameters from the path
    predictor = CustomVisionPredictionClient(prediction_key, endpoint=endpoint)
    file_info = get_file_info(file_path)

    # Create azure storage container
    container_name = f"{file_info['year']}-{file_info['site_name']}-{file_info['transect_name']}-{file_info['location_name']}-{file_info['video_name']}".lower().replace(' ', '')
    container_client = create_azure_storage_container(container_name)
    # Called for its printed output; the returned token is not needed here.
    generate_container_sastoken(container_client)

    container_name_NoFish = 'nofish'
    create_azure_storage_container(container_name_NoFish)
    nofish_container_file_path = f"{file_info['year']}-{file_info['site_name']}-{file_info['transect_name']}-{file_info['video_name']}"

    # Prefix shared by all frame names of this video
    video_dir = os.path.join(file_info['year'], file_info['billabong_type'], file_info['site_name'], file_info['transect_name'],file_info['location_name']).replace(os.sep, '-').replace(' ', '-')

    starting_time = 0 # Seconds...

    video_capture = cv2.VideoCapture(file_path)
    try:
        num_of_frames = video_capture.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = int(video_capture.get(cv2.CAP_PROP_FPS))

        # Without a frame rate neither the sampling stride nor the summary
        # below can be computed, so fail with a clear message instead of a
        # ZeroDivisionError.
        if not video_capture.isOpened() or fps <= 0:
            raise OSError(f"Could not open video or read its frame rate: {file_path}")

        if debug:
            print(f"Frames per second: {fps}")
            print(f"Total frame count: {num_of_frames}")

        # Score every `frame_stride`-th frame. Clamp to 1 so that asking for
        # more predictions per second than the video has frames scores every
        # frame instead of dividing by zero.
        frame_stride = max(1, int(fps // predictions_per_sec))

        frame_count = int(starting_time * fps)
        video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_count)

        error_frame_count = 0

        # Analyse video frames and run custom vision
        while video_capture.isOpened():
            success, np_image = video_capture.read()

            # When the container reports no usable frame count, a failed read
            # is the only end-of-stream signal available.
            if not success and num_of_frames <= 0:
                break

            if frame_count % frame_stride == 0:
                if not success:
                    print(f'Could not process frame: {frame_count} of {num_of_frames}')
                    error_frame_count += 1
                else:
                    frame_name = '{0}_Frame-{1}.jpg'.format(video_dir, frame_count)

                    # OpenCV delivers BGR; PIL expects RGB.
                    np_image = cv2.cvtColor(np_image, cv2.COLOR_BGR2RGB)
                    buffer = io.BytesIO()
                    Image.fromarray(np_image).save(buffer, format='JPEG')
                    jpeg_bytes = buffer.getvalue()

                    results = predictor.detect_image(projectID, iteration, jpeg_bytes)
                    species_counter = compute_show_predictions(results,np_image,frame_count,fps,min_probability,debug)

                    # NOTE(review): upload_blob is called without `overwrite`,
                    # so re-running on an already processed video presumably
                    # fails on the first existing blob - confirm whether
                    # overwriting is wanted.
                    if species_counter:
                        if debug:
                            print('Uploading to fish container {0}...'.format(frame_name))
                        blob_client = blob_service_client.get_blob_client(container=container_name, blob=frame_name)
                        blob_client.upload_blob(jpeg_bytes)
                    else:
                        if debug:
                            print('Uploading to nofish container {0}...'.format(frame_name))
                        blob_client = blob_service_client.get_blob_client(container=container_name_NoFish , blob=nofish_container_file_path+'/' + frame_name)
                        blob_client.upload_blob(jpeg_bytes)

            frame_count += 1

            # `>=` rather than `==`: the loop must also stop when the counter
            # starts at or beyond the reported frame count.
            if 0 < num_of_frames <= frame_count:
                break
    finally:
        # Release the capture even if scoring or uploading raised.
        video_capture.release()

    print(f"Total video frames:{num_of_frames}")
    print(f"Total frames to process: {num_of_frames/fps}")
    print(f"Total processed frames that errored: {error_frame_count}")



In [None]:

# Process every .mp4 video found directly inside `path`.
files = os.listdir(path)

# Create the BlobServiceClient object once; it is reused for every video.
blob_service_client = BlobServiceClient.from_connection_string(connect_str)

for f in files:
    # Compare the extension case-insensitively so files such as "clip.MP4"
    # are not silently skipped.
    if os.path.splitext(f)[1].lower() != '.mp4':
        continue

    # Build the path with the platform separator, the same separator
    # get_file_info() splits on; this also copes with a trailing separator
    # in `path`.
    file_path = os.path.join(path, f)

    # Tags that are missing from the thresholds dictionary fall back to
    # min_probability, which is set to 15 here.
    custom_vision_predictor(blob_service_client,file_path, prediction_key, endpoint, projectID, thresholding_values, iteration, predictions_per_sec=1, min_probability=15, debug=False)
