### Purpose of notebook
The purpose of this notebook is to pass the YouTube-ASL video arrays (stored as `.npy` files on S3) through the MoViNet backbone and save the resulting embeddings back to S3.

In [1]:
# Let's not show unnecessary warnings etc
import warnings
warnings.filterwarnings('ignore')

In [2]:
import configparser

# Location of the AWS shared-credentials file on this instance
CREDENTIALS_PATH = '/home/ec2-user/.aws/credentials'

config = configparser.ConfigParser()
# `read` returns the list of files it managed to parse (shown as the cell output)
config.read(CREDENTIALS_PATH)

['/home/ec2-user/.aws/credentials']

In [3]:
# Key pair comes from the "default" profile of the parsed credentials file
default_profile = config["default"]
aws_access_key_id = default_profile['aws_access_key_id']
aws_secret_access_key = default_profile['aws_secret_access_key']

# S3 bucket used throughout the notebook, as a bare name and as a URI prefix
bucket_name = 'asl-capstone'
s3_URI = 's3://asl-capstone/'

In [4]:
import boto3
import s3fs

# File-like access to S3 objects (used to open the .npy files for reading and writing)
fs = s3fs.S3FileSystem(
    key=aws_access_key_id,
    secret=aws_secret_access_key,
)

# boto3 client (used for paginated key listing)
s3 = boto3.client(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    region_name='us-west-2',
)

In [5]:
# Import all required libraries. Keep adding here as you code
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

# Reset Keras' global state before the backbone is built further down
tf.keras.backend.clear_session()

2023-11-18 06:41:44.725835: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-18 06:41:44.751042: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-18 06:41:44.751063: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-18 06:41:44.751750: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-18 06:41:44.755953: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-18 06:41:44.756400: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [6]:
# Set working directory. The relative Kinetics label path and the `official.*` import
# used later in this notebook are resolved from here (presumably a checkout of the
# TensorFlow Model Garden — confirm).
%cd /home/ec2-user/models

/home/ec2-user/models


In [7]:
# Define the model you are going to use
model_version = 'a3'

# Per-variant input settings: (max number of frames, (height, width)).
# The a5 entry previously read (320, 302); that was a typo — the variant uses a
# square 320x320 input like every other entry here.
MOVINET_INPUT_SPECS = {
    'a0': (50, (172, 172)),
    'a3': (120, (256, 256)),
    'a5': (120, (320, 320)),
}

# Fail loudly on an unsupported variant instead of silently leaving
# `max_frames` / `image_dims` undefined (or stale from an earlier run).
if model_version not in MOVINET_INPUT_SPECS:
    raise ValueError(
        f"Unsupported model_version {model_version!r}; "
        f"expected one of {sorted(MOVINET_INPUT_SPECS)}"
    )

max_frames, image_dims = MOVINET_INPUT_SPECS[model_version]

In [8]:
# Get the kinetics-600 action labels (one label per line; path is relative to the
# current working directory)
KINETICS_URL = "official/projects/movinet/files/kinetics_600_labels.txt"
with open(KINETICS_URL, encoding="utf-8") as obj:
    # Strip only the line terminator so each entry is the bare label text;
    # the number of entries is unchanged.
    labels_600 = [line.rstrip("\n") for line in obj]
print("Found %d labels." % len(labels_600))

Found 600 labels.


In [9]:
# Build the MoViNet backbone for the variant selected via `model_version`
from official.projects.movinet.modeling import movinet

# Constructor arguments gathered in one place so they are easy to inspect and tweak
backbone_kwargs = {
    'model_id': model_version,  # change to correspond to model
    'causal': False,
    'use_external_states': False,
}
backbone = movinet.Movinet(**backbone_kwargs)

2023-11-18 06:41:51.030589: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [16]:
# Collect the S3 keys of every source video array (.npy) under the numpy_files prefix

paginator = s3.get_paginator('list_objects_v2')
source_pages = paginator.paginate(
    Bucket=bucket_name,
    Prefix='youtube-asl/1000-samples/numpy_files/',
)

video_files = []
for page in source_pages:
    # A page without a 'Contents' entry contributes nothing
    for content in page.get('Contents', []):
        key = content['Key']
        if key.endswith('.npy'):
            video_files.append(key)
print(len(video_files))

59042


In [17]:
# Collect the S3 keys of the embeddings (.npy) that already exist under the
# movinet/backbone prefix, i.e. the videos that have been processed before

paginator = s3.get_paginator('list_objects_v2')
finished_pages = paginator.paginate(
    Bucket=bucket_name,
    Prefix='youtube-asl/1000-samples/movinet/backbone/',
)

already_finished = []
for page in finished_pages:
    # A page without a 'Contents' entry contributes nothing
    for content in page.get('Contents', []):
        key = content['Key']
        if key.endswith('.npy'):
            already_finished.append(key)
print(len(already_finished))

460


In [18]:
# Add the S3 prefix, turning bare object keys into full S3 URIs.
# Entries that already start with the prefix are left untouched, so re-running this
# cell cannot prepend it a second time. `s3_URI` is the bucket URI defined with the
# other S3 settings ('s3://asl-capstone/').
video_files = [x if x.startswith(s3_URI) else s3_URI + x for x in video_files]
already_finished = [x if x.startswith(s3_URI) else s3_URI + x for x in already_finished]

In [19]:
# Only the files from this offset onwards are handled here.
# NOTE: this rebinds `video_files`, so running the cell again slices the
# already-sliced list a second time.
BATCH_START = 40000
video_files = video_files[BATCH_START:]

In [20]:
# Iteration batch: contiguous, non-overlapping slices that together cover every file.
# Slice ends are exclusive, so each batch must start exactly where the previous one
# stopped; the earlier 5001 / 10001 starts silently skipped the files at index 5000
# and 10000.
iteration_1 = video_files[:5000]
iteration_2 = video_files[5000:10000]
iteration_3 = video_files[10000:15000]
iteration_4 = video_files[15000:]

In [21]:
# Drop every file whose embedding already exists: the expected output path is the
# input path with '/numpy_files/' swapped for '/movinet/backbone/'.
# A set gives constant-time membership checks instead of scanning the whole
# `already_finished` list once per file.
finished_uris = set(already_finished)
iteration_2 = [
    file
    for file in iteration_2
    if file.replace('/numpy_files/', '/movinet/backbone/') not in finished_uris
]
len(iteration_2)

4890

In [22]:
# Destination prefix on S3; each embedding is written to this prefix + the input's filename
movinet_uri = 's3://asl-capstone/youtube-asl/1000-samples/movinet/backbone/'

In [23]:
# Sanity check: show the first path of the first batch
print(iteration_1[0])

s3://asl-capstone/youtube-asl/1000-samples/numpy_files/eXWZAMUjsNs_9.npy


In [None]:
%%time

import concurrent.futures

def process_image(vid):
    """Run one stored video array through the backbone and save the result.

    Args:
        vid: Path of the source ``.npy`` file; it is opened through ``fs``.

    Returns:
        The base filename of ``vid``. The embedding is written to
        ``movinet_uri`` followed by that same filename.
    """
    output_name = os.path.basename(vid)

    with fs.open(vid, "rb") as source:
        clip = np.load(source)

    # Add a leading axis of length 1 before handing the array to the backbone
    batch = np.expand_dims(clip, 0)

    # The 'block4_layer9' entry of the first element of the backbone output is what gets stored
    first_output = backbone(batch)[0]
    embeddings = first_output['block4_layer9'].numpy()

    destination = movinet_uri + output_name
    with fs.open(destination, "wb") as sink:
        np.save(sink, embeddings)

    return output_name


# Process the batch on a thread pool.
# `results` holds the filenames that finished successfully (in completion order);
# `failed` holds (input path, exception) pairs. Collecting failures per file keeps
# one bad video from aborting the bookkeeping for the whole batch, which is what
# happened when `future.result()` re-raised inside a single list comprehension.
results = []
failed = []

with concurrent.futures.ThreadPoolExecutor() as executor:
    # Map each future back to its input so a failure can be reported by path
    future_to_vid = {executor.submit(process_image, vid): vid for vid in iteration_2}

    for future in concurrent.futures.as_completed(future_to_vid):
        try:
            results.append(future.result())
        except Exception as exc:  # recorded and reported below, not swallowed
            failed.append((future_to_vid[future], exc))

print(len(results))

if failed:
    first_vid, first_exc = failed[0]
    print(f"{len(failed)} file(s) failed; first failure: {first_vid}: {first_exc!r}")

2023-11-18 06:44:36.214582: I external/local_xla/xla/service/service.cc:168] XLA service 0x7fdb10042850 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2023-11-18 06:44:36.214616: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
I0000 00:00:1700289876.223073    2687 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2023-11-18 06:44:36.223299: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.




2023-11-18 06:44:40.212644: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-11-18 06:44:42.897400: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-11-18 06:44:45.045174: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-11-18 06:44:45.325199: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-11-18 06:44:47.799398: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-11-18 06:44:47.803216: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-11-18 06:44:51.464652: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this 

In [32]:
# Display the filenames returned by the processing run (completion order, not input order)
results

['eXWZAMUjsNs_90.npy',
 'eXWZAMUjsNs_92.npy',
 'eXWZAMUjsNs_96.npy',
 'eXWZAMUjsNs_99.npy',
 'eXWZAMUjsNs_91.npy',
 'eXWZAMUjsNs_9.npy',
 'eXWZAMUjsNs_93.npy',
 'eXWZAMUjsNs_98.npy',
 'eXWZAMUjsNs_97.npy',
 'eXWZAMUjsNs_94.npy',
 'eXWZAMUjsNs_95.npy',
 'eYP1Z8zkyN4_14.npy',
 'eYP1Z8zkyN4_1.npy',
 'eYP1Z8zkyN4_16.npy',
 'eYP1Z8zkyN4_15.npy',
 'eYP1Z8zkyN4_0.npy',
 'eYP1Z8zkyN4_10.npy',
 'eYP1Z8zkyN4_13.npy',
 'eYP1Z8zkyN4_11.npy',
 'eYP1Z8zkyN4_12.npy']

In [1]:
# for vid in tqdm(iteration_1):
#     # Generate video embeddings and store to CPU
#     filename = os.path.basename(vid)
#     with fs.open(vid,"rb") as f:
#         vid_file = np.load(f)
#     vid_file = np.expand_dims(vid_file, axis=0)
#     embeddings = backbone(vid_file)[0]['block4_layer9'].cpu().numpy()
#     with fs.open(f"{movinet_uri+filename}","wb") as f:
#         np.save(f,embeddings)
#     #pbar.update(1)
# #pbar.close() #Closing the bar