### Purpose of notebook
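The purpose of this notebook is to pass the YouTube ASL videos (stored as `.npy` arrays under `msasl/RGB/` in S3) through the MoViNet backbone and then save the resulting video embeddings back to S3 (under `msasl/movinet_embedding/`).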

In [1]:
# Let's not show unnecessary warnings etc.
# Note: this is a blanket filter — every Python warning raised after this
# point is suppressed, including ones that might point at a real problem.
import warnings
warnings.filterwarnings('ignore')

In [2]:
import configparser

# Parse the AWS shared-credentials file (INI format).
# `ConfigParser.read` silently skips files it cannot open and returns the list
# of files it did parse, so the value displayed by this cell doubles as a check
# that the credentials file was actually found.
config = configparser.ConfigParser()
config.read('/home/ec2-user/.aws/credentials')

['/home/ec2-user/.aws/credentials']

In [3]:
# AWS credentials and S3 settings
# The keys are read from the "default" profile of the `config` parser.
aws_access_key_id = config["default"]['aws_access_key_id']
aws_secret_access_key = config["default"]['aws_secret_access_key']

# Build the URI from the bucket name so the two can never drift apart.
bucket_name = 'asl-capstone'
s3_URI = f's3://{bucket_name}/'

In [4]:
import s3fs
import boto3

# Filesystem-style handle: lets S3 objects be opened like local files.
fs = s3fs.S3FileSystem(
    key=aws_access_key_id,
    secret=aws_secret_access_key,
)

# Low-level client: used for the paginated object listings.
s3 = boto3.client(
    's3',
    region_name='us-west-2',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

In [5]:
# Import all required libraries. Keep adding here as you code
import tensorflow as tf, tf_keras
import tensorflow_hub as hub
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

# Reset Keras' global state before any model is loaded.
# NOTE(review): this targets `tf.keras`, while the model is loaded through the
# separate `tf_keras` package — confirm this call clears the state that matters.
tf.keras.backend.clear_session()

2023-12-01 02:14:18.671048: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-01 02:14:19.536732: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-01 02:14:19.536805: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-01 02:14:19.720764: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-01 02:14:20.069211: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-01 02:14:20.071952: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [6]:
# Set working directory
# NOTE(review): presumably this directory contains the `official` package that
# is imported further down, which would be why the cwd is changed first — confirm.
%cd /home/ec2-user/models

/home/ec2-user/models


In [7]:
# MoViNet variant to run; it selects the input settings and model module below.
model_version = 'a2'

# Per-variant input settings: version -> (max_frames, image_dims).
_input_settings = {
    'a3': (120, (256, 256)),
    'a2': (80, (224, 224)),
    'a0': (50, (172, 172)),
}

# Fail here with a clear message rather than leaving max_frames / image_dims
# undefined and running into a NameError in some later cell.
if model_version not in _input_settings:
    raise ValueError(
        f"Unsupported model_version {model_version!r}; "
        f"expected one of {sorted(_input_settings)}"
    )
max_frames, image_dims = _input_settings[model_version]


from official.projects.movinet.modeling import movinet

# Only a3 and a2 import a modified model module here; for a0 nothing is
# imported and `movinet_model_modified` stays undefined.
if model_version == 'a3':
    from official.projects.movinet.modeling import movinet_model_a3_modified as movinet_model_modified
elif model_version == 'a2':
    from official.projects.movinet.modeling import movinet_model_a2_modified as movinet_model_modified

In [8]:
# Load the saved model from a local checkpoint directory.
# NOTE(review): judging by its name the path points at the a2 checkpoint; it is
# hard-coded and not derived from `model_version`, so keep the two in sync by hand.
path = '/home/ec2-user/ASL-Translator/modeling/movinet_checkpoints_a2_epoch9'
model = tf_keras.models.load_model(path)

2023-12-01 02:14:31.424600: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected






In [9]:
# %%time

# vals = model(tf.random.uniform((1,80,224,224,3), dtype=tf.float32))['vid_embedding']
# vals.shape

In [10]:
# Collect the key of every .npy object stored under the msasl/RGB/ prefix.
# The listing is fetched page by page through a paginator.

paginator = s3.get_paginator('list_objects_v2')
video_files = [
    obj['Key']
    for page in paginator.paginate(Bucket=bucket_name, Prefix='msasl/RGB/')
    for obj in page.get('Contents', [])
    if obj['Key'].endswith('.npy')
]
print(len(video_files))

13535


In [11]:
# Get a list of the embedding files (.npy) that have already been written to
# S3 under msasl/movinet_embedding/, so those videos can be skipped later on.

paginator = s3.get_paginator('list_objects_v2')
already_finished = []
for page in paginator.paginate(Bucket = bucket_name, Prefix = 'msasl/movinet_embedding/'):
    already_finished.extend(content['Key'] for content in page.get('Contents',[]) if content['Key'].endswith(('.npy')))
print(len(already_finished))

306


In [12]:
# Add the S3 prefix, turning bare object keys into full S3 URIs.
# Uses the shared `s3_URI` setting instead of repeating the bucket literal, and
# leaves entries that already carry the prefix untouched so that re-running
# this cell does not prepend it a second time.
video_files = [x if x.startswith(s3_URI) else s3_URI + x for x in video_files]
already_finished = [x if x.startswith(s3_URI) else s3_URI + x for x in already_finished]

In [13]:
already_finished[0]

's3://asl-capstone/msasl/movinet_embedding/about.npy'

In [14]:
# Keep only the videos whose embedding does not exist yet.
# A set makes each membership test O(1); testing against the list would cost
# up to len(video_files) * len(already_finished) string comparisons.
finished_uris = set(already_finished)
video_files = [
    file for file in video_files
    if file.replace('/RGB/', '/movinet_embedding/') not in finished_uris
]

len(video_files)

13229

In [15]:
# Split the remaining videos into three fixed slices (iteration batches).
iteration_1, iteration_2, iteration_3 = (
    video_files[:5000],
    video_files[5000:10000],
    video_files[10000:],
)
# iteration_4 = video_files[15000:]

In [16]:
iteration_1[0]

's3://asl-capstone/msasl/RGB/afraid_26.npy'

In [17]:
# S3 prefix under which each embedding is saved; the source file name is
# appended to it when writing.
output_uri = 's3://asl-capstone/msasl/movinet_embedding/'

In [18]:
print(video_files[0])

s3://asl-capstone/msasl/RGB/afraid_26.npy


In [None]:
%%time

import concurrent.futures

def process_image(vid):
    """Embed one video with the loaded model and save the embedding to S3.

    Args:
        vid: S3 URI of a ``.npy`` file holding the video's frames
            (assumed to be shaped (frames, height, width, channels) —
            TODO confirm against the preprocessing step).

    Returns:
        The base file name of ``vid``. The embedding is written to
        ``output_uri + filename``.
    """
    filename = os.path.basename(vid)
    with fs.open(vid, "rb") as f:
        vid_file = np.load(f)

    # Resize to the resolution configured for the selected model version
    # instead of a hard-coded (224, 224), so the two cannot disagree.
    vid_file = tf.image.resize(vid_file, image_dims).numpy()
    # Add a leading axis so the model receives a batch of one video.
    vid_file = np.expand_dims(vid_file, axis=0)
    embeddings = model(vid_file)['vid_embedding']
    with fs.open(output_uri + filename, "wb") as f:
        np.save(f, embeddings)

    return filename


# Run process_image over the batch on a thread pool.
# Successes and failures are collected separately: previously the first future
# that raised aborted result collection, so `results` was never assigned and
# there was no record of which video had failed.
results = []
failures = {}
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Map each future back to its source URI so a failure can be attributed.
    futures = {executor.submit(process_image, i): i for i in iteration_2}
    # Collect the results in completion order
    for future in concurrent.futures.as_completed(futures):
        try:
            results.append(future.result())
        except Exception as exc:
            failures[futures[future]] = exc

print(len(results))
if failures:
    print(f"{len(failures)} videos failed; inspect `failures` (URI -> exception) for details")

2023-12-01 02:17:51.778471: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-12-01 02:17:52.502147: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-12-01 02:17:52.770518: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-12-01 02:17:52.838728: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-12-01 02:17:53.073213: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-12-01 02:17:53.161666: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
2023-12-01 02:17:53.798347: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this 

In [32]:
results

['eXWZAMUjsNs_90.npy',
 'eXWZAMUjsNs_92.npy',
 'eXWZAMUjsNs_96.npy',
 'eXWZAMUjsNs_99.npy',
 'eXWZAMUjsNs_91.npy',
 'eXWZAMUjsNs_9.npy',
 'eXWZAMUjsNs_93.npy',
 'eXWZAMUjsNs_98.npy',
 'eXWZAMUjsNs_97.npy',
 'eXWZAMUjsNs_94.npy',
 'eXWZAMUjsNs_95.npy',
 'eYP1Z8zkyN4_14.npy',
 'eYP1Z8zkyN4_1.npy',
 'eYP1Z8zkyN4_16.npy',
 'eYP1Z8zkyN4_15.npy',
 'eYP1Z8zkyN4_0.npy',
 'eYP1Z8zkyN4_10.npy',
 'eYP1Z8zkyN4_13.npy',
 'eYP1Z8zkyN4_11.npy',
 'eYP1Z8zkyN4_12.npy']

In [1]:
# for vid in tqdm(iteration_1):
#     # Generate video embeddings and store to CPU
#     filename = os.path.basename(vid)
#     with fs.open(vid,"rb") as f:
#         vid_file = np.load(f)
#     vid_file = np.expand_dims(vid_file, axis=0)
#     embeddings = backbone(vid_file)[0]['block4_layer9'].cpu().numpy()
#     with fs.open(f"{movinet_uri+filename}","wb") as f:
#         np.save(f,embeddings)
#     #pbar.update(1)
# #pbar.close() #Closing the bar