### Purpose of notebook
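The purpose of this notebook is to pass the YouTube ASL videos (stored as `.npy` arrays under `msasl/RGB/` in S3) through the MoViNet backbone and then save the resulting video embeddings back to S3 under `msasl/movinet_embedding/`.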

In [1]:
# Let's not show unnecessary warnings etc
import warnings
warnings.filterwarnings('ignore')

In [2]:
import configparser

# Parse the AWS shared-credentials file; the profile values are read from `config` later
config = configparser.ConfigParser()
config.read(filenames='/home/ec2-user/.aws/credentials')

['/home/ec2-user/.aws/credentials']

In [3]:
# Key pair from the "default" credentials profile, plus the project bucket and its URI
aws_access_key_id, aws_secret_access_key = (
    config["default"][field]
    for field in ('aws_access_key_id', 'aws_secret_access_key')
)
bucket_name, s3_URI = 'asl-capstone', 's3://asl-capstone/'

In [4]:
import boto3
import s3fs

# Two handles on S3: `fs` (s3fs) for file-like reads/writes, `s3` (boto3) for listing keys
fs = s3fs.S3FileSystem(key=aws_access_key_id, secret=aws_secret_access_key)
s3 = boto3.client(
    's3',
    region_name='us-west-2',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

In [5]:
# Import all required libraries. Keep adding here as you code
import tensorflow as tf, tf_keras
import tensorflow_hub as hub
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import dask
from dask.distributed import Client, progress, as_completed
import requests
tf.keras.backend.clear_session()

2023-11-30 18:39:50.997273: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-30 18:39:51.000509: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-30 18:39:51.045374: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
# Set working directory
# NOTE(review): presumably this directory is the checkout that provides the
# `official.projects.movinet` package imported further down — confirm.
%cd /home/ec2-user/models

/home/ec2-user/models


In [7]:
# MoViNet variant to use; it selects the clip length and frame size defined below
model_version = 'a2'

# (max_frames, image_dims) used in this notebook for each supported variant
movinet_input_settings = {
    'a0': (50, (172, 172)),
    'a2': (80, (224, 224)),
    'a3': (120, (256, 256)),
}

# Fail loudly on a typo or unsupported variant instead of silently leaving
# max_frames / image_dims undefined (or stale from an earlier kernel run)
if model_version not in movinet_input_settings:
    raise ValueError(
        f"Unsupported model_version {model_version!r}; "
        f"expected one of {sorted(movinet_input_settings)}"
    )

max_frames, image_dims = movinet_input_settings[model_version]


# MoViNet modeling package; `movinet` itself is not referenced in the visible cells
# NOTE(review): presumably imported for its side effects when the saved model is loaded — confirm.
from official.projects.movinet.modeling import movinet

# Bind the version-specific "modified" model module to one common name.
# NOTE(review): there is no branch for 'a0', so `movinet_model_modified` stays undefined for that version.
if model_version == 'a3':
    from official.projects.movinet.modeling import movinet_model_a3_modified as movinet_model_modified
elif model_version == 'a2':
    from official.projects.movinet.modeling import movinet_model_a2_modified as movinet_model_modified


In [10]:
# Load the saved Keras model whose 'vid_embedding' output is used for the embeddings below.
# NOTE(review): the directory name suggests an a2 checkpoint from epoch 9; it is hardcoded
# and does not follow `model_version` — keep the two in sync by hand.
# NOTE(review): the TensorFlow startup log reports no CUDA drivers, so "/GPU:0" presumably
# falls back to CPU here — confirm before relying on GPU execution.
path = '/home/ec2-user/ASL-Translator/modeling/movinet_checkpoints_a2_epoch9'
with tf.device("/GPU:0"):
    model = tf_keras.models.load_model(path)







In [11]:
%%time

# Smoke-test the loaded model on one random clip and check the embedding shape.
# The clip size is taken from max_frames / image_dims (set with model_version) instead of
# repeating 80 and 224 here, so the test follows the configured variant.
vals = model(
    tf.random.uniform((1, max_frames, *image_dims, 3), dtype=tf.float32)
)['vid_embedding']
vals.shape

CPU times: user 2min 8s, sys: 1.8 s, total: 2min 10s
Wall time: 31.6 s


TensorShape([1, 80, 768])

In [12]:
# Collect the S3 keys of every .npy file stored under msasl/RGB/

paginator = s3.get_paginator('list_objects_v2')
video_files = [
    obj['Key']
    for page in paginator.paginate(Bucket=bucket_name, Prefix='msasl/RGB/')
    for obj in page.get('Contents', [])
    if obj['Key'].endswith('.npy')
]
print(len(video_files))

13535


In [13]:
# Collect the S3 keys of the .npy embeddings that already exist under msasl/movinet_embedding/

paginator = s3.get_paginator('list_objects_v2')
already_finished = [
    entry['Key']
    for page in paginator.paginate(Bucket=bucket_name, Prefix='msasl/movinet_embedding/')
    for entry in page.get('Contents', [])
    if entry['Key'].endswith('.npy')
]
print(len(already_finished))

0


In [14]:
# Turn the bare S3 keys into full s3:// URIs.
# Entries that already start with the prefix are left untouched, so re-running this cell
# does not produce doubled "s3://asl-capstone/s3://asl-capstone/..." paths.
video_files = [x if x.startswith(s3_URI) else s3_URI + x for x in video_files]
already_finished = [x if x.startswith(s3_URI) else s3_URI + x for x in already_finished]

In [15]:
# Peek at the first two entries to confirm the URI format
print(video_files[:2])

['s3://asl-capstone/msasl/RGB/about.npy', 's3://asl-capstone/msasl/RGB/about_13.npy']


In [16]:
# S3 prefix that process_video prepends to each input file name when saving its embedding
output_uri = 's3://asl-capstone/msasl/movinet_embedding/'

In [17]:
# Create the Dask client (processes=False) and print a separator line followed by its dashboard URL
client = Client(processes=False)
print("-------------------------------------------------------------", client.dashboard_link, sep="\n")

-------------------------------------------------------------
http://172.31.42.63:8787/status


In [18]:
def process_video(vid):
    """Embed one video array with the loaded model and save the embedding to S3.

    Reads the ``.npy`` array at ``vid`` through ``fs``, adds a leading axis of size 1,
    runs it through ``model`` and writes the ``'vid_embedding'`` output as a ``.npy``
    file with the same file name under ``output_uri``.

    Args:
        vid: Path of the input ``.npy`` file, as accepted by ``fs.open``.

    Returns:
        None. A failure is reported on stdout instead of being raised, so one bad
        file does not abort the whole batch.
    """
    try:
        filename = os.path.basename(vid)
        with fs.open(vid, "rb") as src:
            clip = np.load(src)
        clip = np.expand_dims(clip, axis=0)
        # .numpy() already yields a host NumPy array, so no separate .cpu() step is needed
        embeddings = model(clip)['vid_embedding'].numpy()
        with fs.open(output_uri + filename, "wb") as dst:
            np.save(dst, embeddings)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C still interrupts the run,
        # and include the cause so failures can be diagnosed from the log
        print(f"Error with processing {vid}: {exc!r}")

In [None]:
%%time
# Skip videos whose embedding already exists, so an interrupted run can be resumed
# without recomputing everything. Outputs are saved under the input's file name,
# so matching on file names is sufficient.
finished_names = {os.path.basename(key) for key in already_finished}
pending_files = [v for v in video_files if os.path.basename(v) not in finished_names]
print(f"{len(video_files) - len(pending_files)} already embedded, {len(pending_files)} left to process")

# Use Dask's map function to apply the process_video function to each remaining video
futures = client.map(process_video, pending_files)

# Update the progress bar as futures complete; the context manager closes the bar
# even if the loop is interrupted
with tqdm(total=len(pending_files)) as progress_bar:
    for future in as_completed(futures):
        future.result()  # re-raises here if the task itself failed
        progress_bar.update()

# No trailing client.gather(futures): as_completed has already waited for every future,
# and gathering would only echo a long list of None as the cell output.

In [19]:
def get_instance_id():
    """Get current instance ID from metadata

    Queries the EC2 instance metadata service. A session token (IMDSv2) is requested
    first and sent with the lookup when available; if the token request fails, the
    lookup is attempted without a token, as before.

    Returns:
        The instance ID as text.

    Raises:
        requests.HTTPError: if the metadata service answers the lookup with an error
            status (so an error page is never returned as an instance ID).
        requests.RequestException: if the metadata service cannot be reached within
            the timeout, e.g. when not running on EC2.
    """
    metadata_root = "http://169.254.169.254/latest"
    headers = {}
    try:
        token_response = requests.put(
            f"{metadata_root}/api/token",
            headers={"X-aws-ec2-metadata-token-ttl-seconds": "60"},
            timeout=2,
        )
        if token_response.ok:
            headers["X-aws-ec2-metadata-token"] = token_response.text
    except requests.RequestException:
        # Token endpoint not reachable: fall back to the token-less request
        pass

    # Short timeout so the call cannot hang indefinitely
    response = requests.get(f"{metadata_root}/meta-data/instance-id", headers=headers, timeout=2)
    response.raise_for_status()
    return response.text

In [20]:
def stop_instance(instance_id, region_name='us-west-2'):
    """Request a stop of the EC2 instance `instance_id` in `region_name`."""
    ec2_client = boto3.client(
        'ec2',
        region_name=region_name,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    ec2_client.stop_instances(InstanceIds=[instance_id])

In [21]:
# Look up the ID of the instance this notebook is running on
instance_id = get_instance_id()
print(instance_id)
# Stop that instance now that the cells above have run.
# NOTE: under "Run All" this executes unconditionally once the preceding cells finish.
stop_instance(instance_id)

NameError: name 'requests' is not defined