# Stanford 40
### Download the data
Once downloaded, the zip files are visible in the Files tab (the folder icon on the left of the screen).

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def onceSF():
  #Import dataset
  !wget http://vision.stanford.edu/Datasets/Stanford40_JPEGImages.zip
  !wget http://vision.stanford.edu/Datasets/Stanford40_ImageSplits.zip

  #Unzip it
  !unzip Stanford40_JPEGImages.zip -d Stanford40/
  !unzip Stanford40_ImageSplits.zip -d Stanford40/

  #Load dataset
  with open('Stanford40/ImageSplits/train.txt', 'r') as f:
    train_files = list(map(str.strip, f.readlines()))
    train_labels = ['_'.join(name.split('_')[:-1]) for name in train_files]
    print(f'Train files ({len(train_files)}):\n\t{train_files}')
    print(f'Train labels ({len(train_labels)}):\n\t{train_labels}\n')

  with open('Stanford40/ImageSplits/test.txt', 'r') as f:
      test_files = list(map(str.strip, f.readlines()))
      test_labels = ['_'.join(name.split('_')[:-1]) for name in test_files]
      print(f'Test files ({len(test_files)}):\n\t{test_files}')
      print(f'Test labels ({len(test_labels)}):\n\t{test_labels}\n')
      
  action_categories = sorted(list(set(['_'.join(name.split('_')[:-1]) for name in train_files])))
  print(f'Action categories ({len(action_categories)}):\n{action_categories}')

  #Visualise image in dataset
  # import cv2
  # from google.colab.patches import cv2_imshow

  # image_no = 234  # change this to a number between [0, 3999] and you can see a different training image
  # img = cv2.imread(f'Stanford40/JPEGImages/{train_files[image_no]}')
  # cv2_imshow(img)
  # print(f'An image with the label - {train_labels[image_no]}')

  return True


--2022-04-02 10:47:05--  http://vision.stanford.edu/Datasets/Stanford40_JPEGImages.zip
Resolving vision.stanford.edu (vision.stanford.edu)... 171.64.68.10
Connecting to vision.stanford.edu (vision.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 304771808 (291M) [application/zip]
Saving to: ‘Stanford40_JPEGImages.zip’


2022-04-02 10:47:32 (10.5 MB/s) - ‘Stanford40_JPEGImages.zip’ saved [304771808/304771808]

--2022-04-02 10:47:33--  http://vision.stanford.edu/Datasets/Stanford40_ImageSplits.zip
Resolving vision.stanford.edu (vision.stanford.edu)... 171.64.68.10
Connecting to vision.stanford.edu (vision.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 62010 (61K) [application/zip]
Saving to: ‘Stanford40_ImageSplits.zip’


2022-04-02 10:47:33 (285 KB/s) - ‘Stanford40_ImageSplits.zip’ saved [62010/62010]



### Unzip it

### Read the train and test splits and create separate label lists

In [None]:
import cv2
import numpy as np
import tensorflow as tf



""" Load the Standford40 dataset, perform data preprocessing """

def _readImagesToArray(file_names, img_size, image_dir='Stanford40/JPEGImages'):
    """Read the named images, resize each one to img_size and stack them in one array.

    Args:
        file_names: image file names relative to image_dir.
        img_size: (width, height) tuple, passed unchanged to cv2.resize.
        image_dir: directory that holds the image files.

    Returns:
        Array of shape (len(file_names), height, width, 3) with NumPy's default
        float dtype, holding the resized images in the channel order cv2.imread
        produced.

    Raises:
        FileNotFoundError: if cv2.imread cannot load one of the images.
    """
    width, height = img_size
    # cv2.resize takes (width, height) but returns an array shaped (height, width, channels)
    images = np.empty((len(file_names), height, width, 3))
    for idx, file_name in enumerate(file_names):
        path = f'{image_dir}/{file_name}'
        img = cv2.imread(path)
        if img is None:  # cv2.imread signals a missing/unreadable file with None
            raise FileNotFoundError(f'Could not read image: {path}')
        images[idx] = cv2.resize(img, img_size)
    return images


def loadSF40(img_size=(224,224)):
    """Load the Stanford40 train and test images and their labels.

    Reads the file lists from Stanford40/ImageSplits, derives each label from the
    file name (everything before the last underscore), loads and resizes every
    image, and passes both image arrays through dataPreprocessing.

    Args:
        img_size: target (width, height) of every image.

    Returns:
        Tuple (SF_training_set, train_labels, SF_test_set, test_labels,
        action_categories). The labels are lists of strings aligned with the
        image arrays; action_categories is the sorted list of distinct training labels.

    Raises:
        FileNotFoundError: if an image listed in a split file cannot be read.
    """
    with open('Stanford40/ImageSplits/train.txt', 'r') as f:
        # blank lines are not file names, so they are skipped
        train_files = [line.strip() for line in f if line.strip()]
    train_labels = ['_'.join(name.split('_')[:-1]) for name in train_files]

    with open('Stanford40/ImageSplits/test.txt', 'r') as f:
        test_files = [line.strip() for line in f if line.strip()]
    test_labels = ['_'.join(name.split('_')[:-1]) for name in test_files]
    print("Lengt of train_files: ", len(train_files))

    action_categories = sorted(set(train_labels))
    print(f'Action categories ({len(action_categories)}):\n{action_categories}')

    # Read images, resize to the requested size and store them in arrays.
    # Each split is indexed with its own counter, so every test image lands in its own slot.
    SF_training_set = _readImagesToArray(train_files, img_size)
    SF_test_set = _readImagesToArray(test_files, img_size)

    #Preprocess data with dataPreprocessing function
    print("Going into preprocessing!") #Debug purposes
    SF_training_set, SF_test_set = dataPreprocessing(SF_training_set, SF_test_set)
    print("Done with preprocessing")

    return (SF_training_set, train_labels, SF_test_set, test_labels, action_categories)


""" Load the TVHI dataset, do data preprocessing """

def _readMiddleFrames(videos, labels, img_size, video_dir='../TVHI_data/tV_human_interactions_videos'):
    """Grab the resized middle frame of each video, keeping frames and labels aligned.

    Args:
        videos: video file names relative to video_dir.
        labels: one label per entry of videos.
        img_size: (width, height) tuple passed to cv2.resize.
        video_dir: directory that holds the video files.

    Returns:
        (frames, kept_labels): two lists of equal length. A video whose middle
        frame cannot be read is reported and left out of both lists.
    """
    frames, kept_labels = [], []
    for video, label in zip(videos, labels):
        vidcap = cv2.VideoCapture(f'{video_dir}/{video}')
        try:
            middle_frame = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)/2)
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, middle_frame) #Get the middle frame of the video
            success, image = vidcap.read()
        finally:
            vidcap.release()  # free the decoder even if reading raised
        if not success:
            print(f'Warning: could not read the middle frame of {video}; skipping it')
            continue
        frames.append(cv2.resize(image, img_size))
        kept_labels.append(label)
    return frames, kept_labels


def loadTVHIData(img_size=(224,224)):
    """Load one frame per video of the TVHI dataset together with its label.

    Set 1 is used as the test set and set 2 as the training set. For every video
    the middle frame is read and resized to img_size; both frame lists are then
    passed through dataPreprocessing.

    Args:
        img_size: target (width, height) of every frame.

    Returns:
        Tuple (TVHI_training_set, train_labels, TVHI_test_set, test_labels, classes).
        Labels only cover the videos whose frame could be read, so they stay
        aligned with the frames.

    Raises:
        RuntimeError: if no frame could be read for the training or the test set.
    """
    set_1_indices = [[2,14,15,16,18,19,20,21,24,25,26,27,28,32,40,41,42,43,44,45,46,47,48,49,50],
                    [1,6,7,8,9,10,11,12,13,23,24,25,27,28,29,30,31,32,33,34,35,44,45,47,48],
                    [2,3,4,11,12,15,16,17,18,20,21,27,29,30,31,32,33,34,35,36,42,44,46,49,50],
                    [1,7,8,9,10,11,12,13,14,16,17,18,22,23,24,26,29,31,35,36,38,39,40,41,42]]
    set_2_indices = [[1,3,4,5,6,7,8,9,10,11,12,13,17,22,23,29,30,31,33,34,35,36,37,38,39],
                    [2,3,4,5,14,15,16,17,18,19,20,21,22,26,36,37,38,39,40,41,42,43,46,49,50],
                    [1,5,6,7,8,9,10,13,14,19,22,23,24,25,26,28,37,38,39,40,41,43,45,47,48],
                    [2,3,4,5,6,15,19,20,21,25,27,28,30,32,33,34,37,43,44,45,46,47,48,49,50]]
    classes = ['handShake', 'highFive', 'hug', 'kiss']  # we ignore the negative class

    # test set
    set_1 = [f'{classes[c]}_{i:04d}.avi' for c in range(len(classes)) for i in set_1_indices[c]]
    set_1_label = [f'{classes[c]}' for c in range(len(classes)) for i in set_1_indices[c]]
    print(f'Set 1 to be used for test ({len(set_1)}):\n\t{set_1}')
    print(f'Set 1 labels ({len(set_1_label)}):\n\t{set_1_label}\n')

    # training set
    set_2 = [f'{classes[c]}_{i:04d}.avi' for c in range(len(classes)) for i in set_2_indices[c]]
    set_2_label = [f'{classes[c]}' for c in range(len(classes)) for i in set_2_indices[c]]
    print(f'Set 2 to be used for train and validation ({len(set_2)}):\n\t{set_2}')
    print(f'Set 2 labels ({len(set_2_label)}):\n\t{set_2_label}')

    # Take the middle frame from each video; train and test frames are resized the same way
    TVHI_training_set, train_labels = _readMiddleFrames(set_2, set_2_label, img_size)
    TVHI_test_set, test_labels = _readMiddleFrames(set_1, set_1_label, img_size)

    if not TVHI_training_set or not TVHI_test_set:
        raise RuntimeError('No TVHI frames could be read; check the path to the video directory')

    TVHI_training_set, TVHI_test_set = dataPreprocessing(TVHI_training_set, TVHI_test_set)

    return (TVHI_training_set, train_labels, TVHI_test_set, test_labels, classes)


""" Data Preprocessing Function """

def _padSpatialAxes(images, border=2):
    """Zero-pad the height and width axes of a batch of images.

    Args:
        images: array-like shaped (N, H, W) or (N, H, W, C, ...).
        border: number of zero pixels added on each side of axes 1 and 2.

    Returns:
        New array with axes 1 and 2 enlarged by 2 * border; all other axes unchanged.
    """
    images = np.asarray(images)
    # Pad only axes 1 and 2; the batch axis and any trailing (channel) axes get no padding
    pad_width = ((0, 0), (border, border), (border, border)) + ((0, 0),) * (images.ndim - 3)
    return np.pad(images, pad_width, 'constant')


def dataPreprocessing(training_images, test_images, padding = False):
    """Optionally zero-pad the training and test images.

    No rescaling or normalisation is applied: with padding=False both inputs are
    returned unchanged.

    Args:
        training_images: batch of training images (array, or sequence of arrays).
        test_images: batch of test images (array, or sequence of arrays).
        padding: when True, add a 2-pixel border of zeros around the height and
            width of every image. Works for grayscale (N, H, W) and for colour
            (N, H, W, C) batches.

    Returns:
        Tuple (training_images, test_images).
    """
    if len(training_images) > 0:  # an empty batch has no first image to describe
        print("Shape 1 :" ,training_images[0].shape)

    # Scaling to the range 0-1 is currently disabled:
    # training_images = training_images / 255.0
    # test_images = test_images / 255.0

    if padding:
        if len(training_images) > 0:
            print(f"Image shape before: {training_images[0].shape}")

        # Pad images with 0s since we want information in the edges
        training_images = _padSpatialAxes(training_images)
        test_images = _padSpatialAxes(test_images)

        if len(training_images) > 0:
            print(f"Updated Image Shape: {training_images[0].shape}.")

    return training_images, test_images


""" Function for calculating the optical flow with Farnebäck algorithm """

def opticalFlowCalculator(video_path, img_size=(224,224)):
    """Compute colour-coded Farnebäck optical flow around the middle of each video.

    For every video, reading starts 8 frames before the middle frame. Dense
    optical flow is computed between each pair of consecutive frames for up to
    16 pairs, and each flow field is encoded as a BGR image (hue = flow
    direction, brightness = flow magnitude).

    Args:
        video_path: iterable of video file names inside
            '../TVHI_data/tV_human_interactions_videos/'.
        img_size: (width, height) every frame is resized to before the flow is computed.

    Returns:
        List with one array per input video, in input order. Each array stacks the
        flow images of that video, shape (n_frames, height, width, 3) with
        n_frames <= 16. It is an empty array when no frame pair could be read, so
        positions stay aligned with the caller's labels.
    """
    optical_flow_data = []

    # Positional arguments of cv2.calcOpticalFlowFarneback after `flow`:
    # pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags
    OF_params = [0.5, 3, 15, 3, 5, 1.2, 0]

    for video in video_path:

        vidcap = cv2.VideoCapture(f'../TVHI_data/tV_human_interactions_videos/{video}') # get video
        stackOFframes = []
        try:
            # Start 8 frames before the middle so the middle frame sits in the middle of the stack;
            # clamp at 0 for clips too short to go 8 frames back
            middle_frame = max(int((vidcap.get(cv2.CAP_PROP_FRAME_COUNT)/2)-8), 0)
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, middle_frame)               # set the video to the start frame
            success, old_frame = vidcap.read()                              # read image

            if success:
                old_frame = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)     # Convert to grayscale to fit algorithm (Farneback)
                old_frame = cv2.resize(old_frame, img_size)                 # Resize image to fit the other data

                # HSV canvas with the same height/width as the resized frames, so the flow fits into it
                hsv = np.zeros(old_frame.shape + (3,), dtype=np.uint8)
                hsv[...,1] = 255                                            # Set HSV's saturation channel to constant

                for _ in range(16): #Loop over 16 frames, middle frame will be middle of stack
                    success, new_frame = vidcap.read()
                    if not success:
                        break

                    #Do preprocessing of new frame
                    new_frame = cv2.cvtColor(new_frame, cv2.COLOR_BGR2GRAY)
                    new_frame = cv2.resize(new_frame, img_size)

                    # calculate the optical flow for each pixel in the frame with Farneback;
                    # the parameters have to be passed as separate positional arguments
                    flow = cv2.calcOpticalFlowFarneback(old_frame, new_frame, None, *OF_params)

                    #Encode the optical flow as polar coordinates and impose color coding. Convert to bgr image.
                    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
                    hsv[..., 0] = ang * 180 / np.pi / 2
                    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
                    bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

                    stackOFframes.append(bgr)           # add this flow image to the stack of the current video

                    old_frame = new_frame               # update the previous frame to current frame
        finally:
            vidcap.release()                            # free the decoder even if an error occurred

        optical_flow_data.append(np.asarray(stackOFframes))     #make the stack of frames into an np array and store in general optical flow data list

    return optical_flow_data


""" the Optical Flow input to the CNN """

def opticalFlowInput():
    """Build the optical-flow training and test data for the TVHI videos.

    Set 1 of the video indices is the test set and set 2 is the training set. The
    file names and labels of both sets are printed, then the optical flow of every
    video is computed with opticalFlowCalculator.

    Returns:
        Tuple (training_data, train_labels, testing_data, test_labels).
    """
    classes = ['handShake', 'highFive', 'hug', 'kiss']  # we ignore the negative class

    # Video numbers per class, one inner list per entry of `classes`
    test_indices = [
        [2,14,15,16,18,19,20,21,24,25,26,27,28,32,40,41,42,43,44,45,46,47,48,49,50],
        [1,6,7,8,9,10,11,12,13,23,24,25,27,28,29,30,31,32,33,34,35,44,45,47,48],
        [2,3,4,11,12,15,16,17,18,20,21,27,29,30,31,32,33,34,35,36,42,44,46,49,50],
        [1,7,8,9,10,11,12,13,14,16,17,18,22,23,24,26,29,31,35,36,38,39,40,41,42],
    ]
    train_indices = [
        [1,3,4,5,6,7,8,9,10,11,12,13,17,22,23,29,30,31,33,34,35,36,37,38,39],
        [2,3,4,5,14,15,16,17,18,19,20,21,22,26,36,37,38,39,40,41,42,43,46,49,50],
        [1,5,6,7,8,9,10,13,14,19,22,23,24,25,26,28,37,38,39,40,41,43,45,47,48],
        [2,3,4,5,6,15,19,20,21,25,27,28,30,32,33,34,37,43,44,45,46,47,48,49,50],
    ]

    def expand(indices_per_class):
        # Turn the per-class video numbers into parallel lists of file names and labels
        videos, labels = [], []
        for class_name, numbers in zip(classes, indices_per_class):
            for number in numbers:
                videos.append(f'{class_name}_{number:04d}.avi')
                labels.append(f'{class_name}')
        return videos, labels

    # Set 1 = test set
    test_videos, test_labels = expand(test_indices)
    print(f'Set 1 to be used for test ({len(test_videos)}):\n\t{test_videos}')
    print(f'Set 1 labels ({len(test_labels)}):\n\t{test_labels}\n')

    # Set 2 = training set
    train_videos, train_labels = expand(train_indices)
    print(f'Set 2 to be used for train and validation ({len(train_videos)}):\n\t{train_videos}')
    print(f'Set 2 labels ({len(train_labels)}):\n\t{train_labels}')

    # Training data is computed first, then the test data
    training_data = opticalFlowCalculator(train_videos)
    testing_data = opticalFlowCalculator(test_videos)

    return (training_data, train_labels, testing_data, test_labels)

### Visualize a photo from the training files and also print its label

# TV Human Interaction (TV-HI)
### Download the dataset

In [None]:
# !wget http://www.robots.ox.ac.uk/~alonso/data/tv_human_interactions_videos.tar.gz
# !wget http://www.robots.ox.ac.uk/~alonso/data/readme.txt

--2022-04-02 11:40:42--  http://www.robots.ox.ac.uk/~alonso/data/tv_human_interactions_videos.tar.gz
Resolving www.robots.ox.ac.uk (www.robots.ox.ac.uk)... 129.67.94.2
Connecting to www.robots.ox.ac.uk (www.robots.ox.ac.uk)|129.67.94.2|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://www.robots.ox.ac.uk/~alonso/data/tv_human_interactions_videos.tar.gz [following]
--2022-04-02 11:40:42--  https://www.robots.ox.ac.uk/~alonso/data/tv_human_interactions_videos.tar.gz
Connecting to www.robots.ox.ac.uk (www.robots.ox.ac.uk)|129.67.94.2|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 163535078 (156M) [application/x-gzip]
Saving to: ‘tv_human_interactions_videos.tar.gz.1’


2022-04-02 11:40:49 (26.6 MB/s) - ‘tv_human_interactions_videos.tar.gz.1’ saved [163535078/163535078]

--2022-04-02 11:40:49--  http://www.robots.ox.ac.uk/~alonso/data/readme.txt
Resolving www.robots.ox.ac.uk (www.robots.ox.ac.uk)... 129.67.94.2
Conn

### Untar compressed files and move the readme.txt into TV-HI folder

In [None]:
# !mkdir TV-HI
# !tar -xvf  'tv_human_interactions_videos.tar.gz' -C TV-HI
# !mv readme.txt 'TV-HI/readme.txt'

mkdir: cannot create directory ‘TV-HI’: File exists
tv_human_interactions_videos/
tv_human_interactions_videos/negative_0001.avi
tv_human_interactions_videos/negative_0002.avi
tv_human_interactions_videos/negative_0003.avi
tv_human_interactions_videos/negative_0004.avi
tv_human_interactions_videos/negative_0005.avi
tv_human_interactions_videos/negative_0006.avi
tv_human_interactions_videos/negative_0007.avi
tv_human_interactions_videos/negative_0008.avi
tv_human_interactions_videos/negative_0009.avi
tv_human_interactions_videos/negative_0010.avi
tv_human_interactions_videos/negative_0011.avi
tv_human_interactions_videos/negative_0012.avi
tv_human_interactions_videos/negative_0013.avi
tv_human_interactions_videos/negative_0014.avi
tv_human_interactions_videos/negative_0015.avi
tv_human_interactions_videos/negative_0016.avi
tv_human_interactions_videos/negative_0017.avi
tv_human_interactions_videos/negative_0018.avi
tv_human_interactions_videos/negative_0019.avi
tv_human_interactions_vid

### Let us copy the Set 1 and Set 2 indices from the readme.txt and create our lists
You can check the readme.txt in TV-HI directory which you can find from the Files tab on the left (little Folder symbol). If you cannot see it try to refresh the Files tab (little folder symbol with a circular arrow on it). Then you can double click readme.txt to see its content. You won't need to do anything with this file because the code below creates the sets and labels for you.

In [None]:
# set_1_indices = [[2,14,15,16,18,19,20,21,24,25,26,27,28,32,40,41,42,43,44,45,46,47,48,49,50],
#                  [1,6,7,8,9,10,11,12,13,23,24,25,27,28,29,30,31,32,33,34,35,44,45,47,48],
#                  [2,3,4,11,12,15,16,17,18,20,21,27,29,30,31,32,33,34,35,36,42,44,46,49,50],
#                  [1,7,8,9,10,11,12,13,14,16,17,18,22,23,24,26,29,31,35,36,38,39,40,41,42]]
# set_2_indices = [[1,3,4,5,6,7,8,9,10,11,12,13,17,22,23,29,30,31,33,34,35,36,37,38,39],
#                  [2,3,4,5,14,15,16,17,18,19,20,21,22,26,36,37,38,39,40,41,42,43,46,49,50],
#                  [1,5,6,7,8,9,10,13,14,19,22,23,24,25,26,28,37,38,39,40,41,43,45,47,48],
#                  [2,3,4,5,6,15,19,20,21,25,27,28,30,32,33,34,37,43,44,45,46,47,48,49,50]]
# classes = ['handShake', 'highFive', 'hug', 'kiss']  # we ignore the negative class

# # test set
# set_1 = [f'{classes[c]}_{i:04d}.avi' for c in range(len(classes)) for i in set_1_indices[c]]
# set_1_label = [f'{classes[c]}' for c in range(len(classes)) for i in set_1_indices[c]]
# print(f'Set 1 to be used for test ({len(set_1)}):\n\t{set_1}')
# print(f'Set 1 labels ({len(set_1_label)}):\n\t{set_1_label}\n')

# # training set
# set_2 = [f'{classes[c]}_{i:04d}.avi' for c in range(len(classes)) for i in set_2_indices[c]]
# set_2_label = [f'{classes[c]}' for c in range(len(classes)) for i in set_2_indices[c]]
# print(f'Set 2 to be used for train and validation ({len(set_2)}):\n\t{set_2}')
# print(f'Set 2 labels ({len(set_2_label)}):\n\t{set_2_label}')


Set 1 to be used for test (100):
	['handShake_0002.avi', 'handShake_0014.avi', 'handShake_0015.avi', 'handShake_0016.avi', 'handShake_0018.avi', 'handShake_0019.avi', 'handShake_0020.avi', 'handShake_0021.avi', 'handShake_0024.avi', 'handShake_0025.avi', 'handShake_0026.avi', 'handShake_0027.avi', 'handShake_0028.avi', 'handShake_0032.avi', 'handShake_0040.avi', 'handShake_0041.avi', 'handShake_0042.avi', 'handShake_0043.avi', 'handShake_0044.avi', 'handShake_0045.avi', 'handShake_0046.avi', 'handShake_0047.avi', 'handShake_0048.avi', 'handShake_0049.avi', 'handShake_0050.avi', 'highFive_0001.avi', 'highFive_0006.avi', 'highFive_0007.avi', 'highFive_0008.avi', 'highFive_0009.avi', 'highFive_0010.avi', 'highFive_0011.avi', 'highFive_0012.avi', 'highFive_0013.avi', 'highFive_0023.avi', 'highFive_0024.avi', 'highFive_0025.avi', 'highFive_0027.avi', 'highFive_0028.avi', 'highFive_0029.avi', 'highFive_0030.avi', 'highFive_0031.avi', 'highFive_0032.avi', 'highFive_0033.avi', 'highFive_0034.a

In [None]:
# from moviepy.editor import *

# video_no = 98  # change this to a number between [0, 100] and you can see a different training video from Set 2

# clip=VideoFileClip(f'TV-HI/tv_human_interactions_videos/{set_2[video_no]}')
# print(f'\n\nA video with the label - {set_2_label[video_no]}\n')
# clip.ipython_display(width=280)

Imageio: 'ffmpeg-linux64-v3.3.1' was not found on your computer; downloading it now.
Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg-linux64-v3.3.1 (43.8 MB)
Downloading: 8192/45929032 bytes (0.0%)2670592/45929032 bytes (5.8%)5922816/45929032 bytes (12.9%)9560064/45929032 bytes (20.8%)13107200/45929032 bytes (28.5%)16433152/45929032 bytes (35.8%)19841024/45929032 bytes (43.2%)23355392/45929032 bytes (50.9%)26574848/45929032 bytes (57.9%)29409280/45929032 bytes (64.0%)32948224/45929032 bytes (71.7%)36339712/45929032 bytes (79.1%)39657472/45929032 bytes (86.3%)

100%|██████████| 51/51 [00:00<00:00, 630.45it/s]
100%|██████████| 55/55 [00:00<00:00, 283.97it/s]
