In [None]:
from os.path import dirname, abspath, join as pjoin
from os import makedirs, mkdir
import scipy.io as sio
from pathlib import Path
import numpy as np
from sklearn.decomposition import PCA

In [None]:
# Load the ProceL "change_iphone_battery" MATLAB archive. The cells below read
# the 'grammar', 'key_steps', 'superframe_frame', 'feature_hofs_sf' and
# 'feature_vgg' entries of the loaded result.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
data = sio.loadmat('/Users/ryangreen/Desktop/Procedure Learning Research/cygnus_data/ProceL/change_iphone_battery/data.mat')

In [None]:
### CREATE DIRECTORIES TO STORE ALL PRODUCED FEATURES
# Layout produced under <output_path>/<directory_name>/:
#   transcripts/  groundTruth/  features/
# NOTE(review): output_path is an absolute, machine-specific location.

output_path = "/Users/ryangreen/Desktop/Procedure Learning Research/mapped_procel"
directory_name = "change_iphone_battery"
bkgrd_class_label = "BKGRD"

full_directory_path = pjoin(output_path, directory_name)
transcript_dir_path = pjoin(full_directory_path, "transcripts")
gt_dir_path = pjoin(full_directory_path, "groundTruth")
feature_dir_path = pjoin(full_directory_path, "features")

for directory in [full_directory_path, transcript_dir_path, gt_dir_path, feature_dir_path]:
    # Remember whether the directory was already there so a re-run of this cell
    # reports "EXISTS" instead of a misleading creation error.
    already_present = Path(directory).is_dir()
    try:
        makedirs(directory, exist_ok=True)
    except FileExistsError as e:
        # With exist_ok=True this is only raised when the path exists but is
        # not a directory, which is a genuine failure.
        print ("ERROR - Creation of the directory %s failed: %s \n" % (directory, e.strerror))
    else:
        if already_present:
            print ("EXISTS - Reusing %s" % directory)
        else:
            print ("SUCCESS - Created %s" % directory)

In [None]:
### CREATE MAPPING FILE FOR KEYSTEPS TO INDICES ###
# One "<index> <label>" pair per line: index 0 is the background class, and the
# key steps listed in data['grammar'] follow from index 1 with the spaces in
# their names replaced by underscores.
mapping_arr = ['0 ' + bkgrd_class_label] + [
    str(count) + ' ' + name[0].replace(" ", "_")
    for count, name in enumerate(data['grammar'][:,0], start=1)
]

# The context manager closes the handle even if the write raises.
with open(pjoin(full_directory_path, "mapping.txt"), "w") as mapping_file:
    mapping_file.write("\n".join(mapping_arr))

In [None]:
def reduce_to_transcript(arr):
    """Collapse every run of consecutive equal entries of ``arr`` to one entry.

    The first element is always kept; each later element is kept only when it
    differs from its immediate predecessor. Returns a new list and leaves
    ``arr`` untouched.
    """
    return [
        label
        for position, label in enumerate(arr)
        if position == 0 or arr[position - 1] != label
    ]

In [None]:
### MAP SUPERFRAME KEYSTEP INTERVALS TO TRANSCRIPTS AND GT TEXT FILES ###
# For every video this cell writes three artifacts named
# "<directory_name>_<zero-padded 4-digit video index>":
#   features/<name>.npy     - HOF and VGG features concatenated along axis 0
#   groundTruth/<name>.txt  - one label per superframe, newline separated
#   transcripts/<name>.txt  - the ground truth with consecutive repeats collapsed

# Key-step labels in grammar order, with spaces replaced by underscores.
key_step_names = [name[0].replace(" ", "_") for name in data['grammar'][:,0]]

for video_idx, video in enumerate(data['key_steps'][:,0]):
    steps = [step_range.tolist() for step_range in video[:,0]]

    # Start with every superframe labelled as background; annotated intervals
    # overwrite these entries below.
    num_superframes = data['superframe_frame'][video_idx][0].shape[0]
    groundtruth = [bkgrd_class_label] * num_superframes

    for step_idx, step in enumerate(steps):
        key_step_name = key_step_names[step_idx]

        for segment in step:
            # Only two-element [start, end] entries are treated as intervals.
            if len(segment) != 2:
                continue
            first_sf, last_sf = segment
            # The interval end is inclusive. Indices at or beyond
            # num_superframes are dropped.
            # NOTE(review): the original author flagged this clipping as
            # something that should not be needed ("check with Zijia"); if the
            # .mat indices are 1-based the labels may be shifted by one - confirm.
            for sf_idx in range(first_sf, min(last_sf + 1, num_superframes)):
                groundtruth[sf_idx] = key_step_name

    # Collapse the per-superframe labels into the ordered list of segments.
    transcript = reduce_to_transcript(groundtruth)

    video_name = directory_name + "_" + str(video_idx).zfill(4)

    np.save(
        pjoin(feature_dir_path, video_name),
        np.concatenate(
            (data['feature_hofs_sf'][video_idx][0], data['feature_vgg'][video_idx][0]),
            axis=0,
        ),
    )

    # Context managers close each handle even if a write raises.
    with open(pjoin(gt_dir_path, video_name + ".txt"), "w") as output_file:
        output_file.write("\n".join(groundtruth))

    with open(pjoin(transcript_dir_path, video_name + ".txt"), "w") as output_file:
        output_file.write("\n".join(transcript))

In [None]:
def flatten_video_features(features):
    """Join the per-video feature matrices in ``features`` along axis 1.

    ``features`` is any iterable of array-likes that agree on every axis except
    axis 1; the result is a single array holding them side by side.
    """
    per_video = list(features)
    return np.concatenate(per_video, axis=1)

In [None]:
def reduce_and_transform_features(reduced_feature_dim, output_dir_path, make_directory=True, random_state=None):
    """Fit a PCA on all videos' combined features and save the reduced features.

    Reads the module-level ``data`` dict ('feature_hofs_sf' and 'feature_vgg')
    and ``directory_name``. For each video the two feature matrices are
    concatenated along axis 0; the PCA is fitted on all videos at once, with
    every column of the combined matrices treated as one sample.

    Parameters
    ----------
    reduced_feature_dim : passed to ``PCA`` as ``n_components``.
    output_dir_path : directory receiving one
        "<directory_name>_<4-digit index>.npy" file per video. Each saved array
        is the transformed data transposed back, i.e. components along axis 0.
    make_directory : when true, create ``output_dir_path`` (and any missing
        parents) first; an already existing directory is reused.
    random_state : forwarded to ``PCA``. The default ``None`` keeps
        scikit-learn's default behaviour; pass an int for repeatable results
        when a randomized solver is selected.

    Returns
    -------
    ``pca.explained_variance_ratio_``, or ``None`` (after printing a message)
    when the two feature arrays disagree on the number of videos.
    """
    if make_directory:
        # exist_ok keeps a re-run of the notebook from failing on the
        # directory left behind by a previous run.
        makedirs(output_dir_path, exist_ok=True)

    num_videos = data['feature_hofs_sf'].shape[0]
    hofs_features = data['feature_hofs_sf'][:,0]
    vgg_features = data['feature_vgg'][:,0]

    if num_videos != len(vgg_features):
        print('Num videos does not match features', num_videos, len(hofs_features), len(vgg_features))
        return

    # Stack the HOF and VGG features of each video along axis 0.
    combined_features = [
        np.concatenate((hofs, vgg), axis=0)
        for hofs, vgg in zip(hofs_features, vgg_features)
    ]

    # PCA expects (n_samples, n_features), so the column-per-sample matrices
    # are transposed before fitting and transforming.
    pca = PCA(n_components=reduced_feature_dim, random_state=random_state)
    pca.fit(flatten_video_features(combined_features).T)

    for video_idx, video_features in enumerate(combined_features):
        # Transpose back so the saved array keeps samples along axis 1.
        transformed_vid_features = pca.transform(video_features.T).T
        video_name = directory_name + "_" + str(video_idx).zfill(4)
        np.save(pjoin(output_dir_path, video_name), transformed_vid_features)

    return pca.explained_variance_ratio_

In [None]:
# Reduce the combined features to 300 principal components and store them in a
# '300_features' sub-directory of the features folder.
explained_var = reduce_and_transform_features(
    reduced_feature_dim=300,
    output_dir_path=pjoin(feature_dir_path, '300_features'),
    make_directory=True,
)

In [None]:
import matplotlib.pyplot as plt

# Cumulative share of variance (in percent) captured by the first k components.
var1 = np.round(np.cumsum(explained_var * 100), decimals=4)

# Plot against the number of components (1-based) rather than the 0-based array
# index, and label the figure so it can be read on its own.
fig, ax = plt.subplots()
ax.plot(np.arange(1, len(var1) + 1), var1)
ax.set_xlabel("Number of principal components")
ax.set_ylabel("Cumulative explained variance (%)")
ax.set_title("PCA cumulative explained variance")
plt.show()