## image classification using tensorflow for handwashing    
### methodology:
- separate into train and test data
- separate into images

In [27]:
import os
import random
import shutil
import cv2

``input_dir`` contains the video files    
``output_frames_dir_test`` will contain 30% of the frames         
``output_frames_dir_train`` will contain the other 70% of the frames        

In [88]:
# Root folder that holds both the raw videos and the extracted frames.
# Set the HANDWASH_DATA_ROOT environment variable to run the notebook on another
# machine; when it is not set, the original local path is used.
data_root = os.environ.get(
    'HANDWASH_DATA_ROOT',
    '/Users/andrewbahsoun/Documents/computer_science/sci-250/hand-washing/data',
)

# The empty last component keeps the trailing separator on input_dir.
input_dir = os.path.join(data_root, 'HandWashDataset', '')
output_frames_dir_test = os.path.join(data_root, 'outputFrames', 'test')
output_frames_dir_train = os.path.join(data_root, 'outputFrames', 'train')

# Folder name for each step, indexed by step number. Index 0 is a placeholder so
# that steps[1] is 'Step_1' and steps[12] is 'Step_12'.
steps = ['nostep0'] + ['Step_{}'.format(number) for number in range(1, 13)]


Creates a dictionary with 12 classes: the key is the step number, and the value is a list of the file names of all the videos found in that step's folder.

In [89]:
# Maps each step number (1-12) to the list of file names found in that step's
# folder under input_dir.
all_file_names_dict = {}

for step in range(1, 13):
    step_dir = os.path.join(input_dir, steps[step])
    # Only the names are needed, so the files are not opened. Sub-directories are
    # skipped, and the names are sorted because os.listdir returns them in
    # arbitrary order, which would make any later split differ between runs.
    all_file_names_dict[step] = sorted(
        name
        for name in os.listdir(step_dir)
        if os.path.isfile(os.path.join(step_dir, name))
    )



### get all videos into frames

In [86]:
def get_frames_from_video(directory, filename, step, output_frames_dir):
    '''
    Extract every frame of one video and save each frame as a JPEG image.

    The video is read from ``directory/steps[step]/filename`` with OpenCV. Frames
    are processed sequentially and written to ``output_frames_dir`` as
    ``frame_<frame number>_<video file name without extension>.jpg``, with frame
    numbers starting at 0. If the first read fails, nothing is written.

    Parameters:
        directory: folder that contains one sub-folder per step.
        filename: name of the video file inside the step's sub-folder.
        step: index into the module-level ``steps`` list that selects the sub-folder.
        output_frames_dir: folder the JPEG frames are written to; created if missing.
    '''
    # cv2.imwrite does not create missing folders, so make sure the target exists.
    os.makedirs(output_frames_dir, exist_ok=True)
    video_stem = os.path.splitext(filename)[0]

    # Creating a VideoCapture object to read the video
    cap = cv2.VideoCapture(os.path.join(directory, steps[step], filename))
    try:
        is_success, image = cap.read()
        frame_number = 0

        while is_success:
            out_filename = "frame_{}_{}.jpg".format(frame_number, video_stem)
            save_path_and_name = os.path.join(output_frames_dir, out_filename)
            cv2.imwrite(save_path_and_name, image)
            is_success, image = cap.read()
            frame_number += 1
    finally:
        # Release the capture even if reading or writing raises, so the video
        # handle is not kept open across the many videos processed in one run.
        cap.release()


This code splits videos from each step into training and testing datasets based on a `test_ratio` of 30% for testing. It processes each video, skips the system files `.DS_Store`, and extracts frames using `get_frames_from_video`, saving them in the appropriate output directories. The `counter` ensures videos are distributed correctly between training and testing.

In [95]:
test_ratio = 0.3

for step in range(1, 13):
    # Skip the macOS system file so it is neither processed nor counted.
    videos = [name for name in all_file_names_dict[step] if name != ".DS_Store"]

    # The split is computed per step from the number of videos in THAT step
    # (not from the number of steps), so every class gets the same proportions.
    test_count = round(len(videos) * test_ratio)
    train_count = len(videos) - test_count

    # Videos are assigned in the order they appear in the list: the first
    # train_count go to the training set and the remainder to the test set.
    for counter, video in enumerate(videos):
        if counter < train_count:
            #train data
            get_frames_from_video(input_dir, video, step, output_frames_dir_train)
        else:
            #test data
            get_frames_from_video(input_dir, video, step, output_frames_dir_test)
    

#### debugging purposes 
This writes the names of all the files in a directory to a text file, one name per line.

In [31]:
def find_num_of_videos_per_step(directory, output_filename):
    """Append the name of every entry in ``directory`` to ``output_filename``, one per line."""
    # Append mode: calling this again adds to the existing listing instead of replacing it.
    with open(output_filename, 'a') as listing:
        listing.writelines(entry + '\n' for entry in os.listdir(directory))


In [None]:
# Write one listing per split: first the test frames, then the train frames.
for frames_dir, listing_name in (
    (output_frames_dir_test, 'test_videos'),
    (output_frames_dir_train, 'train_videos'),
):
    find_num_of_videos_per_step(frames_dir, listing_name)

### get photos into directories by class

In [52]:
def move_video_into_subdirectory_onedigit(directory, output_dir, step):
    """Move the files of a single-digit step from ``directory`` into ``output_dir``.

    A file belongs to the step when its name contains ``"A_0<step>"``
    (for example ``"A_03"`` for step 3).

    Parameters:
        directory: folder whose entries are scanned (not recursively).
        output_dir: folder the matching files are moved into; created if missing.
        step: step number used to build the ``"A_0<step>"`` tag.
    """
    tag = "A_0" + str(step)
    # If output_dir did not exist, shutil.move would rename the first matching
    # file to that path instead of moving it into a folder, and later moves
    # could overwrite it.
    os.makedirs(output_dir, exist_ok=True)
    for name in os.listdir(directory):
        source_path = os.path.join(directory, name)
        # Only regular files are moved; sub-folders (such as the per-step output
        # folders that live inside ``directory``) are left where they are.
        if tag in name and os.path.isfile(source_path):
            shutil.move(source_path, output_dir)
    

In [53]:
def move_video_into_subdirectory_twodigit(directory, output_dir, step):
    """Move the files of a two-digit step from ``directory`` into ``output_dir``.

    A file belongs to the step when its name contains ``"A_<step>"``
    (for example ``"A_11"`` for step 11).

    Parameters:
        directory: folder whose entries are scanned (not recursively).
        output_dir: folder the matching files are moved into; created if missing.
        step: step number used to build the ``"A_<step>"`` tag.
    """
    tag = "A_" + str(step)
    # If output_dir did not exist, shutil.move would rename the first matching
    # file to that path instead of moving it into a folder, and later moves
    # could overwrite it.
    os.makedirs(output_dir, exist_ok=True)
    for name in os.listdir(directory):
        source_path = os.path.join(directory, name)
        # Only regular files are moved; sub-folders (such as the per-step output
        # folders that live inside ``directory``) are left where they are.
        if tag in name and os.path.isfile(source_path):
            shutil.move(source_path, output_dir)

In [57]:
# Sort the test frames of steps 1-9 into their per-step folders (step_1 ... step_9).
for step in range(1, 10):
    step_folder = 'step_' + str(step)
    move_video_into_subdirectory_onedigit(
        output_frames_dir_test,
        os.path.join(output_frames_dir_test, step_folder),
        step,
    )

In [58]:
# Sort the test frames of steps 10-12 into their per-step folders (step_10 ... step_12).
for step in range(10, 13):
    step_folder = 'step_' + str(step)
    move_video_into_subdirectory_twodigit(
        output_frames_dir_test,
        os.path.join(output_frames_dir_test, step_folder),
        step,
    )

In [67]:
# Sort the train frames of steps 1-9 into their per-step folders (step_1 ... step_9).
for step in range(1, 10):
    step_folder = 'step_' + str(step)
    move_video_into_subdirectory_onedigit(
        output_frames_dir_train,
        os.path.join(output_frames_dir_train, step_folder),
        step,
    )

In [73]:
# Sort the train frames of steps 10-12 into their per-step folders (step_10 ... step_12).
for step in range(10, 13):
    step_folder = 'step_' + str(step)
    move_video_into_subdirectory_twodigit(
        output_frames_dir_train,
        os.path.join(output_frames_dir_train, step_folder),
        step,
    )