<a href="https://colab.research.google.com/github/VinceChin/-/blob/master/WOA7015_GroupProjects_ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Processing the Data

## Download and extract the SSBD dataset

In [None]:
import requests
import zipfile
import os

def download_and_unzip(url, extract_to='.', timeout=60):
    """
    Downloads a ZIP file from a given URL and extracts its contents into a specified directory.

    The archive is streamed to a temporary ``temp.zip`` inside ``extract_to`` (so it is
    never held in memory as a whole), extracted in place, and the temporary file is
    removed afterwards whether or not the download/extraction succeeded.
    Failures are reported with ``print`` instead of being raised.

    Parameters:
    url (str): The URL of the ZIP file.
    extract_to (str): The directory to extract the contents to. Defaults to the current directory.
    timeout (float or tuple): Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 60 so that a stalled connection cannot hang the notebook forever.

    Returns:
    None
    """
    zip_file_name = os.path.join(extract_to, 'temp.zip')
    zip_created = False  # only delete temp.zip if this call actually created it

    try:
        # exist_ok avoids the check-then-create race of os.path.exists() + makedirs()
        os.makedirs(extract_to, exist_ok=True)

        try:
            # stream=True + iter_content writes the archive chunk by chunk instead of
            # loading the full body into memory; timeout bounds connect/read waits.
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()  # raises HTTPError on 4xx/5xx responses

                with open(zip_file_name, 'wb') as file:
                    zip_created = True
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        file.write(chunk)

            try:
                # Attempt to extract the ZIP file
                with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
                    zip_ref.extractall(extract_to)
            except zipfile.BadZipFile:
                print("Failed to unzip the file. The file may be corrupted.")
        finally:
            # Clean up: delete the ZIP file, including a partially written one
            if zip_created and os.path.exists(zip_file_name):
                os.remove(zip_file_name)

    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except requests.exceptions.ConnectionError as conn_err:
        print(f"Connection error occurred: {conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        print(f"Request timed out: {timeout_err}")
    except Exception as e:
        print(f"An error occurred: {e}")

# Example usage: fetch the SSBD release archive and unpack it into ./ssbd.
# Download/extraction errors are printed by download_and_unzip rather than raised,
# so check the cell output before relying on the extracted files.
download_url = "https://rolandgoecke.files.wordpress.com/2019/11/ssbd-release.zip"
download_and_unzip(download_url, extract_to='ssbd')

## Download YouTube videos and check download results

In [None]:
!pip install pytube

Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m734.0 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytube
Successfully installed pytube-15.0.0


### Parse the XML annotations from the dataset

In [None]:
import xml.etree.ElementTree as ET
import os
import pandas as pd
from pytube import YouTube

def _child_text(parent, tag):
    """Return the text of the first ``tag`` child of ``parent``, or None if there is no such child."""
    node = parent.find(tag)
    return node.text if node is not None else None


def parse_xml(xml_file):
    """
    Parse one annotation XML file into a plain dictionary.

    Parameters:
    xml_file (str): Path to the annotation XML file.

    Returns:
    dict: Keys, in order: 'xml_name' (base name of ``xml_file``, including the
        ".xml" suffix), 'url', 'height', 'width', 'frames', 'persons', 'duration',
        'conversation' (text of the same-named child of the root element) and
        'behaviours' (a list with one dict per child of <behaviours>, holding
        'time', 'bodypart', 'category', 'intensity' and 'modality').
        Values are the raw element text (strings); an element that is absent or
        empty yields None, and a missing <behaviours> element yields an empty list.

    Raises:
    xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    OSError: If the file cannot be read.
    """
    root = ET.parse(xml_file).getroot()

    video_data = {'xml_name': os.path.basename(xml_file)}
    for tag in ('url', 'height', 'width', 'frames', 'persons', 'duration', 'conversation'):
        video_data[tag] = _child_text(root, tag)
    video_data['behaviours'] = []

    behaviours = root.find('behaviours')
    # Compare against None explicitly: an Element with no children is falsy.
    if behaviours is not None:
        for behaviour in behaviours:
            video_data['behaviours'].append({
                tag: _child_text(behaviour, tag)
                for tag in ('time', 'bodypart', 'category', 'intensity', 'modality')
            })

    return video_data

### Download Videos

In [None]:
def download_youtube_video(url, save_path, xml_name):
    """
    Download the highest-resolution stream pytube offers for ``url``.

    The file is written into ``save_path`` and named ``<xml_name>.<subtype>``,
    where the subtype is the part of the stream's MIME type after the slash.

    Parameters:
    url (str): Address of the YouTube video.
    save_path (str): Directory the video is written to.
    xml_name (str): Stem used for the saved file name.

    Returns:
    bool: True when the download completed; False when any exception occurred
        (the failure is printed, not raised).
    """
    try:
        best_stream = YouTube(url).streams.get_highest_resolution()
        # Keep the container reported by the stream rather than assuming one.
        container = best_stream.mime_type.rsplit('/', 1)[-1]
        best_stream.download(output_path=save_path, filename=f"{xml_name}.{container}")
    except Exception as err:
        print(f"Failed to download {url}: {err}")
        return False
    return True

def process_xml_files(directory, video_directory):
    """
    Parse every ``.xml`` annotation in ``directory`` and try to download its video.

    Parameters:
    directory (str): Folder containing the annotation XML files.
    video_directory (str): Folder the videos are saved to; created if missing.

    Returns:
    list[dict]: One ``parse_xml`` record per annotation file, in file-name order,
        each extended with a boolean 'download_status' key holding the result of
        ``download_youtube_video``.
    """
    # exist_ok avoids the check-then-create race of os.path.exists() + makedirs()
    os.makedirs(video_directory, exist_ok=True)

    all_data = []

    # os.listdir returns entries in arbitrary order; sorting makes the order of the
    # returned records (and of any DataFrame built from them) reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.xml'):
            continue

        video_data = parse_xml(os.path.join(directory, filename))

        # NOTE: 'xml_name' is the annotation's base name *including* ".xml", so the
        # video is saved as "<name>.xml.<ext>". Kept as-is because code that later
        # looks the files up may rely on that naming.
        video_file_name = video_data['xml_name']
        video_data['download_status'] = download_youtube_video(
            video_data['url'], video_directory, video_file_name
        )

        all_data.append(video_data)

    return all_data

# Example usage
# The SSBD archive is extracted into the relative folder 'ssbd', so derive the
# annotations path from it instead of hard-coding Colab's '/content' prefix.
# On Colab (working directory /content) this resolves to '/content/ssbd/Annotations'.
annotations_directory = os.path.abspath(os.path.join('ssbd', 'Annotations'))  # Your XML files directory
videos_directory = 'downloaded_videos'  # Directory to store downloaded videos

# Parse every annotation and attempt each download; failures are printed and
# recorded as download_status == False rather than raised.
all_video_data = process_xml_files(annotations_directory, videos_directory)

# Convert list to pandas DataFrame (one row per annotation file)
all_video_data_df = pd.DataFrame(all_video_data)

Failed to download http://www.youtube.com/watch?v=5BVFjqo0FUY: 5BVFjqo0FUY is unavailable
Failed to download http://www.youtube.com/watch?v=T9rbit_oiJA: T9rbit_oiJA is a private video
Failed to download http://www.youtube.com/watch?v=ehlLfMossUY: ehlLfMossUY is a private video
Failed to download http://www.youtube.com/watch?v=Pqd9Vu-juPI: Pqd9Vu-juPI is a private video
Failed to download http://www.youtube.com/watch?v=zuoD4tEtYyk: zuoD4tEtYyk is a private video
Failed to download http://www.youtube.com/watch?v=xeKKMkVgNPU: xeKKMkVgNPU is a private video
Failed to download http://www.youtube.com/watch?v=5WTHMIJ_61I: 'streamingData'
Failed to download http://www.youtube.com/watch?v=-rC-ab0nzxY: -rC-ab0nzxY is unavailable
Failed to download http://www.youtube.com/watch?v=Pqd9Vu-juPI: Pqd9Vu-juPI is a private video
Failed to download http://www.youtube.com/watch?v=TH5mlAhdw00: TH5mlAhdw00 is a private video
Failed to download http://www.youtube.com/watch?v=5sgfS0SSh8o: 5sgfS0SSh8o is age r

### Display download results

In [None]:
import pandas as pd

# Example function to determine the category from xml_name
def determine_category(xml_name):
  """
  Map an annotation file name to its behaviour category.

  The match is a case-insensitive substring test, tried in the order
  'armflapping', 'headbanging', 'spinning'; the first hit wins.

  Parameters:
  xml_name (str): Annotation file name.

  Returns:
  str: The matched category, or 'unknown' when none of them occurs in the name.
  """
  haystack = xml_name.lower()

  for label in ('armflapping', 'headbanging', 'spinning'):
    if label in haystack:
      return label

  return 'unknown'

# Requires all_video_data_df (one row per annotation, with 'xml_name' and
# 'download_status' columns) from the download step.
# Label every video with the behaviour class derived from its annotation file name.
all_video_data_df['category'] = all_video_data_df['xml_name'].apply(determine_category)

# One summary record per behaviour class is collected here.
category_success_rates = []

# Count successful downloads per class and express them as a percentage.
for category in ['armflapping', 'headbanging', 'spinning']:
    category_data = all_video_data_df.loc[all_video_data_df['category'] == category]
    total_count = len(category_data)
    success_count = category_data['download_status'].sum()  # True counts as 1

    # Guard against a class with no annotations at all.
    if total_count > 0:
        success_rate = (success_count / total_count) * 100
    else:
        success_rate = 0

    category_success_rates.append({
        'Category': category,
        'Success Rate': success_rate,
        'success_count': success_count,
        'total_count': total_count,
    })

# Turn the per-class records into a table.
success_rate_df = pd.DataFrame(category_success_rates)

# Show the table in the cell output.
print(success_rate_df)


      Category  Success Rate  success_count  total_count
0  armflapping          72.0             18           25
1  headbanging          68.0             17           25
2     spinning          76.0             19           25


# Build & Train the model


In [None]:
!pip install mmaction

Collecting mmaction
  Using cached mmaction-0.5.0-py2.py3-none-any.whl (106 kB)
Collecting mmcv (from mmaction)
  Using cached mmcv-2.1.0.tar.gz (471 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting Pillow<=6.2.2 (from mmaction)
  Using cached Pillow-6.2.2-cp310-cp310-linux_x86_64.whl
Collecting addict (from mmcv->mmaction)
  Using cached addict-2.4.0-py3-none-any.whl (3.8 kB)
Collecting mmengine>=0.3.0 (from mmcv->mmaction)
  Using cached mmengine-0.10.1-py3-none-any.whl (450 kB)
Collecting yapf (from mmcv->mmaction)
  Using cached yapf-0.40.2-py3-none-any.whl (254 kB)
Building wheels for collected packages: mmcv
  Building wheel for mmcv (setup.py) ... [?25l[?25hdone
  Created wheel for mmcv: filename=mmcv-2.1.0-cp310-cp310-linux_x86_64.whl size=27499312 sha256=947f40a8bc27cf299fd1575e0fc5fd501e27b02a73171b901cf5a1c2aa231028
  Stored in directory: /root/.cache/pip/wheels/8b/09/35/94a7f7ba6a00e3810abd0492340e4cbba0ff3d443120a94120
Successfully built mmcv
Installin