
# 🕺🕺🕺 Follow Your Pose 💃💃💃: 
# Pose-Guided Text-to-Video Generation using Pose-Free Videos

In [None]:
#@markdown Check type of GPU and VRAM available.
!nvidia-smi - -query-gpu = name, memory.total, memory.free - -format = csv, noheader
#make sure you are using Tesla T4, 15360 MiB, 15101 MiB


# 🕺🕺🕺 Install Environment

In [None]:
# @title  Environment Setup
!apt-get update
!apt install software-properties-common
!sudo dpkg --remove --force-remove-reinstreq python3-pip python3-setuptools python3-wheel
!apt-get install python3-pip

!git clone https: // github.com/jiaxinlarhh/ict3104-team13-2023.git

!git clone https: // github.com/open-mmlab/mmpose.git


In [None]:
# @title  Setup FollowYourPose & MMPose

# FollowYourPose
!cd / content/ict3104-team13-2023
!export PYTHONPATH = /content/ict3104-team13-2023: $PYTHONPATH
!python - m pip install - q - U - -pre triton
!apt update
!python - m pip install - q diffusers == 0.11.1 torch == 1.13.1 transformers == 4.26.0 bitsandbytes == 0.35.4 imageio-ffmpeg xformers == 0.0.16 - -extra-index-url https: // download.pytorch.org/whl/cu113

# MMPose
%cd / content/mmpose
!python3 - m pip install torch torchvision torchaudio - -index-url https: // download.pytorch.org/whl/cu118
# install MMEngine, MMCV and MMDetection using MIM
!python3 - m pip install - U openmim
!mim install mmengine
!mim install "mmcv>=2.0.0"
!mim install "mmdet>=3.0.0"


# 🕺🕺🕺 Data Exploration

In [None]:
#@title US T13-3 Input files subfolders

import os

main_folder = '/content/ict3104-team13-2023/data_folder'
subfolders = ['stickman', 'others']

# Create the top-level data folder the first time the cell runs.
if not os.path.exists(main_folder):
    os.mkdir(main_folder)

# Create whichever input subfolders are still missing and report each one;
# folders that already exist are left untouched and produce no output.
for subfolder in subfolders:
    subfolder_path = os.path.join(main_folder, subfolder)
    if os.path.exists(subfolder_path):
        continue
    os.mkdir(subfolder_path)
    print(f"Created subfolder '{subfolder_path}'")

In [None]:
#@title  US T13-4 Load Video
import ipywidgets as widgets
import os, cv2
from os import listdir
from google.colab.patches import cv2_imshow
from IPython.display import  HTML
from base64 import b64encode

# variables
data_url = None
# Absolute path, so the listing does not depend on the notebook's current
# working directory (other cells change it with %cd).
vid_directory = "/content/ict3104-team13-2023/videos"

# store video names in list, skipping hidden entries (names starting with ".");
# sorted so the dropdown order is the same on every run
vid_list = sorted(
    name for name in os.listdir(vid_directory) if not name.startswith(".")
)

# show vid name in list as dropdown; value=None starts with nothing selected
dropdown = widgets.Dropdown(options=vid_list, value=None)

# UI
button = widgets.Button(description="Enter")
output = widgets.Output()
display(dropdown, button, output)

# UI functions
def on_button_clicked(b):
    """Show the video currently selected in `dropdown` inside `output`.

    The file is read from `vid_directory`, base64-encoded and embedded in an
    HTML5 <video> element. Whatever the previous click rendered is cleared
    first, so embedded players do not pile up in the output area.

    Args:
        b: the clicked button (unused).
    """
    selected = dropdown.value
    with output:
        output.clear_output()

        # The dropdown starts with no selection; concatenating None into a
        # path would raise TypeError.
        if not selected:
            print("Please select a video first")
            return

        try:
            # Context manager closes the handle even if the read fails.
            with open(os.path.join(vid_directory, selected), 'rb') as video_file:
                mp4 = video_file.read()
        except OSError:
            mp4 = b""

        if mp4:
            data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
            display(HTML("""
                  <video controls>
                        <source src="%s" type="video/mp4">
                  </video>
                  """ % data_url))
        else:
            # Unreadable or empty file.
            print("error opening vid file")


button.on_click(on_button_clicked)

# 🕺🕺🕺 Inference

Because GPU memory is limited, we recommend setting `video_length=8` in `./configs/pose_sample.yaml` so that inference runs successfully.

In addition, keep the skeleton frame length (`./followyourpose/pipelines/pipeline_followyourpose.py`, line 422) equal to `video_length`.

In [None]:
%cd /content/FollowYourPose
!pwd
!TORCH_DISTRIBUTED_DEBUG=DETAIL accelerate launch txt2video.py --config="configs/pose_sample.yaml"  --skeleton_path="./pose_example/vis_ikun_pose2.mov"

In [None]:
# @title US T13-7 Inference videos with captions

from moviepy.editor import VideoFileClip, clips_array
import os
from moviepy.editor import VideoFileClip
from PIL import Image, ImageSequence

# combine the inference video (stickman) with the video from charade into a GIF for display later
def combine_and_create_gif(
    mov_path,
    mp4_path,
    output_gif_path="/content/ict3104-team13-2023/data_folder/inference_result/output.gif",
    fps=10,
):
    """Write a GIF showing two videos side by side.

    Both clips are resized to the smaller of the two widths and the smaller
    of the two heights, placed next to each other (first clip on the left)
    and written out as an animated GIF.

    Args:
        mov_path: path of the first (left) video.
        mp4_path: path of the second (right) video.
        output_gif_path: where to write the GIF. Defaults to the location the
            result-display cell of this notebook reads from, so callers that
            pass only the two video paths keep working.
        fps: frame rate of the written GIF.
    """
    # Loading the vid
    mov_video = VideoFileClip(mov_path)
    try:
        mp4_video = VideoFileClip(mp4_path)
        try:
            width1, height1 = mov_video.size
            width2, height2 = mp4_video.size
            target_size = (min(width1, width2), min(height1, height2))

            # Resize both vid to have same width and height
            left = mov_video.resize(target_size)
            right = mp4_video.resize(target_size)

            # Combine the vid side by side
            result = clips_array([[left, right]])

            # The destination folder is not created anywhere else.
            out_dir = os.path.dirname(output_gif_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)

            # Write as GIF for display
            result.write_gif(output_gif_path, fps=fps)
        finally:
            mp4_video.close()
    finally:
        # Release the reader resources held by the source clip.
        mov_video.close()

In [None]:
# @title US T13-28 Select skeleton video and perform inference
import os
from ipywidgets import interact, widgets
from IPython.display import display, HTML, Image, clear_output
from functools import partial
import subprocess



# Declare file_dropdown as a global variable
file_dropdown = None
stickman_directory = '/content/ict3104-team13-2023/data_folder/stickman'
input_path = ""


def display_file_dropdown():
    """Show a dropdown of the files in `stickman_directory`.

    Stores the widget in the module-level `file_dropdown` so the button
    handler can read the selection, and echoes each selection in an output
    area below the dropdown.
    """
    global file_dropdown  # Declare as a global variable

    # Offer regular, non-hidden files only: directories and entries such as
    # ".ipynb_checkpoints" are not usable as skeleton videos. A missing
    # directory yields an empty list instead of FileNotFoundError.
    if os.path.isdir(stickman_directory):
        files = sorted(
            name for name in os.listdir(stickman_directory)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(stickman_directory, name))
        )
    else:
        files = []
        print(f"Directory not found: {stickman_directory}")

    # Create a dropdown widget with the default value set to None
    file_dropdown = widgets.Dropdown(
        options=[''] + files,
        description='Select File:',
        disabled=False,
        value=None,  # Set the default value to None
    )

    # Create an output widget to display the selected file
    output = widgets.Output()

    # Define a function to handle the dropdown selection
    def on_file_select(change):
        """Replace the status line with the newly selected file name."""
        selected_file = change.new
        with output:
            clear_output()  # Clear the previous output in the output widget
            if selected_file:
                print(f"Selected File: {selected_file}")
            else:
                # The blank first option was chosen.
                print("No file selected")

    # Attach the function to the dropdown's change event
    file_dropdown.observe(on_file_select, names='value')

    # Display the dropdown and the output widget
    display(file_dropdown)
    display(output)

def on_button_click(b):
    # Perform an action when the button is clicked
    selected_file = file_dropdown.value
    if selected_file:
        global input_path
        input_path = os.path.join(stickman_directory, selected_file)
        !TORCH_DISTRIBUTED_DEBUG=DETAIL accelerate launch txt2video.py --config="configs/pose_sample.yaml"  --skeleton_path="{input_path}"



def user_input():
    """Render the skeleton-file picker followed by its "Confirm" button."""
    # The dropdown (with its status output) goes first so the button is
    # displayed underneath it.
    display_file_dropdown()

    # Clicking the button hands the current selection to on_button_click.
    confirm_button = widgets.Button(description="Confirm")
    confirm_button.on_click(on_button_click)
    display(confirm_button)

# Call the user_input function
user_input()

In [None]:
from IPython.display import HTML, display, Image
import yaml

def display_inference_result(result_path, caption):
    """Show the result GIF, centered, with `caption` underneath.

    The GIF is embedded as a base64 data URL inside a single HTML block.
    Displaying an opening <div> on its own does not wrap outputs displayed
    afterwards, so the image and caption must live in the same HTML fragment
    to be centered.

    Args:
        result_path: path of the GIF to show.
        caption: text shown below the GIF; HTML-escaped before rendering.
    """
    # Imported locally: this cell's own imports do not provide these names.
    import base64
    import html
    import os
    from IPython.display import clear_output

    # Clear the output
    clear_output()

    # Check if the GIF file exists in the specified directory
    if not os.path.isfile(result_path):
        print("GIF file not found in the specified directory.")
        return

    with open(result_path, "rb") as gif_file:
        encoded_gif = base64.b64encode(gif_file.read()).decode("ascii")

    # Escape the caption: it comes from a config file and may contain
    # characters such as < or & that would otherwise be parsed as markup.
    safe_caption = html.escape(str(caption))

    display(HTML(
        '<div style="display: flex; flex-direction: column; align-items: center;">'
        f'<img src="data:image/gif;base64,{encoded_gif}">'
        f'<p style="text-align:center; font-size:16px;">{safe_caption}</p>'
        '</div>'
    ))

# Retrieve the save_path from the file
def get_inference_result():
  """Combine the selected skeleton video and the inference output into a GIF.

  Reads the path of the generated video from save_path.txt, then writes a
  side-by-side GIF of `input_path` (the skeleton video chosen earlier) and
  that video to the module-level `result_path`.
  """
  import os  # local import: this cell does not import os itself

  with open("/content/ict3104-team13-2023/checkpoints/inference/save_path.txt", "r") as f:
    # Strip surrounding whitespace: a trailing newline in the file would
    # otherwise become part of the video path.
    save_path = f.read().strip()

  # Make sure the destination folder exists before the GIF is written.
  os.makedirs(os.path.dirname(result_path), exist_ok=True)

  # combine_and_create_gif takes the output path as its third argument;
  # calling it with only the two video paths raises TypeError.
  combine_and_create_gif(input_path, save_path, result_path)

def get_prompt_from_yaml():
  """Return the first validation prompt from pose_sample.yaml.

  Returns:
      The first entry of config["validation_data"]["prompts"], or an empty
      string when that list is missing values (empty or null).
  """
  # Load the configuration from the YAML file
  config_path = "/content/ict3104-team13-2023/configs/pose_sample.yaml"
  with open(config_path, "r") as config_file:
      # safe_load only builds plain Python data, which is all a config file
      # needs, and matches how the same file is read elsewhere in this notebook.
      config = yaml.safe_load(config_file)

  # Get the prompt list from the configuration
  prompts = config["validation_data"]["prompts"]

  # Only the first prompt is used as the caption; an empty list would
  # otherwise raise IndexError.
  return prompts[0] if prompts else ""


# Path of the combined GIF that display_inference_result() looks for.
result_path = "/content/ict3104-team13-2023/data_folder/inference_result/output.gif"

# Build the GIF, read the caption from the config, then display both.
# `prompt` is kept as a notebook-level variable.
get_inference_result()
prompt = get_prompt_from_yaml()
display_inference_result(result_path, prompt)

# 🕺🕺🕺 Training

In [None]:
#@title  User Story T13-8: Select subfolder from dataset folder
import ipywidgets as widgets
import os, cv2
from os import listdir
from ipywidgets import Dropdown, interact

# NOTE(review): relative path, so it resolves against the kernel's current
# working directory — confirm which directory this cell is meant to run from.
data_directory = "./dataset_folder"
directory_dict = {}

# get subfolders as key and list of files as value in dict
# (the directory list from os.walk is bound to `_dirs` so it does not
# overwrite the notebook-level `subfolders` list defined in an earlier cell)
for root, _dirs, files in os.walk(data_directory):
    # Skip the root directory itself
    if root == data_directory:
        continue

    subfolder_name = os.path.relpath(root, data_directory)

    # Skip notebook checkpoint folders (e.g. ".ipynb_checkpoints")
    if ".ipynb" in subfolder_name:
      continue

    # Add the subfolder and its file names (sorted for a stable order)
    directory_dict[subfolder_name] = sorted(files)
#print(directory_dict)


# dropdown UI
# list(...) gives the widget a concrete sequence rather than a dict view
subfolder_choices = Dropdown(options=list(directory_dict))
subfolder_files = Dropdown()
button = widgets.Button(description="Select dataset")


@interact(subfolder = subfolder_choices, dataset = subfolder_files)
def print_city(subfolder, dataset):
    """Refresh the file dropdown whenever the chosen subfolder changes.

    Args:
        subfolder: currently selected subfolder name (None when there are no
            subfolders to choose from).
        dataset: currently selected file (unused; present so `interact`
            renders the second dropdown).
    """
    # .get avoids a KeyError when nothing is selected because the dataset
    # folder has no subfolders.
    subfolder_files.options = directory_dict.get(subfolder, [])

# UI
display(button)


In [None]:
#@title  User Story T13-6
import ipywidgets as widgets
import os, cv2
import subprocess
import yaml
from os import listdir
from google.colab.patches import cv2_imshow
from IPython.display import HTML, clear_output
from base64 import b64encode


# Define and Instantiate variables
charades_data_url = None
# Absolute path, so the listing does not depend on the kernel's current
# working directory (other cells change it with %cd / os.chdir).
charades_video_path = "/content/ict3104-team13-2023/charades"
chosen_charades_video = None

# Store names of charades video in a list. Only .mp4 files are offered: the
# player below declares type="video/mp4", and this also keeps directories
# such as ".ipynb_checkpoints" out of the dropdown.
charades_video_list = sorted(
    name for name in os.listdir(charades_video_path) if name.endswith('.mp4')
)

# Show input to accept user prompt
prompt_input = widgets.Text(
    value='',  # Initial value
    placeholder='Enter prompt...',  # Placeholder text
    description='Prompt Input: ',  # Label for the input
)

# Add names of charades video as dropdown options
charades_videos_dropdown = widgets.Dropdown(options=charades_video_list, value=None)

# UI to show after running this cell
choose_charades_video_button = widgets.Button(description="Choose Video")
chosen_video_output = widgets.Output()

# Display all UI
display(prompt_input, charades_videos_dropdown, choose_charades_video_button, chosen_video_output)

# method to add user's prompt into pose_sample.yaml
def insert_prompt_input_into_config(prompt, config_path='/content/ict3104-team13-2023/configs/pose_sample.yaml'):
  """Replace the validation prompts in the inference config with `prompt`.

  Args:
      prompt: text to store as the only entry of validation_data.prompts.
      config_path: YAML file to update. Defaults to the project's
          pose_sample.yaml by absolute path, because this function runs
          before generate_gif() changes into the project directory and a
          relative path would resolve against whatever the current working
          directory happens to be.

  Note:
      The file is rewritten with yaml.dump, so comments in the original YAML
      are not preserved.
  """
  # Load the YAML file (an empty file loads as None, hence the fallback)
  with open(config_path, 'r') as file:
      config = yaml.safe_load(file) or {}

  # Access the 'prompts' section, creating 'validation_data' if it is absent
  validation_data = config.get('validation_data')
  if not isinstance(validation_data, dict):
      validation_data = {}
      config['validation_data'] = validation_data
  validation_data['prompts'] = [prompt]

  # Save the modified configuration back to the file
  with open(config_path, 'w') as file:
      yaml.dump(config, file, default_flow_style=False)

def generate_gif(skeleton_path='./pose_example/vis_ikun_pose2.mov'):
  """Launch txt2video.py from the project directory.

  Args:
      skeleton_path: skeleton video passed as --skeleton_path; relative paths
          are resolved against the project directory. Defaults to the bundled
          example, so existing no-argument calls behave as before.
  """
  # Change directory to /content/ict3104-team13-2023
  os.chdir('/content/ict3104-team13-2023')

  # Print the current working directory
  print(os.getcwd())

  # Set the TORCH_DISTRIBUTED_DEBUG environment variable for the child
  # process only (the other launch commands in this notebook set it too),
  # then launch txt2video.py.
  launch_env = dict(os.environ, TORCH_DISTRIBUTED_DEBUG='DETAIL')
  subprocess.run(
      ['accelerate', 'launch', 'txt2video.py',
       '--config=configs/pose_sample.yaml',
       f'--skeleton_path={skeleton_path}'],
      env=launch_env,
  )

#
def set_charades_video_variables(charades_video_name):
  """Show the named charades video in `chosen_video_output`.

  The file is read from `charades_video_path`, base64-encoded and embedded
  in an HTML5 <video> element, replacing whatever was shown before.

  Args:
      charades_video_name: file name inside `charades_video_path`, or None
          when nothing is selected (in which case nothing happens).
  """
  # Without this early return a None name would reach the path
  # concatenation below and raise TypeError.
  if charades_video_name is None:
    return

  with chosen_video_output:
        # Clear previous output
        clear_output()
        try:
          # Context manager closes the handle even if the read fails.
          with open(os.path.join(charades_video_path, charades_video_name), 'rb') as video_file:
            charades_mp4 = video_file.read()
        except OSError:
          charades_mp4 = b''

        if charades_mp4:
          charades_data_url = "data:video/mp4;base64," + b64encode(charades_mp4).decode()
          video_html = f'<video controls><source src="{charades_data_url}" type="video/mp4"></video>'
          display(HTML(video_html))
        else:
          # Unreadable or empty file.
          print("Cannot open chosen charades video")

# OnClick function for 'Choose Video' button
def on_choose_charades_video_button_clicked(b):
    """Handle a click on the 'Choose Video' button; currently does nothing.

    Args:
        b: the clicked button (unused).
    """
    # NOTE(review): this is a stub, so the button has no effect. Presumably it
    # was meant to act on the current dropdown selection — confirm intent.
    pass

# OnChange function for dropdown
def on_charades_videos_dropdown_change(change):
    """Remember the newly selected charades video and display it.

    Args:
        change: change notification; only its 'name' and 'new' entries are
            read.
    """
    # Without this declaration the assignment below would create a local
    # variable and the notebook-level `chosen_charades_video` would stay None.
    global chosen_charades_video

    selected_option = change['new']
    if change['name'] == 'value' and selected_option:
        chosen_charades_video = selected_option
        # print(f"Selected option: {selected_option}")
        set_charades_video_variables(selected_option)

# On Prompt Input 'enter' key press
def on_prompt_input_enter_pressed(change):
      """Store the typed prompt in the config, then start generation.

      Args:
          change: argument supplied by the submit event (unused; the text is
              read from `prompt_input` directly).
      """
      submitted_prompt = prompt_input.value
      # print("Enter pressed with text:", submitted_prompt)
      insert_prompt_input_into_config(submitted_prompt)
      generate_gif()

# Attach event functions to UI:
#   Enter in the prompt box  -> write prompt to config and generate
#   dropdown selection       -> preview the selected video
#   'Choose Video' button    -> its click handler
prompt_input.on_submit(on_prompt_input_enter_pressed)
charades_videos_dropdown.observe(on_charades_videos_dropdown_change, names='value')
choose_charades_video_button.on_click(on_choose_charades_video_button_clicked)

In [None]:
#@title  User Story T13-10
import ipywidgets as widgets
from IPython.display import display

# Text box for the model name and the button that confirms it.
model_name_input = widgets.Text(
    description='Model Name:',
    placeholder='Enter Model Name',
)
enter_button = widgets.Button(description='Enter', button_style='primary')

# add to the model when its out
def handle_enter_button_click(b):
    """Acknowledge the click by printing 'Done'.

    The text in `model_name_input` is not used for anything yet.

    Args:
        b: the clicked button (unused).
    """
    print('Done')

enter_button.on_click(handle_enter_button_click)
display(model_name_input, enter_button)

# 🕺🕺🕺 Testing

# 🕺🕺🕺 MMPose

In [None]:
#@title Environment Setup
!update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.8 2
!update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.9 1
!python --version
!apt-get update
!apt install software-properties-common
!sudo dpkg --remove --force-remove-reinstreq python3-pip python3-setuptools python3-wheel
!apt-get install python3-pip

%cd /content

# forked michael's mmpose because project needed to change some of the mmpose code
!git clone https://github.com/micdiary/mmpose.git

#MMPose
%cd /content/mmpose
!python3 -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# install MMEngine, MMCV and MMDetection using MIM
!python3 -m pip install -U openmim
!mim install mmengine
!mim install "mmcv>=2.0.0"
!mim install "mmdet>=3.0.0"

!python3 -m pip install -r requirements.txt
!python3 -m pip install -v -e .

!python3 -m pip install setuptools==68.1.0

In [None]:
#@title Check MMpose

%cd /content/mmpose/

# Check Pytorch installation
import torch, torchvision

print('torch version:', torch.__version__, torch.cuda.is_available())
print('torchvision version:', torchvision.__version__)

# Check MMPose installation
import mmpose

print('mmpose version:', mmpose.__version__)

# Check mmcv installation
from mmcv.ops import get_compiling_cuda_version, get_compiler_version

print('cuda version:', get_compiling_cuda_version())
print('compiler information:', get_compiler_version())

In [None]:
#@title Inference with MMPOSE
# run inference on ALL videos in the charades folder
import os

%cd /content/mmpose

charades_video_path = "/content/ict3104-team13-2023/charades/"

# List all the MP4 files in the specified directory
mp4_files = [f for f in os.listdir(charades_video_path) if f.endswith('.mp4')]


# Iterate through the MP4 files and run the script for each one
for mp4_file in mp4_files:
    input_path = os.path.join(charades_video_path, mp4_file)
    output_folder = f"/content/ict3104-team13-2023/data_folder/stickman/"

    !python demo/topdown_demo_with_mmdet.py \
    demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py \
    https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
    configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py \
    https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth \
    --input {input_path} \
    --output-root {output_folder}

In [None]:
#@title Inference with MMPOSE (1 video)
# run inference on 1 video
import os

%cd /content/mmpose

charades_video_path = "/content/ict3104-team13-2023/charades/52CKM.mp4"

output_folder = f"/content/ict3104-team13-2023/data_folder/stickman3/"

!python demo/topdown_demo_with_mmdet.py \
demo/mmdetection_cfg/faster_rcnn_r50_fpn_1class.py \
https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth \
configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py \
https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth \
--input {charades_video_path} \
--output-root {output_folder}

After conducting spam testing, we found that the first library we tested gave better results than the other three, which were held back by their outdated versions. We will therefore revert to the first library, as it produces the desired skeleton result.

In [None]:
#@title  US T13-30

import os
import tempfile
from base64 import b64encode
import ipywidgets as widgets
from moviepy.editor import VideoFileClip
from IPython.display import HTML, display, clear_output

stickman_directory = '/content/ict3104-team13-2023/data_folder/stickman'

# Converted copies go to a fresh temporary directory; the originals are untouched.
output_directory = tempfile.mkdtemp()
converted_videos = []

# Re-encode every .mp4 skeleton video with the libx264 codec.
for video_file in sorted(os.listdir(stickman_directory)):
    if not video_file.endswith('.mp4'):
        continue

    video_path = os.path.join(stickman_directory, video_file)
    output_path = os.path.join(output_directory, os.path.splitext(video_file)[0] + '_h264.mp4')

    clip = VideoFileClip(video_path)
    try:
        clip.write_videofile(output_path, codec='libx264', logger=None)
    finally:
        # Release the clip's reader resources even if encoding fails, so
        # they do not accumulate across the loop.
        clip.close()

    converted_videos.append(output_path)
    print(f"Done converting {video_file}")

stickman_files = sorted(f for f in os.listdir(output_directory) if f.endswith('_h264.mp4'))

# value=None starts with nothing selected. Otherwise the first video is
# preselected, and picking it never fires a change event, so it could not be
# viewed without selecting another video first.
stickman_dropdown = widgets.Dropdown(
    options=stickman_files,
    value=None,
    description='Select Video:'
)

output = widgets.Output()

def display_selected_video(change):
    """Play the newly selected converted video inside `output`.

    Args:
        change: change notification from the dropdown; `change.new` is the
            selected file name inside `output_directory`.
    """
    with output:
      # Drop the previously shown player before rendering the new one.
      clear_output()
      chosen_path = os.path.join(output_directory, change.new)

      with open(chosen_path, 'rb') as video_file:
          raw_bytes = video_file.read()

      # Embed the file as a base64 data URL.
      data_url = "data:video/mp4;base64," + b64encode(raw_bytes).decode()
      display(HTML(f"""
              <video controls autoplay>
                  <source src="{data_url}" type="video/mp4">
              </video>
          """))

# React to selection changes, then render the picker above its output area.
stickman_dropdown.observe(display_selected_video, names='value')
display(stickman_dropdown)
display(output)