# Set the Working Directory

In [None]:
import os

# Use the directory the notebook kernel was started from as the project root;
# every later path in this notebook is built from `working_dir`.
working_dir = os.getcwd()

# Guard clause: stop immediately if the directory is not usable.
if not os.path.isdir(working_dir):
    raise ValueError("Working directory does not exist")

print("Working directory is ready!")

# Create the `models` Directory

In [None]:
# Create <working_dir>/models; exist_ok=True makes re-running this cell a no-op.
os.makedirs(f"{working_dir}/models", exist_ok=True)

# Clone the Repositories of [`Video-LLaMA`](https://github.com/DAMO-NLP-SG/Video-LLaMA/tree/main)

Cloning the code and model repositories in the next cell might take around 1 hour, since the model checkpoints are downloaded as well — hang tight! ⏲😴

In [None]:
# Clone the code repository of Video-LLaMA into <working_dir>/models.
# `{working_dir}` is interpolated by IPython from the Python variable of the
# same name, so the working-directory cell must have been run first.
# NOTE(review): `git clone` refuses to clone into an existing non-empty
# directory, so re-running this cell after a successful run reports errors
# for the clone steps.
%cd {working_dir}/models
!git clone https://github.com/DAMO-NLP-SG/Video-LLaMA.git

# Pin the checkout to a fixed commit so later cells see a known version of
# the repository (e.g. the files under eval_configs/).
%cd {working_dir}/models/Video-LLaMA
!git checkout 314f7dce044afa2bf32bce3b3729712861f058ac

# Clone the model repositories of Video-LLaMA.
# The current directory is still models/Video-LLaMA, so both model
# repositories are cloned *inside* the code checkout.
# `git lfs install` sets up Git LFS before cloning; presumably the checkpoint
# files in the model repositories are stored via LFS — TODO confirm.
!git lfs install

# Model 1 : DAMO-NLP-SG/Video-LLaMA-2-7B-Finetuned
# https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-7B-Finetuned/tree/main
# GIT_TRACE / GIT_CURL_VERBOSE turn on verbose git and HTTP tracing, which
# gives visible progress output during the long download.
!GIT_TRACE=1 GIT_CURL_VERBOSE=1 git clone https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-7B-Finetuned

# Model 2 : DAMO-NLP-SG/Video-LLaMA-2-13B-Finetuned
# https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Finetuned/tree/main
!GIT_TRACE=1 GIT_CURL_VERBOSE=1 git clone https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Finetuned

# List the current directory to check that the clones are present.
!ls

In [None]:
import yaml

# Directory inside the Video-LLaMA checkout that holds the evaluation configs.
config_path = working_dir + "/models/Video-LLaMA/eval_configs"

with open(config_path + "/video_llama_eval_withaudio.yaml", "r") as file:
    content = file.read()

# The template's `llama_model` line lists several alternatives joined by
# "or". Collapse it to a single quoted value before parsing; the real value
# is overwritten per model variant below.
content = content.replace(
    'llama_model: "ckpt/vicuna-13b/" or "ckpt/vicuna-7b/" or "ckpt/llama-2-7b-chat-hf"  or "ckpt/llama-2-13b-chat-hf"',
    'llama_model: "ckpt/llama-2-7b-chat-hf"'
)

# Parsed template, left unmodified so it can be inspected in later cells.
config = yaml.safe_load(content)


def _build_finetuned_config(template_text, size):
    """Return an independent config dict for one Video-LLaMA-2 finetuned model.

    The template is parsed afresh on every call so that each variant owns its
    nested ``model`` dict. The previous ``config.copy()`` was a shallow copy:
    all variants shared one ``model`` dict, and the 13B assignments silently
    overwrote the values held by ``config_7b`` and ``config``.

    Args:
        template_text: YAML text of the (already cleaned-up) eval template.
        size: Model size tag as used in the repository names, e.g. "7B".

    Returns:
        The parsed config with the four model path entries set for ``size``.
    """
    model_dir = f"Video-LLaMA-2-{size}-Finetuned"
    variant = yaml.safe_load(template_text)
    variant['model']['llama_model'] = f"{model_dir}/llama-2-{size.lower()}-chat-hf/"
    variant['model']['imagebind_ckpt_path'] = f"{model_dir}/"
    variant['model']['ckpt'] = f"{model_dir}/VL_LLaMA_2_{size}_Finetuned.pth"
    variant['model']['ckpt_2'] = f"{model_dir}/AL_LLaMA_2_{size}_Finetuned.pth"
    return variant


# Create the YAML for Video-LLaMA-2-7B-Finetuned
config_7b = _build_finetuned_config(content, "7B")

with open(config_path + "/video_llama_eval_withaudio_7b.yaml", "w") as file:
    yaml.dump(config_7b, file)

# Create the YAML for Video-LLaMA-2-13B-Finetuned
config_13b = _build_finetuned_config(content, "13B")

with open(config_path + "/video_llama_eval_withaudio_13b.yaml", "w") as file:
    yaml.dump(config_13b, file)

# Clone the Repositories of [`ImageBind`](https://github.com/facebookresearch/ImageBind)

In [None]:
# Clone the code repository of ImageBind into <working_dir>/models, next to
# the Video-LLaMA checkout. `{working_dir}` is interpolated by IPython from
# the Python variable defined in the working-directory cell.
# NOTE(review): `git clone` refuses to clone into an existing non-empty
# directory, so re-running this cell after a successful run reports an error
# for the clone step.
%cd {working_dir}/models
!git clone https://github.com/facebookresearch/ImageBind.git

# Pin the checkout to a fixed commit so the notebook always uses the same
# version of the ImageBind code.
%cd {working_dir}/models/ImageBind
!git checkout c6a47d6dc2b53eced51d398c181d57049ca59286