<a href="https://colab.research.google.com/github/aio25-mix002/m07-p7.1/blob/ngocdung%2Fmake-inference-n-submit/make-inference-n-submit/notebooks/runbook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [1]:
import os
import shutil
from datetime import datetime

## Configuration

In [2]:
WORKING_DIR = "/kaggle/working/"
CODE_DIR = "/kaggle/temp/src"
GDRIVE_DIR = "/content/drive/MyDrive/AIO25-MIX002/Projects/AIO2025_Conquer_CONQ008_M7_Project/"

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Download code

In [4]:
BRANCH = "ngocdung/make-inference-n-submit"

In [5]:
!ls /kaggle/working/data

ls: cannot access '/kaggle/working/data': No such file or directory


In [6]:
# If directory "src" not exist then clone a new one
!pwd
![ -d "{CODE_DIR}" ] || git clone --depth 1  --branch "{BRANCH}" "https://github.com/aio25-mix002/m07-p7.1" "{CODE_DIR}"


/content
Cloning into '/kaggle/temp/src'...
remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 16 (delta 0), reused 12 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (16/16), 16.00 KiB | 16.00 MiB/s, done.


## Fetch the latest code

In [7]:
# Go to CODE_DIR, Fetch the latest code
%cd {CODE_DIR}
!git clean -fdx
!git status
!git pull
!pwd


/kaggle/temp/src
On branch ngocdung/make-inference-n-submit
Your branch is up to date with 'origin/ngocdung/make-inference-n-submit'.

nothing to commit, working tree clean
Already up to date.
/kaggle/temp/src


# Data Preparation

### Option 1: Using Kaggle API Credentials (Only if data in Google Drive not available)

First, ensure you have downloaded your `kaggle.json` API token from your Kaggle account. Once downloaded, upload it to your Colab session. You can do this via the 'Files' tab on the left sidebar.

After uploading, we will move it to the correct directory (`~/.kaggle/`) and set the necessary permissions.

In [8]:
import os

# Create the .kaggle directory if it doesn't exist
!mkdir -p ~/.kaggle

# Move the uploaded kaggle.json file to the .kaggle directory
# Assuming kaggle.json is in the current working directory after upload
# If you uploaded it to a different path, please adjust '/content/kaggle.json'
!mv /content/kaggle.json ~/.kaggle/kaggle.json

# Set permissions for the kaggle.json file
!chmod 600 ~/.kaggle/kaggle.json

print('Kaggle API credentials set up successfully!')
!ls -la ~/.kaggle/kaggle.json

Kaggle API credentials set up successfully!
-rw------- 1 root root 67 Jan  8 07:03 /root/.kaggle/kaggle.json


In [9]:
# !python download_data.py
!kaggle competitions download -c action-video
!unzip action-video.zip  -d {WORKING_DIR}

Downloading action-video.zip to /kaggle/temp/src
 99% 3.10G/3.14G [00:06<00:00, 471MB/s]
100% 3.14G/3.14G [00:06<00:00, 508MB/s]


### (Unused due to synchronizing problem) Option 2: Using data saved in Google Drive


In [8]:
# import os
# import shutil

# source_gdrive_data_path = os.path.join(GDRIVE_DIR, 'Data')
# destination_working_data_path = os.path.join(WORKING_DIR, 'data')

# print(f"Attempting to copy data from Google Drive: {source_gdrive_data_path}")
# print(f"To working directory: {destination_working_data_path}")

# try:
#     # Create the destination directory if it doesn't exist. If it exists, remove it first to avoid errors.
#     if os.path.exists(destination_working_data_path):
#         print(f"Destination directory {destination_working_data_path} already exists. Removing before copy...")
#         shutil.rmtree(destination_working_data_path)

#     shutil.copytree(source_gdrive_data_path, destination_working_data_path)
#     print(f"Successfully copied '{source_gdrive_data_path}' to '{destination_working_data_path}'")
# except FileNotFoundError:
#     print(f"Error: Source data directory not found in Google Drive at {source_gdrive_data_path}")
# except Exception as e:
#     print(f"An error occurred while copying the data from Google Drive: {e}")

Attempting to copy data from Google Drive: /content/drive/MyDrive/AIO25-MIX002/Projects/AIO2025_Conquer_CONQ008_M7_Project/Data
To working directory: /kaggle/working/data
Successfully copied '/content/drive/MyDrive/AIO25-MIX002/Projects/AIO2025_Conquer_CONQ008_M7_Project/Data' to '/kaggle/working/data'


# Train

In [None]:
!python train.py

Save the best checkpoint to Google Drive

In [15]:
import os
import shutil
from datetime import datetime

# Define the brief note for the filename
NOTE = "vanilla" # !!! Should edit every new run
SAVE_PATH = f"{GDRIVE_DIR}Artifacts/Checkpoints"


In [16]:

# Define the source path of the best model checkpoint
source_checkpoint_path = os.path.join(CODE_DIR, 'checkpoints', 'best_model.pth')

# Define your Google Drive destination folder path
# IMPORTANT: Please replace 'YOUR_GOOGLE_DRIVE_FOLDER_PATH' with the actual path to your folder in Google Drive.
# For example, it might be '/content/drive/MyDrive/MyProjectCheckpoints/'

# Ensure the Google Drive folder exists
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)
    print(f"Created Google Drive folder: {SAVE_PATH}")

# Generate a timestamp for the filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Construct the new filename for the checkpoint
new_checkpoint_filename = f"model_{timestamp}_{NOTE}.pth"
destination_checkpoint_path = os.path.join(SAVE_PATH, new_checkpoint_filename)

# Copy the checkpoint
try:
    shutil.copy(source_checkpoint_path, destination_checkpoint_path)
    print(f"Checkpoint successfully saved to: {destination_checkpoint_path}")
except FileNotFoundError:
    print(f"Error: Source checkpoint not found at {source_checkpoint_path}")
except Exception as e:
    print(f"An error occurred while saving the checkpoint: {e}")


Error: Source checkpoint not found at /kaggle/temp/src/checkpoints/best_model.pth


Reload the checkpoint (if needed)

In [23]:
import torch
import os

# # Define the path to the checkpoint file
destination_checkpoint_path = f'{GDRIVE_DIR}Artifacts/Checkpoints/model_20260107_081244_vanilla.pth'

# Check if the checkpoint file exists
if os.path.exists(destination_checkpoint_path):
    # Load the checkpoint
    loaded_checkpoint = torch.load(destination_checkpoint_path, map_location=torch.device('cpu')) # Use 'cuda' if you want to load to GPU
    print(f"Checkpoint loaded successfully from: {destination_checkpoint_path}")
    print("Keys in the loaded checkpoint:", loaded_checkpoint.keys())

    # Example of how you might load it into a model (assuming 'model' is defined)
    # model.load_state_dict(loaded_checkpoint['model_state_dict'])
    # optimizer.load_state_dict(loaded_checkpoint['optimizer_state_dict'])
    # epoch = loaded_checkpoint['epoch']
    # loss = loaded_checkpoint['loss']
else:
    print(f"Error: Checkpoint not found at {destination_checkpoint_path}")


Checkpoint loaded successfully from: /content/drive/MyDrive/AIO25-MIX002/Projects/AIO2025_Conquer_CONQ008_M7_Project/Artifacts/Checkpoints/model_20260107_081244_vanilla.pth
Keys in the loaded checkpoint: odict_keys(['_orig_mod.smif.alpha', '_orig_mod.smif.conv_fuse.weight', '_orig_mod.smif.conv_fuse.bias', '_orig_mod.backbone.cls_token', '_orig_mod.backbone.pos_embed', '_orig_mod.backbone.patch_embed.proj.weight', '_orig_mod.backbone.patch_embed.proj.bias', '_orig_mod.backbone.blocks.0.norm1.weight', '_orig_mod.backbone.blocks.0.norm1.bias', '_orig_mod.backbone.blocks.0.attn.qkv.weight', '_orig_mod.backbone.blocks.0.attn.qkv.bias', '_orig_mod.backbone.blocks.0.attn.proj.weight', '_orig_mod.backbone.blocks.0.attn.proj.bias', '_orig_mod.backbone.blocks.0.norm2.weight', '_orig_mod.backbone.blocks.0.norm2.bias', '_orig_mod.backbone.blocks.0.mlp.fc1.weight', '_orig_mod.backbone.blocks.0.mlp.fc1.bias', '_orig_mod.backbone.blocks.0.mlp.fc2.weight', '_orig_mod.backbone.blocks.0.mlp.fc2.bias', 

# Submission

Inference of Test set

In [None]:
!python inference.py --checkpoint {destination_checkpoint_path}

Make submission file and submit to Kaggle