# Setup

In [1]:
import os
import shutil
from datetime import datetime

## Configuration

In [2]:
# Folder the competition archive is extracted into (test data is read from <WORKING_DIR>data/test)
WORKING_DIR = '/kaggle/working/'
# Location of the local clone of the project repository
CODE_DIR = '/kaggle/temp/src'
# Shared Google Drive project folder; the trailing slash matters because later
# cells append sub-paths to it by plain string concatenation
GDRIVE_DIR = (
    '/content/drive/MyDrive/AIO25-MIX002/Projects/'
    'AIO2025_Conquer_CONQ008_M7_Project/'
)

In [3]:
# Attach Google Drive so that paths under /content/drive become available
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Download code

In [4]:
# Git branch of the project repository that gets cloned
BRANCH = 'ngocdung/make-inference-n-submit'

In [5]:
# If directory "src" not exist then clone a new one
!pwd
![ -d "{CODE_DIR}" ] || git clone --depth 1  --branch "{BRANCH}" "https://github.com/aio25-mix002/m07-p7.1" "{CODE_DIR}"


/content
Cloning into '/kaggle/temp/src'...
remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 16 (delta 0), reused 10 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (16/16), 23.40 KiB | 23.40 MiB/s, done.


## Fetch the latest code

In [38]:
# Go to CODE_DIR, Fetch the latest code
%cd {CODE_DIR}
!git clean -fdx
!git status
!git pull
!pwd


/kaggle/temp/src
Removing action-video.zip
Removing checkpoints/
Removing src/__pycache__/
Removing submission.csv
On branch ngocdung/make-inference-n-submit
Your branch is behind 'origin/ngocdung/make-inference-n-submit' by 1 commit, and can be fast-forwarded.
  (use "git pull" to update your local branch)

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   inference.py[m
	[31mmodified:   train.py[m

no changes added to commit (use "git add" and/or "git commit -a")
Updating de0d5d4..6ec249c
error: Your local changes to the following files would be overwritten by merge:
	train.py
Please commit your changes or stash them before you merge.
Aborting
/kaggle/temp/src


# Data Preparation

### Option 1: Using Kaggle API Credentials (only if the data in Google Drive is not available)

First, ensure you have downloaded your `kaggle.json` API token from your Kaggle account. Once downloaded, upload it to your Colab session. You can do this via the 'Files' tab on the left sidebar.

After uploading, we will move it to the correct directory (`~/.kaggle/`) and set the necessary permissions.

In [6]:
import os

# Create the .kaggle directory if it doesn't exist
!mkdir -p ~/.kaggle

# Move the uploaded kaggle.json file to the .kaggle directory
# Assuming kaggle.json is in the current working directory after upload
# If you uploaded it to a different path, please adjust '/content/kaggle.json'
!mv /content/kaggle.json ~/.kaggle/kaggle.json

# Set permissions for the kaggle.json file
!chmod 600 ~/.kaggle/kaggle.json

print('Kaggle API credentials set up successfully!')
!ls -la ~/.kaggle/kaggle.json

Kaggle API credentials set up successfully!
-rw------- 1 root root 67 Jan  9 05:20 /root/.kaggle/kaggle.json


In [7]:
# !python download_data.py
!kaggle competitions download -c action-video
!unzip -q action-video.zip  -d {WORKING_DIR}

Downloading action-video.zip to /kaggle/temp/src
100% 3.13G/3.14G [00:11<00:00, 262MB/s]
100% 3.14G/3.14G [00:11<00:00, 286MB/s]
replace /kaggle/working/data/data_train/brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_1/10000.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

### (Unused due to a synchronization problem) Option 2: Using data saved in Google Drive


In [10]:
# NOTE: this cell is intentionally disabled; every statement below is commented out.
# If re-enabled, it would copy <GDRIVE_DIR>/Data to <WORKING_DIR>/data, deleting an
# existing destination folder first, and print a message instead of raising on failure.
# import os
# import shutil

# source_gdrive_data_path = os.path.join(GDRIVE_DIR, 'Data')
# destination_working_data_path = os.path.join(WORKING_DIR, 'data')

# print(f"Attempting to copy data from Google Drive: {source_gdrive_data_path}")
# print(f"To working directory: {destination_working_data_path}")

# try:
#     # Create the destination directory if it doesn't exist. If it exists, remove it first to avoid errors.
#     if os.path.exists(destination_working_data_path):
#         print(f"Destination directory {destination_working_data_path} already exists. Removing before copy...")
#         shutil.rmtree(destination_working_data_path)

#     shutil.copytree(source_gdrive_data_path, destination_working_data_path)
#     print(f"Successfully copied '{source_gdrive_data_path}' to '{destination_working_data_path}'")
# except FileNotFoundError:
#     print(f"Error: Source data directory not found in Google Drive at {source_gdrive_data_path}")
# except Exception as e:
#     print(f"An error occurred while copying the data from Google Drive: {e}")

# Train

### Training

In [16]:
# Training - can change number of epochs
!python train.py # --epochs 1

Using device: cuda
Initializing datasets...
Loaded pretrained weights. Missing: 132, Unexpected: 0

Training Configuration:
  Epochs: 1
  Batch size: 4
  Learning rate: 0.0001
  Num frames: 16
  Frame stride: 2
  Val ratio: 0.1
  Checkpoint dir: ./checkpoints


Epoch 1/1
Train Loss: 3.7096 | Acc: 0.0844
Val Loss: 3.4936   | Acc: 0.1134
New best model saved! (0.1134)

Training complete! Best validation accuracy: 0.1134


### Save the best checkpoint to Google Drive

In [17]:
import os
import shutil
from datetime import datetime

# Short label embedded in the saved checkpoint's filename
NOTE = "vanilla" # !!! Should edit every new run
# Google Drive folder that receives the checkpoints. os.path.join yields the
# correct path whether or not GDRIVE_DIR ends with a slash.
SAVE_PATH = os.path.join(GDRIVE_DIR, "Artifacts", "Checkpoints")


In [18]:
# Checkpoint file expected at <CODE_DIR>/checkpoints/best_model.pth
source_checkpoint_path = os.path.join(CODE_DIR, 'checkpoints', 'best_model.pth')

# The destination folder is SAVE_PATH (set in the previous cell); create it when missing
drive_folder_exists = os.path.exists(SAVE_PATH)
if not drive_folder_exists:
    os.makedirs(SAVE_PATH)
    print(f"Created Google Drive folder: {SAVE_PATH}")

# Destination name has the form model_<YYYYmmdd_HHMMSS>_<NOTE>.pth
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
new_checkpoint_filename = "model_{}_{}.pth".format(timestamp, NOTE)
destination_checkpoint_path = os.path.join(SAVE_PATH, new_checkpoint_filename)

# Copy the file; any failure is reported as a printed message, not raised
try:
    shutil.copy(source_checkpoint_path, destination_checkpoint_path)
    print(f"Checkpoint successfully saved to: {destination_checkpoint_path}")
except FileNotFoundError:
    print(f"Error: Source checkpoint not found at {source_checkpoint_path}")
except Exception as e:
    print(f"An error occurred while saving the checkpoint: {e}")


Checkpoint successfully saved to: /content/drive/MyDrive/AIO25-MIX002/Projects/AIO2025_Conquer_CONQ008_M7_Project/Artifacts/Checkpoints/model_20260109_054453_vanilla.pth


### Reload the checkpoint (if needed)

In [19]:
import torch
import os

# Path of the checkpoint to reload. By default this is the freshly trained local
# file; to reload a copy saved in Google Drive instead, point it at e.g.
# f'{GDRIVE_DIR}Artifacts/Checkpoints/model_20260107_081244_vanilla.pth'
destination_checkpoint_path = source_checkpoint_path

# Check if the checkpoint file exists
if os.path.exists(destination_checkpoint_path):
    # torch.load unpickles the file. weights_only=True restricts unpickling to
    # tensors and plain containers, so a tampered checkpoint (e.g. one picked up
    # from a shared Drive folder) cannot run arbitrary code while loading.
    loaded_checkpoint = torch.load(
        destination_checkpoint_path,
        map_location=torch.device('cpu'),  # Use 'cuda' if you want to load to GPU
        weights_only=True,
    )
    print(f"Checkpoint loaded successfully from: {destination_checkpoint_path}")
    print("Keys in the loaded checkpoint:", loaded_checkpoint.keys())

    # NOTE(review): the recorded output of this cell lists parameter names
    # (e.g. 'smif.alpha', 'backbone.cls_token') as keys, which suggests the file
    # is a bare state_dict rather than a dict wrapping 'model_state_dict' /
    # 'optimizer_state_dict'. If so, load it directly (assuming 'model' is defined):
    # model.load_state_dict(loaded_checkpoint)
else:
    print(f"Error: Checkpoint not found at {destination_checkpoint_path}")


Checkpoint loaded successfully from: /kaggle/temp/src/checkpoints/best_model.pth
Keys in the loaded checkpoint: odict_keys(['smif.alpha', 'smif.conv_fuse.weight', 'smif.conv_fuse.bias', 'backbone.cls_token', 'backbone.pos_embed', 'backbone.patch_embed.proj.weight', 'backbone.patch_embed.proj.bias', 'backbone.blocks.0.norm1.weight', 'backbone.blocks.0.norm1.bias', 'backbone.blocks.0.attn.qkv.weight', 'backbone.blocks.0.attn.qkv.bias', 'backbone.blocks.0.attn.proj.weight', 'backbone.blocks.0.attn.proj.bias', 'backbone.blocks.0.norm2.weight', 'backbone.blocks.0.norm2.bias', 'backbone.blocks.0.mlp.fc1.weight', 'backbone.blocks.0.mlp.fc1.bias', 'backbone.blocks.0.mlp.fc2.weight', 'backbone.blocks.0.mlp.fc2.bias', 'backbone.blocks.0.lmim.delta', 'backbone.blocks.0.lmim.reduce.weight', 'backbone.blocks.0.lmim.reduce.bias', 'backbone.blocks.0.lmim.expand.weight', 'backbone.blocks.0.lmim.expand.bias', 'backbone.blocks.0.lmim.temporal_mlp.0.weight', 'backbone.blocks.0.lmim.temporal_mlp.0.bias', 

# Submission

### Make submission file

In [32]:

!python inference.py --checkpoint {destination_checkpoint_path} \
    --data_root {WORKING_DIR}data/test




Using device: cuda
INFERENCE ON TEST SET
Loading checkpoint from /kaggle/temp/src/checkpoints/best_model.pth...
Model loaded

Loading test dataset...
Test samples: 510

Running inference...
Processed 160/510 samples
Processed 320/510 samples
Processed 480/510 samples

Inference complete! Processed 510 videos

âœ“ Submission file created at: /kaggle/working/submission.csv

Submission saved to: /kaggle/working/submission.csv


### Submit to Kaggle

In [35]:
# Description attached to the Kaggle submission; it must be set before submitting
MESSAGE = 'Testing runbook.ipynb again'

In [37]:
!kaggle competitions submit -c action-video -f /kaggle/working/submission.csv -m "{MESSAGE}"

100% 3.15k/3.15k [00:00<00:00, 3.90kB/s]
Successfully submitted to AIO-2025: Video Action Classification Challenge