In [None]:
!pip3 install boto3 huggingface_hub tqdm --upgrade

In [None]:
import boto3
import tarfile
import os
import sys
from huggingface_hub import HfApi, HfFolder, upload_folder
from tqdm import tqdm  # For the progress bar

def _extract_tar_safely(tar, destination):
    """Extract every member of an open tar archive into ``destination``.

    Members that would be written outside ``destination`` are rejected instead
    of being extracted.

    Args:
        tar: An open ``tarfile.TarFile`` object.
        destination: Directory the archive is unpacked into.

    Raises:
        ValueError: (fallback path only) if a member name resolves outside
            ``destination``.
        tarfile.TarError: if the archive is damaged or, when the ``data``
            filter is available, a member is rejected by that filter.
    """
    # Interpreters that ship extraction filters can do the validation natively.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=destination, filter="data")
        return

    # Fallback for older interpreters: check every member name before writing
    # anything. This scans the archive once before extractall reads it again.
    # NOTE: only member names are checked here; link targets are not.
    dest_root = os.path.realpath(destination)
    for member in tar.getmembers():
        target = os.path.realpath(os.path.join(dest_root, member.name))
        common = os.path.commonpath([dest_root, target])
        if os.path.normcase(common) != os.path.normcase(dest_root):
            raise ValueError(
                f"Refusing to extract {member.name!r}: path escapes {dest_root}"
            )
    tar.extractall(path=destination)


def main():
    """Download a model archive from S3, unpack it and upload it to the Hugging Face Hub.

    Steps, in order:
      1. Resolve the Hugging Face token (``HF_TOKEN`` environment variable,
         falling back to the in-code placeholder) and stop if it is unset.
      2. Download ``model.tar.gz`` from the configured bucket/key into the
         user's home directory, showing a tqdm progress bar.
      3. Extract the archive into ``~/llama31-testloss-pcliner``.
      4. Create the target model repository if needed and upload the folder.

    Every failure is reported with ``print`` and terminates via ``sys.exit(1)``.
    """
    # Resolve the token first so a missing token is detected before the
    # multi-gigabyte download, and so the placeholder is never persisted by
    # HfFolder.save_token below.
    placeholder_token = "WRITE_YOUR_HF_TOKEN"
    hf_token = os.environ.get("HF_TOKEN") or placeholder_token  # Or replace the placeholder
    if hf_token == placeholder_token:
        print(
            "Hugging Face token is not configured: set the HF_TOKEN environment "
            "variable or replace the placeholder in main()."
        )
        sys.exit(1)

    # Initialize S3 client
    s3 = boto3.client('s3')

    # S3 bucket and key
    bucket_name = 'llama-training-s3'
    s3_key = 'llama-training-s3/model/pytorch-training-2025-01-20-06-15-03-149/output/model.tar.gz'

    # Use the home directory for storage
    home_dir = os.path.expanduser('~')
    print("Home directory is:", home_dir)

    # Define local paths using the home directory
    local_model_tar = os.path.join(home_dir, 'model.tar.gz')
    model_dir = os.path.join(home_dir, 'llama31-testloss-pcliner')

    # Get the size of the file to download (used as the progress bar total)
    try:
        response = s3.head_object(Bucket=bucket_name, Key=s3_key)
        total_length = response.get('ContentLength')
    except Exception as e:
        print(f"Error retrieving object metadata from S3: {e}")
        sys.exit(1)

    # Download model.tar.gz from S3 with progress bar
    print("Downloading model from S3...")
    try:
        with tqdm(total=total_length, unit='B', unit_scale=True, desc='Downloading model.tar.gz') as pbar:
            def progress_hook(bytes_amount):
                # Called by boto3 with the number of bytes transferred since the last call.
                pbar.update(bytes_amount)

            s3.download_file(
                Bucket=bucket_name,
                Key=s3_key,
                Filename=local_model_tar,
                Callback=progress_hook
            )
        print(f"Downloaded model.tar.gz to {local_model_tar}")
    except Exception as e:
        print(f"Error downloading file from S3: {e}")
        sys.exit(1)

    # Extract the tar.gz file
    os.makedirs(model_dir, exist_ok=True)

    print("Extracting model files...")
    try:
        with tarfile.open(local_model_tar, 'r:gz') as tar:
            _extract_tar_safely(tar, model_dir)
        print(f"Extracted model to {model_dir}")
    except Exception as e:
        print(f"Error extracting model files: {e}")
        sys.exit(1)

    # Set up Hugging Face API
    HfFolder.save_token(hf_token)

    username = "javiagu"  # Replace with your Hugging Face username
    model_name = "gemma2_pCliNER_bf16"  # Replace with your desired model name
    model_repo_id = f"{username}/{model_name}"

    api = HfApi()

    # Create the repository (if it doesn't exist)
    try:
        api.create_repo(
            repo_id=model_repo_id,
            repo_type="model",
            exist_ok=True,
            token=hf_token
        )
        print(f"Repository {model_repo_id} is ready.")
    except Exception as e:
        print(f"Error creating repository on Hugging Face Hub: {e}")
        sys.exit(1)

    # Upload the model files
    print("Uploading model to Hugging Face Hub...")
    try:
        upload_folder(
            folder_path=model_dir,
            repo_id=model_repo_id,
            repo_type="model",
            token=hf_token,
            ignore_patterns=["*.ipynb_checkpoints", "*.lock"],
        )
        print("Model uploaded successfully!")
    except Exception as e:
        print(f"Error uploading model to Hugging Face Hub: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()


Home directory is: C:\Users\javia
Downloading model from S3...


Downloading model.tar.gz:  59%|█████████████████████████████▉                     | 8.65G/14.7G [37:13<16:44, 6.06MB/s]

In [None]:
# Shell escape: runs `explorer .` to open the current working directory in
# Windows File Explorer (this command is Windows-only).
!explorer .

CONTENT CHECK

In [None]:
import os

def list_extracted_files(directory):
    """
    Print a two-column table (path, size in bytes) of every file under ``directory``.

    The tree is walked recursively with ``os.walk``; only files are listed,
    directories themselves are not. Nothing is returned.
    """
    column_titles = f"{'File/Folder':<80} {'Size (Bytes)':>15}"
    print(column_titles)
    print("-" * 95)

    # Lazily yield the full path of each file, in os.walk order.
    all_paths = (
        os.path.join(parent, entry)
        for parent, _subdirs, entries in os.walk(directory)
        for entry in entries
    )
    for path in all_paths:
        size_in_bytes = os.path.getsize(path)
        print(f"{path:<80} {size_in_bytes:>15}")

    print("\nListing completed.")

# Folder whose contents are inspected below.
# NOTE(review): main() extracts the archive into ~/llama31-testloss-pcliner,
# whereas this cell looks at ~/model — confirm this is the intended directory.
model_dir = os.path.join(os.path.expanduser('~'), 'model')

# Bail out with a hint when the folder is missing; otherwise print its file table.
if not os.path.exists(model_dir):
    print(f"Directory {model_dir} does not exist. Extraction might have failed.")
else:
    print(f"Listing contents of extracted directory: {model_dir}")
    list_extracted_files(model_dir)


In [None]:
# Upload the folder referenced by the notebook-level `model_dir` to the Hugging
# Face Hub, then delete the local archive and the extracted folder.
# Relies on names from earlier cells: os, sys, HfApi, HfFolder, upload_folder, model_dir.
import shutil

# Set up Hugging Face API
# Prefer the HF_TOKEN environment variable; the placeholder is only a marker
# and must never be saved, since that would overwrite a valid cached token.
placeholder_token = "WRITE_YOUR_HF_TOKEN"
hf_token = os.environ.get("HF_TOKEN") or placeholder_token  # Or replace the placeholder
if hf_token == placeholder_token:
    print(
        "Hugging Face token is not configured: set the HF_TOKEN environment "
        "variable or replace the placeholder in this cell."
    )
    sys.exit(1)
HfFolder.save_token(hf_token)

username = "javiagu"  # Replace with your Hugging Face username
model_name = "KULLM_pCliNER_bf16"  # Replace with your desired model name
model_repo_id = f"{username}/{model_name}"

api = HfApi()

# Create the repository (if it doesn't exist)
try:
    api.create_repo(
        repo_id=model_repo_id,
        repo_type="model",
        exist_ok=True,
        token=hf_token
    )
    print(f"Repository {model_repo_id} is ready.")
except Exception as e:
    print(f"Error creating repository on Hugging Face Hub: {e}")
    sys.exit(1)

# Upload the model files
print("Uploading model to Hugging Face Hub...")
try:
    upload_folder(
        folder_path=model_dir,
        repo_id=model_repo_id,
        repo_type="model",
        token=hf_token,
        ignore_patterns=["*.ipynb_checkpoints", "*.lock"],
    )
    print("Model uploaded successfully!")
except Exception as e:
    print(f"Error uploading model to Hugging Face Hub: {e}")
    sys.exit(1)

# Clean up local files
# `local_model_tar` only exists as a local variable inside main(), so it is
# rebuilt here from the same location main() downloads to (~/model.tar.gz).
local_model_tar = os.path.join(os.path.expanduser('~'), 'model.tar.gz')

print("Cleaning up local files...")
try:
    # A missing archive must not prevent the extracted folder from being removed.
    if os.path.exists(local_model_tar):
        os.remove(local_model_tar)
    shutil.rmtree(model_dir)
    print("Clean up completed.")
except Exception as e:
    print(f"Error during clean up: {e}")
