In [None]:
import numpy as np
import pandas as pd
import os
import os
import json
import shutil
from kaggle.api.kaggle_api_extended import KaggleApi

In [None]:
# ---------------------------------------------------------------------------
# Notebook settings -- edit the values marked "replace" before running.
# ---------------------------------------------------------------------------

# Kaggle account that will own the dataset (replace with your username).
KAGGLE_USERNAME = "your-kaggle-username"

# Human-readable dataset title.
DATASET_NAME = "My Combined Dataset"

# Folder the images are read from (replace with your dataset path).
IMAGE_SOURCE_DIR = "/kaggle/input/your-source-dataset"

# Folder in which the dataset (metadata + image batches) is assembled.
BASE_DIR = "/kaggle/working/my_dataset"

# Number of images placed in each batch.
BATCH_SIZE = 100

# "<username>/<slug>", where the slug is the title lower-cased with every
# space turned into a hyphen.
DATASET_ID = "/".join([KAGGLE_USERNAME, "-".join(DATASET_NAME.lower().split(" "))])

In [None]:
# Function to create metadata file
def create_metadata(dataset_path, title=None, dataset_id=None, license_name="CC0-1.0"):
    """Write ``dataset-metadata.json`` into ``dataset_path``.

    Args:
        dataset_path: Folder that receives the metadata file. It is created
            (including parents) when it does not exist yet.
        title: Value for the "title" field. Defaults to the module-level
            ``DATASET_NAME``.
        dataset_id: Value for the "id" field. Defaults to the module-level
            ``DATASET_ID``.
        license_name: Name stored in the single "licenses" entry.

    Raises:
        OSError: If the folder cannot be created or the file cannot be written.
    """
    # The globals are looked up at call time (not at definition time) so that
    # re-running the configuration cell is picked up without redefining this
    # function.
    metadata = {
        "title": DATASET_NAME if title is None else title,
        "id": DATASET_ID if dataset_id is None else dataset_id,
        "licenses": [{"name": license_name}],
    }
    os.makedirs(dataset_path, exist_ok=True)
    metadata_file = os.path.join(dataset_path, "dataset-metadata.json")
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=4)

In [None]:

# Initialize and authenticate the Kaggle API client
api = KaggleApi()
api.authenticate()

# Create the dataset directory and write its metadata file
os.makedirs(BASE_DIR, exist_ok=True)
create_metadata(BASE_DIR)

# Get list of images.
# - The extension check is done on the lower-cased name so files such as
#   "photo.JPG" or "scan.PNG" are not silently left out.
# - os.listdir() returns entries in arbitrary order; sorting makes the
#   contents of every batch reproducible across runs.
image_files = sorted(
    f for f in os.listdir(IMAGE_SOURCE_DIR)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

In [None]:
# Stage the images under BASE_DIR in sub-folders of BATCH_SIZE files each
# (batch_1, batch_2, ...) and publish the folder as one dataset upload.
#
# Why stage everything and upload once:
# - A Kaggle upload is built from the files present in the folder at upload
#   time. Uploading one batch, deleting it locally and then uploading the next
#   would leave the newest version holding only the final batch.
# - Sub-folders are ignored by the Kaggle client unless a directory mode is
#   given (the default is "skip"); dir_mode="zip" archives each batch folder
#   and uploads the archive.
# - The upload goes through the authenticated `api` client so that failures
#   surface as Python errors instead of being lost in shell output.
#
# NOTE(review): all images are copied into BASE_DIR before the upload, so the
# working disk must be able to hold one full copy of them.
# NOTE(review): dataset_create_new creates a *new* dataset; re-running against
# an existing dataset needs api.dataset_create_version instead -- confirm the
# intended re-run behaviour.
staged_dirs = []
try:
    for batch_number, start in enumerate(range(0, len(image_files), BATCH_SIZE), start=1):
        batch_dir = os.path.join(BASE_DIR, f"batch_{batch_number}")
        os.makedirs(batch_dir, exist_ok=True)
        staged_dirs.append(batch_dir)

        # Copy this batch's images into its folder
        for img in image_files[start:start + BATCH_SIZE]:
            shutil.copy(os.path.join(IMAGE_SOURCE_DIR, img), os.path.join(batch_dir, img))

    if not staged_dirs:
        # Nothing matched the image filter: do not publish an empty dataset.
        print(f"No images found in {IMAGE_SOURCE_DIR}; nothing to upload.")
    else:
        result = api.dataset_create_new(BASE_DIR, public=True, dir_mode="zip")
        if str(result.status).lower() != "ok":
            raise RuntimeError(f"Dataset creation failed: {result.error}")
        print("Dataset upload complete!")
finally:
    # Clean up the staged copies to save space, whether or not the upload worked
    for batch_dir in staged_dirs:
        shutil.rmtree(batch_dir, ignore_errors=True)