In [1]:
import shutil
import os

# Target folder for the ImageNet subset; rebuilt from scratch on every run
# so that files from a previous execution cannot leak into the new one.
output_dir = "/kaggle/working/imagenet_subset"

stale_output_present = os.path.exists(output_dir)
if stale_output_present:
    # Wipe the previous run's directory tree before recreating it.
    shutil.rmtree(output_dir)
    print("Old output directory removed.")

# exist_ok keeps this call harmless even if the directory is already there.
os.makedirs(output_dir, exist_ok=True)
print("New output directory created.")


New output directory created.


In [2]:
import os
import shutil

# Source dataset roots; each one is expected to hold one sub-folder per class.
src_list = [
    "/kaggle/input/imagenet1k2",
    "/kaggle/input/imagenet1k0"
]

# Destination that receives the merged subset.
dst = "/kaggle/working/imagenet_subset"
# How many class folders (in sorted name order) are taken from each source.
max_classes = 100
os.makedirs(dst, exist_ok=True)

for src in src_list:
    print(f"Processing dataset: {src}")
    # Keep directories only: a stray regular file in the dataset root would
    # otherwise occupy one of the class slots and make copytree() raise.
    classes = sorted(
        entry for entry in os.listdir(src)
        if os.path.isdir(os.path.join(src, entry))
    )[:max_classes]

    for cls in classes:
        src_path = os.path.join(src, cls)
        dst_path = os.path.join(dst, cls)

        # First time this class is seen: copy the whole folder in one go.
        if not os.path.exists(dst_path):
            print(f"Copying class {cls} from {src}")
            shutil.copytree(src_path, dst_path)
            continue

        # Class already copied from an earlier source: add only new entries.
        print(f"Merging class {cls} from {src}")
        skipped = 0
        for fname in os.listdir(src_path):
            src_file = os.path.join(src_path, fname)
            dst_file = os.path.join(dst_path, fname)

            if os.path.exists(dst_file):
                # Existing entries are never overwritten; count them so the
                # omission is visible instead of silent.
                skipped += 1
            elif os.path.isdir(src_file):
                # copy2() cannot copy a directory; copy nested folders as trees.
                shutil.copytree(src_file, dst_file)
            else:
                shutil.copy2(src_file, dst_file)

        if skipped:
            print(f"  Skipped {skipped} entries already present in {dst_path}")


Processing dataset: /kaggle/input/imagenet1k2
Copying class 00000 from /kaggle/input/imagenet1k2
Copying class 00001 from /kaggle/input/imagenet1k2
Copying class 00002 from /kaggle/input/imagenet1k2
Copying class 00003 from /kaggle/input/imagenet1k2
Copying class 00004 from /kaggle/input/imagenet1k2
Copying class 00005 from /kaggle/input/imagenet1k2
Copying class 00006 from /kaggle/input/imagenet1k2
Copying class 00007 from /kaggle/input/imagenet1k2
Copying class 00008 from /kaggle/input/imagenet1k2
Copying class 00009 from /kaggle/input/imagenet1k2
Copying class 00010 from /kaggle/input/imagenet1k2
Copying class 00011 from /kaggle/input/imagenet1k2
Copying class 00012 from /kaggle/input/imagenet1k2
Copying class 00013 from /kaggle/input/imagenet1k2
Copying class 00014 from /kaggle/input/imagenet1k2
Copying class 00015 from /kaggle/input/imagenet1k2
Copying class 00016 from /kaggle/input/imagenet1k2
Copying class 00017 from /kaggle/input/imagenet1k2
Copying class 00018 from /kaggle/inp

In [3]:
# Disabled: single-archive alternative to the chunked zipping in the next cell.
# !zip -r imagenet_subset.zip imagenet_subset

In [4]:
import os
import zipfile

# Pack the merged subset into several zip archives, a fixed number of
# top-level entries (class folders) per archive.
base = "/kaggle/working/imagenet_subset"
classes = sorted(os.listdir(base))
chunk_size = 30   # entries of `base` per archive

# Iterate over the entries actually present rather than a fixed count, so no
# folder is left out when there are more than 100 and no empty archive is
# written when there are fewer.
for idx in range(0, len(classes), chunk_size):
    chunk = classes[idx: idx + chunk_size]
    # Archives are numbered from 1 in chunk order.
    zip_name = f"imagenet_subset_part_{idx//chunk_size + 1}.zip"
    zip_path = os.path.join("/kaggle/working", zip_name)

    print("Creating", zip_name)

    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for cls in chunk:
            cls_path = os.path.join(base, cls)
            for root, _, files in os.walk(cls_path):
                for f in files:
                    fpath = os.path.join(root, f)
                    # Store paths relative to `base` so each archive unpacks
                    # to <class>/<file> without the absolute prefix.
                    arcname = os.path.relpath(fpath, base)
                    zipf.write(fpath, arcname)

print("All parts created!")


Creating imagenet_subset_part_1.zip
Creating imagenet_subset_part_2.zip
Creating imagenet_subset_part_3.zip
Creating imagenet_subset_part_4.zip
All parts created!
