In [None]:
from google.colab import drive
import os
import tarfile
import json
import pickle
from tqdm import tqdm

# Resumable, batched extraction of eval_all.tar from Google Drive into the
# Colab VM's local disk. Progress is checkpointed to a JSON log on Drive after
# every batch so an interrupted run can continue where it stopped.

# Step 1: Mount Google Drive
drive.mount('/content/drive')

# Step 2: Specify paths
# Drive paths are absolute so they keep resolving even if the working
# directory is changed away from /content.
tar_file_path = '/content/drive/My Drive/evaluation/eval_all.tar'
local_extract_path = '/content/evaluation'
log_file_path = '/content/drive/My Drive/evaluation/extract_log.json'
members_file_path = '/content/drive/My Drive/evaluation/members.pkl'

# Step 3: Create directories if they don't exist
os.makedirs(local_extract_path, exist_ok=True)

# Step 4: Load progress log
# A missing, unreadable or malformed log simply means "start from member 0".
progress_log = {"last_extracted_index": 0}
if os.path.exists(log_file_path):
    try:
        with open(log_file_path, 'r') as log_file:
            saved_progress = json.load(log_file)
        if isinstance(saved_progress, dict):
            progress_log.update(saved_progress)
    except (OSError, ValueError) as e:  # ValueError covers json.JSONDecodeError
        print(f"Could not read progress log ({e}); starting from the beginning.")

# Step 5: Check and load members list
if os.path.exists(members_file_path):
    # NOTE(security): pickle.load can execute arbitrary code. This is only
    # acceptable because members.pkl is a cache written by this notebook;
    # never point members_file_path at a file from an untrusted source.
    with open(members_file_path, 'rb') as members_file:
        members = pickle.load(members_file)
    print("Loaded members list from members.pkl")
else:
    print("Generating members list from the tar file...")
    with tarfile.open(tar_file_path, 'r') as tar:
        members = tar.getmembers()
        members = [m for m in members if m.isfile()]  # Exclude directories
    with open(members_file_path, 'wb') as members_file:
        pickle.dump(members, members_file)
    print(f"Members list saved to {members_file_path}")

# Step 6: Extract the tar file with batch processing
batch_size = 10000
total_members = len(members)
total_batches = (total_members + batch_size - 1) // batch_size  # ceiling division

# Sanitize the resume position: it must be an int inside [0, total_members].
try:
    start_index = int(progress_log.get("last_extracted_index", 0))
except (TypeError, ValueError):
    start_index = 0
start_index = max(0, min(start_index, total_members))

# The log is stored on Drive but the extracted files are on the VM's local
# disk, which is wiped whenever the runtime is recycled. If the log claims
# progress while the local directory holds no files, the log is stale and
# resuming from it would silently skip everything.
local_has_files = any(files for _, _, files in os.walk(local_extract_path))
if start_index > 0 and not local_has_files:
    print(f"Progress log points at index {start_index}, but {local_extract_path} "
          "is empty; restarting extraction from the beginning.")
    start_index = 0

failed_members = []    # names of members whose extraction raised
extraction_ok = False  # set only when the batch loop runs to the end

print("Starting extraction process...")
try:
    with tarfile.open(tar_file_path, 'r') as tar:
        for i in tqdm(range(start_index, total_members, batch_size),
                      initial=-(-start_index // batch_size),  # batches already done (rounded up)
                      total=total_batches,
                      desc="Extracting files in batches"):
            batch = members[i:i + batch_size]

            # No per-file print here: with hundreds of thousands of members it
            # floods the cell output; the inner progress bar already shows progress.
            for member in tqdm(batch, desc=f"Processing batch {i // batch_size + 1}", leave=False):
                try:
                    tar.extract(member, path=local_extract_path)
                except Exception as e:
                    # Best effort: note the failure and keep going with the batch.
                    failed_members.append(member.name)
                    print(f"Error extracting {member.name}: {e}")

            # Save progress after each batch (never past the end of the list)
            progress_log["last_extracted_index"] = min(i + batch_size, total_members)
            with open(log_file_path, 'w') as log_file:
                json.dump(progress_log, log_file)
            print(f"Batch {i // batch_size + 1} completed.")
    extraction_ok = True

except Exception as e:
    print(f"An error occurred during extraction: {e}")

if failed_members:
    print(f"{len(failed_members)} member(s) could not be extracted, e.g.: {failed_members[:5]}")

# Only claim success when the loop actually finished.
if extraction_ok:
    print(f"Extraction process completed. Extracted files are available at: {local_extract_path}")
else:
    print("Extraction process did not complete; re-run this cell to resume from the last saved batch.")

# Step 7: Verify extraction results
def count_files(directory):
    """Return how many file entries ``os.walk`` reports under ``directory``.

    Every level of the tree is visited and the per-directory file lists are
    tallied. A path that does not exist produces no walk results, so the
    function returns 0 for it rather than raising.
    """
    file_total = 0
    for _dirpath, _dirnames, filenames in os.walk(directory):
        file_total += len(filenames)
    return file_total

# Count every file currently under local_extract_path and print the total.
# The number reflects the whole directory tree, not only what this run wrote.
total_files = count_files(local_extract_path)
print(f"Total files extracted: {total_files}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loaded members list from members.pkl
Starting extraction process...


Extracting files in batches: 100%|██████████| 40/40 [00:00<?, ?it/s]

Extraction process completed. Extracted files are available at: /content/evaluation
Total files extracted: 0





In [None]:
from google.colab import drive
import os
import zipfile
from tqdm import tqdm

# Extract a .zip archive stored on Google Drive into a folder on Drive,
# showing per-entry progress.

# Step 1: Mount Google Drive
drive.mount('/content/drive')

# Step 2: Specify paths
# Absolute paths so they keep resolving even if the working directory is
# changed away from /content.
zip_file_path = '/content/drive/MyDrive/evaluation/extracted_files.zip'  # Path to the .zip file on Google Drive
extract_to_path = '/content/drive/MyDrive/evaluation/evaluation_extracted'  # Destination folder (also on Google Drive)

# Step 3: Create the extraction directory if it doesn't exist
os.makedirs(extract_to_path, exist_ok=True)

# Step 4: Extract the .zip file
try:
    print(f"Extracting files from {zip_file_path} to {extract_to_path}...")
    with zipfile.ZipFile(zip_file_path, 'r') as zipf:
        file_list = zipf.namelist()  # Names of all entries in the archive
        if not file_list:
            # An archive with no entries used to be reported as a successful
            # extraction; say explicitly that nothing was written.
            print(f"Warning: {zip_file_path} contains no entries; nothing was extracted.")
        else:
            for file in tqdm(file_list, desc="Extracting files"):
                zipf.extract(file, extract_to_path)
            print(f"Extraction completed. Files are available at: {extract_to_path}")
except Exception as e:
    # Covers a missing/corrupt archive as well as write failures on Drive.
    print(f"An error occurred during extraction: {e}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Extracting files from drive/MyDrive/evaluation/extracted_files.zip to drive/MyDrive/evaluation/evaluation_extracted...


Extracting files: 0it [00:00, ?it/s]

Extraction completed. Files are available at: drive/MyDrive/evaluation/evaluation_extracted





In [None]:
import os

def count_files_in_directory(directory_path):
    """Count the file entries ``os.walk`` finds anywhere below ``directory_path``.

    Sub-directories are descended into; only the file lists are counted, not
    the directories themselves. A non-existent path yields no walk results and
    therefore a count of 0.
    """
    return sum(len(filenames) for _, _, filenames in os.walk(directory_path))

# Example usage
# Counts the files under the folder named below and prints the total.
# NOTE(review): os.walk yields nothing for a path that does not exist, so a
# result of 0 can mean either an empty folder or a wrong/unmounted path.
directory_path = 'drive/My Drive/evaluation/evaluation_extracted'  # Change to your directory path
num_files = count_files_in_directory(directory_path)
print(f"Total number of files: {num_files}")


Total number of files: 0


In [None]:
from google.colab import drive
import tarfile
import os
import json
import pickle
from tqdm import tqdm
import shutil

# Resumable, batched extraction of eval_all.tar to the VM's local disk,
# followed by a copy of the extracted tree onto Google Drive. The local copy
# is deleted only after the Drive copy has succeeded.

# Step 1: Mount Google Drive
drive.mount('/content/drive')

# Step 2: Specify the paths
# Drive paths are absolute so they keep resolving even if the working
# directory is changed away from /content.
tar_file_path = '/content/drive/My Drive/evaluation/eval_all.tar'  # Path to the tar file
local_extract_path = '/content/evaluation'               # Temporary local extraction path
final_extract_path = '/content/drive/My Drive/evaluation'         # Final directory on Google Drive
log_file_path = '/content/drive/My Drive/evaluation/extract_log.json'  # Progress log file path
members_file_path = '/content/drive/My Drive/evaluation/members.pkl'  # Members list file path

# Step 3: Create directories if they don't exist
os.makedirs(local_extract_path, exist_ok=True)
os.makedirs(final_extract_path, exist_ok=True)

# Step 4: Load progress log
# A missing, unreadable or malformed log simply means "start from member 0".
progress_log = {"last_extracted_index": 0}
if os.path.exists(log_file_path):
    try:
        with open(log_file_path, 'r') as log_file:
            saved_progress = json.load(log_file)
        if isinstance(saved_progress, dict):
            progress_log.update(saved_progress)
    except (OSError, ValueError) as e:  # ValueError covers json.JSONDecodeError
        print(f"Could not read progress log ({e}); starting from the beginning.")

# Step 5: Check and load members list
if os.path.exists(members_file_path):
    # NOTE(security): pickle.load can execute arbitrary code. This is only
    # acceptable because members.pkl is a cache written by this notebook;
    # never point members_file_path at a file from an untrusted source.
    with open(members_file_path, 'rb') as members_file:
        members = pickle.load(members_file)
    print("Loaded members list from members.pkl")
else:
    print("Generating members list from the tar file...")
    with tarfile.open(tar_file_path, 'r') as tar:
        members = tar.getmembers()
    with open(members_file_path, 'wb') as members_file:
        pickle.dump(members, members_file)
    print(f"Members list saved to {members_file_path}")

# Step 6: Extract the tar file with batch processing
batch_size = 10000
total_members = len(members)
total_batches = (total_members + batch_size - 1) // batch_size  # ceiling division

# Sanitize the resume position: it must be an int inside [0, total_members].
try:
    start_index = int(progress_log.get("last_extracted_index", 0))
except (TypeError, ValueError):
    start_index = 0
start_index = max(0, min(start_index, total_members))

# The log is stored on Drive but the extracted files are on the VM's local
# disk, which is wiped when the runtime is recycled (and removed by the
# cleanup step below after a successful run). If the log claims progress while
# the local directory holds no files, resuming would skip those members and
# the Drive copy would be incomplete, so start over instead.
local_has_files = any(files for _, _, files in os.walk(local_extract_path))
if start_index > 0 and not local_has_files:
    print(f"Progress log points at index {start_index}, but {local_extract_path} "
          "is empty; restarting extraction from the beginning.")
    start_index = 0

failed_members = []  # names of members whose extraction raised
copied_ok = False    # set only after the Drive copy has finished

print("Starting extraction process...")
try:
    with tarfile.open(tar_file_path, 'r') as tar:
        # Iterate over batches
        for i in tqdm(range(start_index, total_members, batch_size),
                      initial=-(-start_index // batch_size),  # batches already done (rounded up)
                      total=total_batches,
                      desc="Extracting files in batches"):
            batch = members[i:i + batch_size]

            for member in tqdm(batch,
                               desc=f"Processing batch {i // batch_size + 1}",
                               leave=False):
                try:
                    tar.extract(member, path=local_extract_path)
                except Exception as e:
                    # Best effort: note the failure and keep going with the batch.
                    failed_members.append(member.name)
                    print(f"Error extracting {member.name}: {e}")
                    continue

            # Save progress after completing each batch (never past the end of the list)
            progress_log["last_extracted_index"] = min(i + batch_size, total_members)
            with open(log_file_path, 'w') as log_file:
                json.dump(progress_log, log_file)
            print(f"Batch {i // batch_size + 1} completed.")

    # Move extracted files to Google Drive (copy now, delete the local tree below)
    print("Moving extracted files to Google Drive...")
    shutil.copytree(local_extract_path, final_extract_path, dirs_exist_ok=True)
    copied_ok = True
    print(f"Files moved to: {final_extract_path}")

except Exception as e:
    print(f"An error occurred during extraction: {e}")

if failed_members:
    print(f"{len(failed_members)} member(s) could not be extracted, e.g.: {failed_members[:5]}")

# Cleanup local extraction folder, but only once the Drive copy has succeeded;
# otherwise the local tree is the only copy of the extracted data.
if copied_ok:
    if os.path.exists(local_extract_path):
        shutil.rmtree(local_extract_path)
    print("Extraction process completed.")
else:
    print(f"Extraction process did not complete; local files were kept at {local_extract_path}. "
          "Re-run this cell to resume.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loaded members list from members.pkl
Starting extraction process...


Extracting files in batches:  35%|███▌      | 14/40 [00:00<?, ?it/s]
Processing batch 15:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 15:   0%|          | 1/10000 [00:00<1:36:28,  1.73it/s][A
Processing batch 15:   1%|          | 63/10000 [00:00<01:24, 117.05it/s][A
Processing batch 15:   1%|          | 93/10000 [00:00<01:03, 155.27it/s][A
Processing batch 15:   1%|          | 122/10000 [00:00<01:00, 163.96it/s][A
Processing batch 15:   1%|▏         | 147/10000 [00:01<01:50, 88.84it/s] [A
Processing batch 15:   2%|▏         | 245/10000 [00:01<00:46, 208.70it/s][A
Processing batch 15:   3%|▎         | 319/10000 [00:01<00:33, 289.52it/s][A
Processing batch 15:   4%|▎         | 371/10000 [00:02<00:48, 196.82it/s][A
Processing batch 15:   4%|▍         | 436/10000 [00:02<00:37, 256.74it/s][A
Processing batch 15:   5%|▍         | 483/10000 [00:02<00:52, 182.19it/s][A
Processing batch 15:   5%|▌         | 519/10000 [00:02<00:48, 197.47it/s][A
Processing batch 15: 

Batch 15 completed.



Processing batch 16:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 16:   1%|          | 110/10000 [00:00<00:08, 1099.84it/s][A
Processing batch 16:   2%|▏         | 229/10000 [00:00<00:08, 1149.08it/s][A
Processing batch 16:   3%|▎         | 344/10000 [00:00<00:08, 1109.08it/s][A
Processing batch 16:   5%|▍         | 456/10000 [00:00<00:09, 1034.66it/s][A
Processing batch 16:   6%|▌         | 561/10000 [00:00<00:11, 849.33it/s] [A
Processing batch 16:   7%|▋         | 651/10000 [00:00<00:11, 789.10it/s][A
Processing batch 16:   7%|▋         | 733/10000 [00:00<00:12, 764.20it/s][A
Processing batch 16:   8%|▊         | 812/10000 [00:00<00:12, 713.66it/s][A
Processing batch 16:   9%|▉         | 909/10000 [00:01<00:11, 772.52it/s][A
Processing batch 16:  10%|▉         | 989/10000 [00:01<00:11, 768.46it/s][A
Processing batch 16:  11%|█         | 1068/10000 [00:01<00:12, 736.40it/s][A
Processing batch 16:  12%|█▏        | 1177/10000 [00:01<00:10, 831.02it/s][A
Pr

Batch 16 completed.



Processing batch 17:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 17:   2%|▏         | 238/10000 [00:00<00:04, 2367.31it/s][A
Processing batch 17:   5%|▍         | 475/10000 [00:00<00:04, 2061.21it/s][A
Processing batch 17:   7%|▋         | 684/10000 [00:00<00:05, 1718.69it/s][A
Processing batch 17:   9%|▊         | 862/10000 [00:00<00:06, 1353.43it/s][A
Processing batch 17:  10%|█         | 1008/10000 [00:00<00:06, 1353.63it/s][A
Processing batch 17:  12%|█▏        | 1229/10000 [00:00<00:05, 1568.05it/s][A
Processing batch 17:  14%|█▍        | 1419/10000 [00:00<00:05, 1659.95it/s][A
Processing batch 17:  16%|█▌        | 1621/10000 [00:00<00:04, 1761.35it/s][A
Processing batch 17:  18%|█▊        | 1844/10000 [00:01<00:04, 1859.14it/s][A
Processing batch 17:  20%|██        | 2035/10000 [00:01<00:11, 678.27it/s] [A
Processing batch 17:  22%|██▏       | 2207/10000 [00:01<00:09, 805.48it/s][A
Processing batch 17:  24%|██▎       | 2358/10000 [00:02<00:08, 913.60

Batch 17 completed.



Processing batch 18:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 18:   2%|▏         | 244/10000 [00:00<00:04, 2435.69it/s][A
Processing batch 18:   5%|▍         | 488/10000 [00:00<00:04, 2288.20it/s][A
Processing batch 18:   7%|▋         | 718/10000 [00:00<00:04, 2128.24it/s][A
Processing batch 18:   9%|▉         | 932/10000 [00:00<00:04, 2115.83it/s][A
Processing batch 18:  11%|█▏        | 1145/10000 [00:00<00:04, 2076.73it/s][A
Processing batch 18:  14%|█▎        | 1354/10000 [00:00<00:04, 1834.86it/s][A
Processing batch 18:  15%|█▌        | 1542/10000 [00:00<00:05, 1593.58it/s][A
Processing batch 18:  17%|█▋        | 1748/10000 [00:00<00:04, 1715.48it/s][A
Processing batch 18:  20%|█▉        | 1952/10000 [00:01<00:04, 1794.64it/s][A
Processing batch 18:  21%|██▏       | 2138/10000 [00:01<00:04, 1633.81it/s][A
Processing batch 18:  23%|██▎       | 2308/10000 [00:01<00:10, 720.22it/s] [A
Processing batch 18:  24%|██▍       | 2435/10000 [00:01<00:09, 779.6

Batch 18 completed.



Processing batch 19:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 19:   1%|▏         | 136/10000 [00:00<00:07, 1353.45it/s][A
Processing batch 19:   3%|▎         | 282/10000 [00:00<00:06, 1410.57it/s][A
Processing batch 19:   4%|▍         | 428/10000 [00:00<00:06, 1423.52it/s][A
Processing batch 19:   6%|▌         | 571/10000 [00:00<00:10, 930.16it/s] [A
Processing batch 19:   7%|▋         | 682/10000 [00:00<00:10, 900.14it/s][A
Processing batch 19:   8%|▊         | 791/10000 [00:00<00:09, 949.51it/s][A
Processing batch 19:  10%|▉         | 984/10000 [00:00<00:07, 1215.16it/s][A
Processing batch 19:  12%|█▏        | 1196/10000 [00:00<00:05, 1467.51it/s][A
Processing batch 19:  14%|█▍        | 1428/10000 [00:01<00:05, 1709.95it/s][A
Processing batch 19:  17%|█▋        | 1654/10000 [00:01<00:04, 1865.32it/s][A
Processing batch 19:  18%|█▊        | 1849/10000 [00:01<00:04, 1864.59it/s][A
Processing batch 19:  20%|██        | 2041/10000 [00:01<00:08, 925.06it/s

Batch 19 completed.



Processing batch 20:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 20:   2%|▏         | 168/10000 [00:00<00:05, 1675.77it/s][A
Processing batch 20:   3%|▎         | 336/10000 [00:00<00:06, 1539.26it/s][A
Processing batch 20:   5%|▌         | 525/10000 [00:00<00:05, 1690.51it/s][A
Processing batch 20:   7%|▋         | 711/10000 [00:00<00:05, 1751.08it/s][A
Processing batch 20:   9%|▉         | 889/10000 [00:00<00:05, 1758.51it/s][A
Processing batch 20:  11%|█         | 1087/10000 [00:00<00:04, 1830.22it/s][A
Processing batch 20:  13%|█▎        | 1282/10000 [00:00<00:04, 1868.15it/s][A
Processing batch 20:  15%|█▍        | 1470/10000 [00:00<00:04, 1862.75it/s][A
Processing batch 20:  17%|█▋        | 1695/10000 [00:00<00:04, 1979.08it/s][A
Processing batch 20:  19%|█▉        | 1894/10000 [00:01<00:04, 1906.35it/s][A
Processing batch 20:  21%|██        | 2122/10000 [00:01<00:03, 2016.07it/s][A
Processing batch 20:  23%|██▎       | 2325/10000 [00:01<00:03, 1957.5

Batch 20 completed.



Processing batch 21:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 21:   1%|          | 105/10000 [00:00<00:09, 1040.48it/s][A
Processing batch 21:   2%|▏         | 210/10000 [00:00<00:10, 968.35it/s] [A
Processing batch 21:   3%|▎         | 308/10000 [00:00<00:09, 971.33it/s][A
Processing batch 21:   4%|▍         | 421/10000 [00:00<00:09, 1030.87it/s][A
Processing batch 21:   5%|▌         | 542/10000 [00:00<00:08, 1093.93it/s][A
Processing batch 21:   7%|▋         | 667/10000 [00:00<00:08, 1146.19it/s][A
Processing batch 21:   8%|▊         | 794/10000 [00:00<00:07, 1186.06it/s][A
Processing batch 21:   9%|▉         | 913/10000 [00:00<00:07, 1144.04it/s][A
Processing batch 21:  10%|█         | 1028/10000 [00:00<00:07, 1129.28it/s][A
Processing batch 21:  11%|█▏        | 1142/10000 [00:01<00:08, 1020.25it/s][A
Processing batch 21:  13%|█▎        | 1315/10000 [00:01<00:07, 1216.37it/s][A
Processing batch 21:  15%|█▌        | 1513/10000 [00:01<00:05, 1432.26it/

Batch 21 completed.



Processing batch 22:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 22:   2%|▏         | 159/10000 [00:00<00:06, 1531.20it/s][A
Processing batch 22:   3%|▎         | 313/10000 [00:00<00:06, 1478.07it/s][A
Processing batch 22:   5%|▍         | 461/10000 [00:00<00:19, 497.71it/s] [A
Processing batch 22:   6%|▌         | 554/10000 [00:00<00:19, 492.84it/s][A
Processing batch 22:   7%|▋         | 732/10000 [00:01<00:12, 721.06it/s][A
Processing batch 22:   8%|▊         | 845/10000 [00:01<00:15, 577.15it/s][A
Processing batch 22:  10%|▉         | 961/10000 [00:01<00:13, 676.90it/s][A
Processing batch 22:  11%|█         | 1058/10000 [00:01<00:17, 502.85it/s][A
Processing batch 22:  11%|█▏        | 1133/10000 [00:02<00:20, 439.41it/s][A
Processing batch 22:  12%|█▏        | 1195/10000 [00:02<00:22, 398.06it/s][A
Processing batch 22:  12%|█▏        | 1247/10000 [00:02<00:23, 365.27it/s][A
Processing batch 22:  13%|█▎        | 1339/10000 [00:02<00:18, 456.93it/s][A
P

Batch 22 completed.



Processing batch 23:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 23:   2%|▏         | 246/10000 [00:00<00:03, 2454.76it/s][A
Processing batch 23:   5%|▍         | 492/10000 [00:00<00:05, 1764.69it/s][A
Processing batch 23:   7%|▋         | 680/10000 [00:00<00:05, 1695.06it/s][A
Processing batch 23:   9%|▊         | 855/10000 [00:00<00:06, 1520.76it/s][A
Processing batch 23:  10%|█         | 1013/10000 [00:00<00:05, 1537.12it/s][A
Processing batch 23:  12%|█▏        | 1198/10000 [00:00<00:05, 1629.25it/s][A
Processing batch 23:  14%|█▎        | 1365/10000 [00:00<00:05, 1545.31it/s][A
Processing batch 23:  15%|█▌        | 1522/10000 [00:00<00:05, 1483.34it/s][A
Processing batch 23:  17%|█▋        | 1672/10000 [00:01<00:05, 1458.73it/s][A
Processing batch 23:  18%|█▊        | 1819/10000 [00:01<00:05, 1445.86it/s][A
Processing batch 23:  20%|█▉        | 1965/10000 [00:01<00:05, 1421.74it/s][A
Processing batch 23:  21%|██        | 2108/10000 [00:01<00:06, 1262.

Batch 23 completed.



Processing batch 24:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 24:   2%|▏         | 244/10000 [00:00<00:04, 2435.86it/s][A
Processing batch 24:   5%|▍         | 488/10000 [00:00<00:04, 2238.94it/s][A
Processing batch 24:   7%|▋         | 713/10000 [00:00<00:06, 1360.93it/s][A
Processing batch 24:   9%|▉         | 877/10000 [00:00<00:07, 1152.88it/s][A
Processing batch 24:  10%|█         | 1011/10000 [00:00<00:09, 967.95it/s][A
Processing batch 24:  12%|█▏        | 1210/10000 [00:00<00:07, 1188.47it/s][A
Processing batch 24:  14%|█▎        | 1350/10000 [00:04<01:07, 128.78it/s] [A
Processing batch 24:  15%|█▍        | 1462/10000 [00:04<00:52, 162.33it/s][A
Processing batch 24:  16%|█▋        | 1637/10000 [00:04<00:35, 235.57it/s][A
Processing batch 24:  18%|█▊        | 1824/10000 [00:05<00:24, 336.58it/s][A
Processing batch 24:  21%|██        | 2056/10000 [00:05<00:15, 497.48it/s][A
Processing batch 24:  23%|██▎       | 2287/10000 [00:05<00:11, 685.66it/s

Batch 24 completed.



Processing batch 25:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 25:   1%|▏         | 149/10000 [00:00<00:06, 1482.53it/s][A
Processing batch 25:   3%|▎         | 298/10000 [00:00<00:06, 1481.59it/s][A
Processing batch 25:   5%|▍         | 467/10000 [00:00<00:06, 1574.43it/s][A
Processing batch 25:   6%|▋         | 628/10000 [00:00<00:05, 1588.15it/s][A
Processing batch 25:   8%|▊         | 787/10000 [00:00<00:06, 1533.93it/s][A
Processing batch 25:   9%|▉         | 941/10000 [00:00<00:08, 1077.15it/s][A
Processing batch 25:  11%|█         | 1069/10000 [00:00<00:07, 1127.13it/s][A
Processing batch 25:  12%|█▏        | 1199/10000 [00:00<00:07, 1170.63it/s][A
Processing batch 25:  13%|█▎        | 1326/10000 [00:01<00:07, 1196.52it/s][A
Processing batch 25:  15%|█▍        | 1453/10000 [00:01<00:07, 1177.30it/s][A
Processing batch 25:  16%|█▌        | 1604/10000 [00:01<00:06, 1269.88it/s][A
Processing batch 25:  18%|█▊        | 1763/10000 [00:01<00:06, 1360.63

Batch 25 completed.



Processing batch 26:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 26:   2%|▏         | 206/10000 [00:00<00:04, 2054.55it/s][A
Processing batch 26:   4%|▍         | 423/10000 [00:00<00:10, 918.58it/s] [A
Processing batch 26:   5%|▌         | 548/10000 [00:00<00:14, 667.67it/s][A
Processing batch 26:   6%|▋         | 636/10000 [00:00<00:16, 555.95it/s][A
Processing batch 26:   7%|▋         | 737/10000 [00:01<00:14, 636.37it/s][A
Processing batch 26:   8%|▊         | 816/10000 [00:01<00:14, 643.75it/s][A
Processing batch 26:   9%|▉         | 891/10000 [00:01<00:18, 505.50it/s][A
Processing batch 26:  10%|▉         | 952/10000 [00:01<00:17, 508.58it/s][A
Processing batch 26:  10%|█         | 1010/10000 [00:01<00:17, 509.36it/s][A
Processing batch 26:  11%|█         | 1066/10000 [00:01<00:25, 349.58it/s][A
Processing batch 26:  11%|█         | 1110/10000 [00:02<00:27, 320.89it/s][A
Processing batch 26:  11%|█▏        | 1149/10000 [00:02<00:30, 293.31it/s][A
Pro

Batch 26 completed.



Processing batch 27:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 27:   1%|          | 115/10000 [00:00<00:08, 1127.76it/s][A
Processing batch 27:   2%|▏         | 228/10000 [00:00<00:08, 1086.78it/s][A
Processing batch 27:   4%|▎         | 356/10000 [00:00<00:08, 1159.33it/s][A
Processing batch 27:   5%|▍         | 473/10000 [00:00<00:08, 1129.93it/s][A
Processing batch 27:   6%|▌         | 598/10000 [00:00<00:08, 1171.36it/s][A
Processing batch 27:   7%|▋         | 716/10000 [00:00<00:08, 1145.84it/s][A
Processing batch 27:   8%|▊         | 844/10000 [00:00<00:07, 1187.15it/s][A
Processing batch 27:  10%|▉         | 993/10000 [00:00<00:07, 1280.32it/s][A
Processing batch 27:  12%|█▏        | 1173/10000 [00:00<00:06, 1438.56it/s][A
Processing batch 27:  13%|█▎        | 1332/10000 [00:01<00:05, 1484.25it/s][A
Processing batch 27:  15%|█▌        | 1512/10000 [00:01<00:05, 1577.79it/s][A
Processing batch 27:  17%|█▋        | 1671/10000 [00:01<00:05, 1439.13it

Batch 27 completed.



Processing batch 28:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 28:   1%|▏         | 142/10000 [00:00<00:06, 1415.90it/s][A
Processing batch 28:   3%|▎         | 284/10000 [00:00<00:06, 1404.90it/s][A
Processing batch 28:   4%|▍         | 425/10000 [00:00<00:06, 1381.36it/s][A
Processing batch 28:   6%|▌         | 564/10000 [00:00<00:07, 1294.83it/s][A
Processing batch 28:   7%|▋         | 731/10000 [00:00<00:06, 1421.76it/s][A
Processing batch 28:   9%|▉         | 897/10000 [00:00<00:06, 1497.26it/s][A
Processing batch 28:  10%|█         | 1048/10000 [00:00<00:08, 1078.14it/s][A
Processing batch 28:  12%|█▏        | 1172/10000 [00:00<00:08, 1067.78it/s][A
Processing batch 28:  13%|█▎        | 1305/10000 [00:01<00:07, 1133.69it/s][A
Processing batch 28:  14%|█▍        | 1437/10000 [00:01<00:07, 1181.87it/s][A
Processing batch 28:  16%|█▌        | 1568/10000 [00:01<00:06, 1211.96it/s][A
Processing batch 28:  17%|█▋        | 1725/10000 [00:01<00:06, 1312.02

Batch 28 completed.



Processing batch 29:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 29:   0%|          | 48/10000 [00:00<00:20, 479.89it/s][A
Processing batch 29:   1%|          | 96/10000 [00:00<00:28, 342.42it/s][A
Processing batch 29:   2%|▏         | 166/10000 [00:00<00:20, 475.76it/s][A
Processing batch 29:   2%|▏         | 237/10000 [00:00<00:20, 484.06it/s][A
Processing batch 29:   3%|▎         | 288/10000 [00:02<02:12, 73.28it/s] [A
Processing batch 29:   4%|▍         | 401/10000 [00:02<01:10, 137.02it/s][A
Processing batch 29:   5%|▌         | 512/10000 [00:02<00:44, 214.14it/s][A
Processing batch 29:   6%|▋         | 636/10000 [00:02<00:29, 318.09it/s][A
Processing batch 29:   8%|▊         | 759/10000 [00:02<00:21, 433.57it/s][A
Processing batch 29:   9%|▊         | 862/10000 [00:03<00:17, 522.91it/s][A
Processing batch 29:  10%|▉         | 967/10000 [00:03<00:14, 616.83it/s][A
Processing batch 29:  11%|█         | 1080/10000 [00:03<00:12, 721.10it/s][A
Processing

Batch 29 completed.



Processing batch 30:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 30:   1%|          | 107/10000 [00:00<00:09, 1069.81it/s][A
Processing batch 30:   2%|▏         | 241/10000 [00:00<00:07, 1222.64it/s][A
Processing batch 30:   4%|▎         | 370/10000 [00:00<00:07, 1251.47it/s][A
Processing batch 30:   6%|▌         | 555/10000 [00:00<00:06, 1485.39it/s][A
Processing batch 30:   7%|▋         | 704/10000 [00:00<00:06, 1424.01it/s][A
Processing batch 30:   9%|▉         | 884/10000 [00:00<00:05, 1533.74it/s][A
Processing batch 30:  11%|█         | 1101/10000 [00:00<00:05, 1734.24it/s][A
Processing batch 30:  13%|█▎        | 1327/10000 [00:00<00:04, 1897.51it/s][A
Processing batch 30:  16%|█▌        | 1553/10000 [00:00<00:04, 2009.15it/s][A
Processing batch 30:  18%|█▊        | 1782/10000 [00:01<00:03, 2094.49it/s][A
Processing batch 30:  20%|██        | 2001/10000 [00:01<00:03, 2122.19it/s][A
Processing batch 30:  22%|██▏       | 2214/10000 [00:01<00:04, 1929.03

Batch 30 completed.



Processing batch 31:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 31:   2%|▏         | 167/10000 [00:00<00:05, 1666.94it/s][A
Processing batch 31:   3%|▎         | 334/10000 [00:00<00:06, 1469.05it/s][A
Processing batch 31:   5%|▍         | 495/10000 [00:00<00:06, 1527.52it/s][A
Processing batch 31:   6%|▋         | 650/10000 [00:00<00:06, 1423.52it/s][A
Processing batch 31:   8%|▊         | 794/10000 [00:00<00:08, 1064.89it/s][A
Processing batch 31:   9%|▉         | 912/10000 [00:00<00:08, 1094.39it/s][A
Processing batch 31:  11%|█         | 1064/10000 [00:00<00:07, 1210.01it/s][A
Processing batch 31:  12%|█▏        | 1215/10000 [00:00<00:06, 1292.36it/s][A
Processing batch 31:  14%|█▎        | 1372/10000 [00:01<00:06, 1369.35it/s][A
Processing batch 31:  15%|█▌        | 1515/10000 [00:01<00:07, 1063.30it/s][A
Processing batch 31:  16%|█▋        | 1635/10000 [00:01<00:07, 1051.61it/s][A
Processing batch 31:  18%|█▊        | 1768/10000 [00:01<00:07, 1084.99

Batch 31 completed.



Processing batch 32:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 32:   2%|▏         | 175/10000 [00:00<00:05, 1742.85it/s][A
Processing batch 32:   4%|▎         | 350/10000 [00:00<00:05, 1718.99it/s][A
Processing batch 32:   5%|▌         | 522/10000 [00:00<00:06, 1394.13it/s][A
Processing batch 32:   7%|▋         | 668/10000 [00:00<00:06, 1337.91it/s][A
Processing batch 32:   8%|▊         | 806/10000 [00:00<00:07, 1286.03it/s][A
Processing batch 32:   9%|▉         | 937/10000 [00:00<00:07, 1289.08it/s][A
Processing batch 32:  11%|█         | 1068/10000 [00:00<00:07, 1208.65it/s][A
Processing batch 32:  12%|█▏        | 1191/10000 [00:00<00:08, 1025.27it/s][A
Processing batch 32:  13%|█▎        | 1318/10000 [00:01<00:08, 1072.68it/s][A
Processing batch 32:  14%|█▍        | 1444/10000 [00:01<00:07, 1120.81it/s][A
Processing batch 32:  16%|█▌        | 1560/10000 [00:01<00:19, 434.18it/s] [A
Processing batch 32:  17%|█▋        | 1715/10000 [00:01<00:14, 579.95i

Batch 32 completed.



Processing batch 33:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 33:   2%|▏         | 169/10000 [00:00<00:05, 1685.77it/s][A
Processing batch 33:   3%|▎         | 338/10000 [00:00<00:05, 1653.29it/s][A
Processing batch 33:   5%|▌         | 504/10000 [00:00<00:07, 1355.70it/s][A
Processing batch 33:   6%|▋         | 645/10000 [00:00<00:09, 995.60it/s] [A
Processing batch 33:   8%|▊         | 757/10000 [00:00<00:09, 973.18it/s][A
Processing batch 33:   9%|▉         | 898/10000 [00:00<00:08, 1085.68it/s][A
Processing batch 33:  10%|█         | 1015/10000 [00:01<00:12, 714.91it/s][A
Processing batch 33:  11%|█         | 1107/10000 [00:01<00:12, 708.93it/s][A
Processing batch 33:  12%|█▏        | 1192/10000 [00:01<00:13, 634.68it/s][A
Processing batch 33:  13%|█▎        | 1265/10000 [00:01<00:14, 591.23it/s][A
Processing batch 33:  14%|█▎        | 1353/10000 [00:01<00:13, 639.55it/s][A
Processing batch 33:  14%|█▍        | 1423/10000 [00:01<00:15, 554.12it/s][

Batch 33 completed.



Processing batch 34:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 34:   1%|          | 124/10000 [00:00<00:07, 1239.79it/s][A
Processing batch 34:   3%|▎         | 262/10000 [00:00<00:07, 1318.35it/s][A
Processing batch 34:   4%|▍         | 416/10000 [00:00<00:06, 1417.77it/s][A
Processing batch 34:   6%|▌         | 558/10000 [00:00<00:07, 1201.27it/s][A
Processing batch 34:   7%|▋         | 683/10000 [00:00<00:07, 1166.59it/s][A
Processing batch 34:   8%|▊         | 803/10000 [00:00<00:09, 950.91it/s] [A
Processing batch 34:  10%|▉         | 954/10000 [00:00<00:08, 1095.38it/s][A
Processing batch 34:  11%|█         | 1099/10000 [00:00<00:07, 1187.33it/s][A
Processing batch 34:  12%|█▏        | 1225/10000 [00:01<00:08, 1040.50it/s][A
Processing batch 34:  13%|█▎        | 1337/10000 [00:01<00:13, 653.24it/s] [A
Processing batch 34:  15%|█▍        | 1455/10000 [00:01<00:11, 749.70it/s][A
Processing batch 34:  16%|█▌        | 1584/10000 [00:01<00:09, 861.66it/

Batch 34 completed.



Processing batch 35:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 35:   1%|          | 61/10000 [00:00<00:18, 537.64it/s][A
Processing batch 35:   1%|▏         | 133/10000 [00:00<00:15, 638.05it/s][A
Processing batch 35:   3%|▎         | 253/10000 [00:00<00:11, 845.68it/s][A
Processing batch 35:   3%|▎         | 338/10000 [00:00<00:31, 302.84it/s][A
Processing batch 35:   4%|▍         | 395/10000 [00:01<00:28, 341.67it/s][A
Processing batch 35:   5%|▍         | 451/10000 [00:01<00:25, 369.95it/s][A
Processing batch 35:   5%|▌         | 504/10000 [00:01<00:24, 391.59it/s][A
Processing batch 35:   6%|▌         | 555/10000 [00:01<00:23, 401.56it/s][A
Processing batch 35:   6%|▌         | 604/10000 [00:01<00:31, 298.25it/s][A
Processing batch 35:   6%|▋         | 643/10000 [00:01<00:42, 217.75it/s][A
Processing batch 35:   7%|▋         | 704/10000 [00:02<00:33, 279.58it/s][A
Processing batch 35:   8%|▊         | 832/10000 [00:02<00:20, 456.73it/s][A
Processing

Batch 35 completed.



Processing batch 36:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 36:   0%|          | 9/10000 [00:00<02:08, 77.46it/s][A
Processing batch 36:   1%|          | 60/10000 [00:00<00:31, 314.96it/s][A
Processing batch 36:   1%|          | 104/10000 [00:00<00:28, 351.29it/s][A
Processing batch 36:   1%|▏         | 140/10000 [00:00<00:30, 318.21it/s][A
Processing batch 36:   2%|▏         | 219/10000 [00:00<00:20, 470.74it/s][A
Processing batch 36:   3%|▎         | 269/10000 [00:00<00:21, 454.71it/s][A
Processing batch 36:   4%|▎         | 369/10000 [00:00<00:16, 579.88it/s][A
Processing batch 36:   5%|▍         | 453/10000 [00:00<00:14, 653.88it/s][A
Processing batch 36:   6%|▌         | 592/10000 [00:01<00:10, 865.28it/s][A
Processing batch 36:   7%|▋         | 681/10000 [00:01<00:14, 657.30it/s][A
Processing batch 36:   8%|▊         | 801/10000 [00:01<00:11, 785.71it/s][A
Processing batch 36:   9%|▉         | 890/10000 [00:01<00:11, 767.55it/s][A
Processing ba

Batch 36 completed.



Processing batch 37:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 37:   2%|▏         | 188/10000 [00:00<00:05, 1872.15it/s][A
Processing batch 37:   4%|▍         | 376/10000 [00:00<00:05, 1834.09it/s][A
Processing batch 37:   6%|▌         | 560/10000 [00:00<00:05, 1754.50it/s][A
Processing batch 37:   7%|▋         | 740/10000 [00:00<00:05, 1771.33it/s][A
Processing batch 37:  10%|▉         | 975/10000 [00:00<00:04, 1973.07it/s][A
Processing batch 37:  12%|█▏        | 1173/10000 [00:00<00:05, 1543.70it/s][A
Processing batch 37:  13%|█▎        | 1341/10000 [00:00<00:05, 1484.36it/s][A
Processing batch 37:  15%|█▌        | 1512/10000 [00:00<00:05, 1543.46it/s][A
Processing batch 37:  17%|█▋        | 1674/10000 [00:01<00:05, 1518.03it/s][A
Processing batch 37:  18%|█▊        | 1831/10000 [00:01<00:05, 1499.58it/s][A
Processing batch 37:  20%|██        | 2002/10000 [00:01<00:05, 1557.47it/s][A
Processing batch 37:  22%|██▏       | 2193/10000 [00:01<00:04, 1656.7

Batch 37 completed.



Processing batch 38:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 38:   2%|▏         | 185/10000 [00:00<00:05, 1845.10it/s][A
Processing batch 38:   4%|▎         | 370/10000 [00:00<00:05, 1782.96it/s][A
Processing batch 38:   6%|▌         | 590/10000 [00:00<00:04, 1969.04it/s][A
Processing batch 38:   8%|▊         | 788/10000 [00:00<00:06, 1494.38it/s][A
Processing batch 38:  10%|▉         | 951/10000 [00:00<00:06, 1426.90it/s][A
Processing batch 38:  11%|█         | 1119/10000 [00:00<00:05, 1495.52it/s][A
Processing batch 38:  13%|█▎        | 1280/10000 [00:00<00:05, 1525.12it/s][A
Processing batch 38:  14%|█▍        | 1438/10000 [00:00<00:05, 1490.67it/s][A
Processing batch 38:  16%|█▌        | 1614/10000 [00:01<00:05, 1567.19it/s][A
Processing batch 38:  18%|█▊        | 1807/10000 [00:01<00:04, 1670.79it/s][A
Processing batch 38:  20%|██        | 2022/10000 [00:01<00:04, 1810.03it/s][A
Processing batch 38:  23%|██▎       | 2263/10000 [00:01<00:03, 1983.4

Batch 38 completed.



Processing batch 39:   0%|          | 0/10000 [00:00<?, ?it/s][A
Processing batch 39:   2%|▏         | 207/10000 [00:00<00:04, 2067.46it/s][A
Processing batch 39:   4%|▍         | 414/10000 [00:00<00:04, 2058.89it/s][A
Processing batch 39:   6%|▌         | 620/10000 [00:00<00:04, 2035.46it/s][A
Processing batch 39:   8%|▊         | 824/10000 [00:00<00:04, 1860.65it/s][A
Processing batch 39:  10%|█         | 1013/10000 [00:00<00:04, 1851.47it/s][A
Processing batch 39:  12%|█▏        | 1200/10000 [00:00<00:06, 1344.65it/s][A
Processing batch 39:  14%|█▎        | 1353/10000 [00:00<00:06, 1316.75it/s][A
Processing batch 39:  15%|█▍        | 1497/10000 [00:01<00:08, 1012.45it/s][A
Processing batch 39:  17%|█▋        | 1695/10000 [00:01<00:06, 1218.19it/s][A
Processing batch 39:  18%|█▊        | 1838/10000 [00:01<00:07, 1055.18it/s][A
Processing batch 39:  20%|█▉        | 1961/10000 [00:01<00:10, 753.48it/s] [A
Processing batch 39:  21%|██        | 2059/10000 [00:01<00:11, 707.0

Batch 39 completed.



Processing batch 40:   0%|          | 0/5610 [00:00<?, ?it/s][A
Processing batch 40:   4%|▍         | 215/5610 [00:00<00:02, 2146.04it/s][A
Processing batch 40:   8%|▊         | 430/5610 [00:00<00:03, 1313.65it/s][A
Processing batch 40:  10%|█         | 580/5610 [00:00<00:04, 1174.46it/s][A
Processing batch 40:  13%|█▎        | 709/5610 [00:00<00:04, 1205.00it/s][A
Processing batch 40:  15%|█▍        | 836/5610 [00:00<00:05, 860.72it/s] [A
Processing batch 40:  17%|█▋        | 970/5610 [00:00<00:04, 968.72it/s][A
Processing batch 40:  19%|█▉        | 1082/5610 [00:01<00:05, 901.48it/s][A
Processing batch 40:  23%|██▎       | 1272/5610 [00:01<00:03, 1139.93it/s][A
Processing batch 40:  25%|██▍       | 1401/5610 [00:01<00:04, 947.68it/s] [A
Processing batch 40:  27%|██▋       | 1524/5610 [00:01<00:04, 1003.56it/s][A
Processing batch 40:  29%|██▉       | 1636/5610 [00:01<00:04, 812.38it/s] [A
Processing batch 40:  31%|███▏      | 1755/5610 [00:01<00:04, 880.47it/s][A
Process

Batch 40 completed.
Moving extracted files to Google Drive...


In [None]:
import tarfile

# Archive to inspect. The path is relative, so it resolves against the
# notebook's current working directory.
tar_file_path = 'drive/My Drive/evaluation/eval_all.tar'

# Read the full member index, then release the archive handle; the TarInfo
# objects stay usable for reporting after the file is closed.
with tarfile.open(tar_file_path, 'r') as tar:
    members = tar.getmembers()

print(f"Total members in tar file: {len(members)}")

# Display details of the first 20 members, one print call per member.
for i, member in enumerate(members[:20]):
    print(
        f"Member {i+1}:",
        f"  Name: {member.name}",
        f"  Is File: {member.isfile()}",
        f"  Is Directory: {member.isdir()}",
        f"  Is Link: {member.islnk()}",
        f"  Size: {member.size} bytes",
        sep="\n",
    )


Total members in tar file: 395610
Member 1:
  Name: eval_all
  Is File: False
  Is Directory: True
  Is Link: False
  Size: 0 bytes
Member 2:
  Name: eval_all/Eval_0000004.png
  Is File: True
  Is Directory: False
  Is Link: False
  Size: 74186 bytes
Member 3:
  Name: eval_all/Eval_0000005.png
  Is File: True
  Is Directory: False
  Is Link: False
  Size: 72377 bytes
Member 4:
  Name: eval_all/Eval_0000007.png
  Is File: True
  Is Directory: False
  Is Link: False
  Size: 86112 bytes
Member 5:
  Name: eval_all/Eval_0000008.png
  Is File: True
  Is Directory: False
  Is Link: False
  Size: 86709 bytes
Member 6:
  Name: eval_all/Eval_0000015.png
  Is File: True
  Is Directory: False
  Is Link: False
  Size: 74185 bytes
Member 7:
  Name: eval_all/Eval_0000016.png
  Is File: True
  Is Directory: False
  Is Link: False
  Size: 65312 bytes
Member 8:
  Name: eval_all/Eval_0000023.png
  Is File: True
  Is Directory: False
  Is Link: False
  Size: 91958 bytes
Member 9:
  Name: eval_all/Eval_000

In [None]:
import copy
import os
import tarfile

from google.colab import drive
from tqdm import tqdm

# Step 2: Specify paths
# NOTE: the archive path is relative and this cell never mounts Drive itself,
# so it only resolves when Drive is already mounted under the current working
# directory.
tar_file_path = 'drive/My Drive/evaluation/eval_all.tar'
local_extract_path = '/content/evaluation'

# Step 3: Create directory for extraction
os.makedirs(local_extract_path, exist_ok=True)

# Step 4: Inspect members in the .tar file
print("Inspecting members in the .tar file...")
with tarfile.open(tar_file_path, 'r') as tar:
    members = tar.getmembers()
    print(f"Total members in tar file: {len(members)}")

    # Display details of the first 20 members
    for i, member in enumerate(members[:20]):
        print(f"Member {i+1}:")
        print(f"  Name: {member.name}")
        print(f"  Is File: {member.isfile()}")
        print(f"  Is Directory: {member.isdir()}")
        print(f"  Is Link: {member.islnk()}")
        print(f"  Size: {member.size} bytes")

# Step 5: Extract files
# Every regular file is written directly into local_extract_path using only
# its basename (the archive's directory structure is dropped on purpose).
#
# Use the 'data' extraction filter where this Python build provides it;
# older builds do not accept the `filter` keyword at all, hence the guard.
extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

# Basenames already written, used to report collisions caused by flattening.
seen_names = set()

print("Starting extraction process...")
try:
    with tarfile.open(tar_file_path, 'r') as tar:
        for member in tqdm(members, desc="Extracting files"):
            if not member.isfile():
                # tqdm.write keeps the message from corrupting the progress bar.
                tqdm.write(f"Skipping: {member.name} (Not a file)")
                continue

            # Rename a copy so the shared `members` list keeps the original
            # archive paths for any later cell that reuses it.
            flat_member = copy.copy(member)
            flat_member.name = os.path.basename(member.name)

            if flat_member.name in seen_names:
                # Flattening maps different archive paths to one output file;
                # the later member still wins, but no longer silently.
                tqdm.write(
                    f"Warning: {member.name} overwrites an earlier file "
                    f"named {flat_member.name}"
                )
            seen_names.add(flat_member.name)

            tar.extract(flat_member, path=local_extract_path, **extract_kwargs)

    print(f"Extraction completed. Extracted files are available at: {local_extract_path}")
except (tarfile.TarError, OSError) as e:
    # Archive and filesystem failures are reported so the verification step
    # can still show what was extracted; other exceptions propagate with a
    # full traceback instead of being reduced to a one-line message.
    print(f"An error occurred during extraction: {e}")

# Step 6: Verify extracted files
def list_files(directory, num_files=10):
    """Print the number of files under ``directory`` and a sample of their paths.

    The tree is walked recursively with ``os.walk``; paths are reported in
    the order the walk yields them.

    Args:
        directory: Root directory to scan.
        num_files: Maximum number of paths to print after the total.

    Returns:
        None. The summary is written to standard output.
    """
    found = [
        os.path.join(parent, name)
        for parent, _, names in os.walk(directory)
        for name in names
    ]
    sample = found[:num_files]

    print(f"Total files: {len(found)}")
    print("First few files:")
    print("\n".join(sample))

# Print the file count and a sample of paths found under the local extraction directory.
list_files(local_extract_path)
