# Extract `.json.gz` Files from Subfolders

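This notebook:
- Recursively searches all subfolders under the current directory
- Finds `*.json.gz` files
- Extracts them into `.json`
- Writes all extracted files into the **outer (current) directory**
- Optionally avoids filename collisions

**Note:** the code cell currently in this notebook only *moves* the `.json.gz` files found under `root_dir` into `target_dir`, skipping any name that already exists there. It does not decompress them, and it works on the hard-coded `root_dir` / `target_dir` paths rather than on the current directory.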


In [2]:
from pathlib import Path
import gzip
import shutil

## Configuration

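- `UNIQUE_NAMES`: prefix filenames with the parent folder name to avoid collisions
- `OVERWRITE`: overwrite existing JSON files
- `DELETE_GZ`: delete `.gz` files after extraction
- `DRY_RUN`: print actions without writing files

**Note:** the move cell below does not read any of these flags; they only take effect in code that checks them explicitly.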


In [3]:
# Prefix each output filename with its parent folder name so that files
# sharing a name in different subfolders do not collide.
UNIQUE_NAMES = True

# When False, files that already exist are left untouched instead of overwritten.
OVERWRITE = False

# When False, the original .gz files are kept.
DELETE_GZ = False

# Set to True to preview the actions without writing any files.
DRY_RUN = False

## Locate and Move `.json.gz` Files Recursively


In [1]:
from pathlib import Path
import shutil


def move_gz_files(files, target_dir):
    """Move every path in ``files`` directly into ``target_dir``, keeping its file name.

    A file is skipped when it already sits in ``target_dir`` or when a file
    with the same name already exists there; nothing is ever overwritten.
    A move that raises ``OSError`` is reported and counted instead of
    aborting the rest of the batch.

    Args:
        files: Iterable of source paths (``str`` or ``Path``).
        target_dir: Directory to move into. It is created (with parents)
            when there is at least one file to process.

    Returns:
        Tuple ``(moved, skipped, failed)`` with the number of files in each
        category.

    Raises:
        OSError: If ``target_dir`` cannot be created.
    """
    target_dir = Path(target_dir)
    sources = [Path(p) for p in files]
    moved = skipped = failed = 0

    # Only touch the filesystem when there is actually something to move.
    if sources:
        target_dir.mkdir(parents=True, exist_ok=True)

    for src in sources:
        dest = target_dir / src.name

        # Must be checked before dest.exists(): for a file that already lives
        # in target_dir, dest is the file itself and therefore always exists.
        if src.parent == target_dir:
            skipped += 1
            print(f"[SKIP] Already in target: {src.name}")
            continue

        # Never overwrite: the first file with a given name wins.
        if dest.exists():
            skipped += 1
            print(f"[SKIP] Exists: {dest.name}")
            continue

        try:
            shutil.move(str(src), str(dest))
        except OSError as exc:  # shutil.Error is an OSError subclass
            failed += 1
            print(f"[FAIL] {src} -> {dest}: {exc}")
            continue

        moved += 1
        print(f"[OK] Moved: {src} -> {dest}")

    return moved, skipped, failed


# Source tree that is searched recursively, and the flat directory the
# archives are moved into.
root_dir = Path(r"D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready")
target_dir = Path(r"D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed")  # move into this directory

# Sorted so the processing order (and therefore which duplicate name wins)
# does not depend on filesystem enumeration order.
gz_files = sorted(p for p in root_dir.rglob("*.json.gz") if p.is_file())

print(f"Found {len(gz_files)} .json.gz files")

moved, skipped, failed = move_gz_files(gz_files, target_dir)

print("\nSummary")
print(f"Moved:   {moved}")
print(f"Skipped: {skipped}")
print(f"Failed:  {failed}")

Found 1183 .json.gz files
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_0\2025-12_460_70B0_in-network-rates_01_of_04.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_460_70B0_in-network-rates_01_of_04.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_0\2025-12_460_70B0_in-network-rates_02_of_04.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_460_70B0_in-network-rates_02_of_04.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_0\2025-12_460_70B0_in-network-rates_03_of_04.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_460_70B0_in-network-rates_03_of_04.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_0\2025-12_460_70J0_in-network-rates_02_of_07.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_460_70J0_in-network-rates_02_of_07.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-

[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-A1P-IL-HMO-Non-Standard_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-A1P-IL-HMO-Non-Standard_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-A35-IL-HMO-Non-Standard_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-A35-IL-HMO-Non-Standard_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-A70-IL-HMO-Non-Standard_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-13_Blue-Cross-and-B

[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-Y49-IL-HMO-Non-Standard_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-Y49-IL-HMO-Non-Standard_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-Y54-IL-HMO-Non-Standard_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-Y54-IL-HMO-Non-Standard_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-13_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-Y70-IL-HMO-Non-Standard_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-13_Blue-Cross-and-B

[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-D01-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-D01-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-D03-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-D03-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-D12-IL-HMO-Standard-Advocate-Health_in-net

[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-Q61-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-Q61-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-Q64-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-Q64-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_Blue-Advantage-HMO-BLUEH-Q92-IL-HMO-Standard-Advocate-Health_in-net

[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-D69-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-D69-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-D87-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-D87-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_1\2025-11-14_Blue-Cross-and-Blue-Shield-of-Illinois_HMO-Illinois-HMO-E12-IL-HMO-Standard-Advocate-Health_in-network-rates.json.gz -> D:\payer_mrf\raw\t

[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_10\2025-12_690_08B0_in-network-rates_22_of_35.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_690_08B0_in-network-rates_22_of_35.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_10\2025-12_690_08B0_in-network-rates_24_of_35.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_690_08B0_in-network-rates_24_of_35.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_10\2025-12_690_08B0_in-network-rates_2_of_35.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_690_08B0_in-network-rates_2_of_35.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_10\2025-12_690_08B0_in-network-rates_30_of_35.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_690_08B0_in-network-rates_30_of_35.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\rea

[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_13\2025-12_320_33B0_in-network-rates_27_of_31.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_320_33B0_in-network-rates_27_of_31.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_13\2025-12_320_33B0_in-network-rates_29_of_31.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_320_33B0_in-network-rates_29_of_31.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_13\2025-12_320_33D0_in-network-rates_06_of_31.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_320_33D0_in-network-rates_06_of_31.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_13\2025-12_320_33D0_in-network-rates_07_of_31.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_320_33D0_in-network-rates_07_of_31.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\r

[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_7\2025-12_510_01B0_in-network-rates_6_of_29.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_510_01B0_in-network-rates_6_of_29.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_7\2025-12_510_01B0_in-network-rates_8_of_29.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_510_01B0_in-network-rates_8_of_29.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_7\2025-12_510_01C0_in-network-rates_10_of_29.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_510_01C0_in-network-rates_10_of_29.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\group_7\2025-12_510_01C0_in-network-rates_11_of_29.json.gz -> D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\2025-12_510_01C0_in-network-rates_11_of_29.json.gz
[OK] Moved: D:\payer_mrf\raw\test\2025-11-25\ingested\analyzed\ready\gro

## Summary


In [None]:
# Recap of the move step. Only names produced by the move cell are used here
# (`gz_files`, `moved`, `skipped`); the previous version referenced
# `extracted` and `failed`, which no cell defines, and raised NameError.
# Files that were found but neither moved nor skipped are reported as failed.
not_processed = len(gz_files) - moved - skipped

print("Move Summary")
print("------------")
print(f"Found:     {len(gz_files)}")
print(f"Moved:     {moved}")
print(f"Skipped:   {skipped}")
print(f"Failed:    {not_processed}")
