# Zipfiles with Python

https://realpython.com/python-zipfile/

In [1]:
import shutil
import zipfile
from pathlib import Path

In [2]:
# help(zipfile);

In [27]:
ROOT_PATH = Path(".").absolute().parent
DATA_PATH = ROOT_PATH / "datasets"
ZIP_PATH = DATA_PATH / "zipfiles"

In [28]:
ROOT_PATH.exists(), DATA_PATH.exists(), ZIP_PATH.exists()

(True, True, True)

## Testing creating zipfiles

In [41]:
with zipfile.ZipFile(ROOT_PATH.joinpath("notebooks/temps") / "test.zip", mode='w') as archive: 
    for filepath in DATA_PATH.iterdir():
        if filepath.is_file(): 
            archive.write(filepath, filepath.relative_to(DATA_PATH))

In [50]:
with zipfile.ZipFile(ZIP_PATH / "sample.zip", mode='w') as archive: 
    for filepath in DATA_PATH.joinpath("sample/").iterdir(): 
        archive.write(filepath, filepath.relative_to(DATA_PATH / "sample/"))

## Manipulating existing ZIP files

In [44]:
with zipfile.ZipFile(ZIP_PATH / "sample.zip", 'r') as a: 
    archive.printdir()

File Name                                             Modified             Size
hello.txt                                      2025-09-10 17:50:02          168
lorem.md                                       2025-09-10 17:49:39            7
real_python.md                                 2025-09-10 17:49:45           13


* Checking file is a zipfile

In [45]:
zipfile.is_zipfile(ZIP_PATH / "sample.zip")

True

In [46]:
zipfile.is_zipfile(ZIP_PATH / "test.zip")

True

In [47]:
zipfile.is_zipfile(DATA_PATH / "data.txt")

False

In [54]:
with zipfile.ZipFile(ZIP_PATH / "sample.zip", 'r') as a: 
    for info in a.infolist(): 
        print(f"Filename: {info.filename}")
        print(f"Normal size: {info.file_size} bytes")
        print(f"Compressed size: {info.compress_size} bytes")
        print("-" * 20)

Filename: hello.txt
Normal size: 168 bytes
Compressed size: 168 bytes
--------------------
Filename: lorem.md
Normal size: 7 bytes
Compressed size: 7 bytes
--------------------
Filename: real_python.md
Normal size: 13 bytes
Compressed size: 13 bytes
--------------------
Filename: data.txt
Normal size: 186 bytes
Compressed size: 186 bytes
--------------------


### Inserting a file on a zip

In [None]:
with zipfile.ZipFile(ZIP_PATH / "sample.zip", 'a') as a: # 'a' append content; 'w': overwrites content 
    to_write = DATA_PATH / "data.txt"
    a.write(to_write, to_write.relative_to(DATA_PATH))  

### Extracting a member from ZIP archives

In [57]:
with zipfile.ZipFile(ZIP_PATH / "sample.zip", mode='r') as archive: 
    archive.extract("data.txt", path=ROOT_PATH / "notebooks/temps/lol")

In [60]:
with zipfile.ZipFile(ZIP_PATH / "sample.zip", mode='r') as archive: 
    archive.extractall(path=ROOT_PATH / "notebooks/temps/trol")

## Creatin, Populating, and Extracting Zip Files

### From directories with subdirectories

In [61]:
with zipfile.ZipFile(ZIP_PATH / "all_subdirs.zip", 'w') as archive: 
    source_path = DATA_PATH
    for filepath in source_path.rglob("*"): 
        if filepath.parent != ZIP_PATH: 
            archive.write(
                filepath, 
                arcname=filepath.relative_to(DATA_PATH)
            )

In [62]:
with zipfile.ZipFile(ZIP_PATH / "all_subdirs.zip", 'r') as archive: 
    archive.printdir()

File Name                                             Modified             Size
data.txt                                       2025-09-09 12:42:50          186
housing.csv                                    2025-08-26 16:35:48      1423529
hunk_finn.txt                                  2025-08-26 14:06:30          571
moby_dick.txt                                  2025-08-26 14:09:38         1151
netflix_data.csv                               2025-08-22 11:13:28      1729748
sample/                                        2025-09-10 17:49:24            0
schools.csv                                    2025-08-22 11:13:14        25412
zipfiles/                                      2025-09-11 13:09:44            0
sample/hello.txt                               2025-09-10 17:50:02          168
sample/lorem.md                                2025-09-10 17:49:38            7
sample/real_python.md                          2025-09-10 17:49:44           13


### Creating Files sequentially

In [68]:
fileslst = [file for file in DATA_PATH.iterdir() if file.is_file()]
fileslst

[WindowsPath('c:/Users/isaul/Documents/repos/feature-engineering-mcd/datasets/data.txt'),
 WindowsPath('c:/Users/isaul/Documents/repos/feature-engineering-mcd/datasets/housing.csv'),
 WindowsPath('c:/Users/isaul/Documents/repos/feature-engineering-mcd/datasets/hunk_finn.txt'),
 WindowsPath('c:/Users/isaul/Documents/repos/feature-engineering-mcd/datasets/moby_dick.txt'),
 WindowsPath('c:/Users/isaul/Documents/repos/feature-engineering-mcd/datasets/netflix_data.csv'),
 WindowsPath('c:/Users/isaul/Documents/repos/feature-engineering-mcd/datasets/schools.csv')]

In [None]:
def append_member(zip_path, member, relative_to): 
    with zipfile.ZipFile(zip_path, 'a') as archive:
        archive.write(zip_path, arcname=zip_path.relative_to(relative_to))

def get_file_from_stream(files): 
    for file in files: 
        yield file

In [79]:
for filename in get_file_from_stream(fileslst): 
    append_member(ZIP_PATH / "incremental.zip", filename, relative_to=DATA_PATH)

  return self._open_to_write(zinfo, force_zip64=force_zip64)


### Extracting files and directories