**Paths: pathlib (preferred) vs os.path**

In [6]:
from pathlib import Path

# Build the relative path data/in/file.txt from its individual components.
# Path inserts the separator of the current OS, so the same code runs
# unchanged on Windows, Linux and macOS.
p = Path("data", "in", "file.txt")

# Report three facts about the path: exists? regular file? directory?
# Prints "False False False" on the very first run and "True True False" on
# re-runs, because the file is created further down in this cell.
path_status = (p.exists(), p.is_file(), p.is_dir())
print(*path_status)

# Make sure the folder that will hold the file is present.
# parents=True  -> also create missing intermediate folders (like `mkdir -p`)
# exist_ok=True -> stay silent when the folder is already there
target_dir = p.parent
target_dir.mkdir(parents=True, exist_ok=True)

# Create (or overwrite) the file; an explicit encoding keeps the bytes on disk
# identical across platforms.
p.write_text("hello", encoding="utf-8")

# Load the text back and show it.
file_text = p.read_text(encoding="utf-8")
print(file_text)

# --- File searching using glob/rglob ---

# rglob() descends into every subfolder of "data"; glob("*.csv") would only
# look at the top level.
data_root = Path("data")
for csv_file in data_root.rglob("*.csv"):
    # Show each match as: file name, size in bytes
    size_in_bytes = csv_file.stat().st_size
    print(csv_file.name, size_in_bytes)

True True False
hello


**OS / filesystem helpers: os, shutil**

In [22]:
import os, shutil
from pathlib import Path

# Step 1: Create a sample file using pathlib (safe and cross-platform)
original_file = Path("sourcefile/old.txt")
renamed_file = original_file.with_name("new.txt")  # sourcefile/new.txt

# The 'sourcefile' folder is not created anywhere else, so create it here.
# Without this, write_text() raises FileNotFoundError on a fresh checkout.
# exist_ok=True keeps the cell safe to re-run.
original_file.parent.mkdir(parents=True, exist_ok=True)

original_file.write_text("This is a test file.", encoding="utf-8")
print(f"File Created. {original_file.exists()}")  # Should print: True

try:
    # Step 2: Rename the file using os.rename()
    # This renames 'old.txt' to 'new.txt' inside the 'sourcefile' directory
    os.rename(original_file, renamed_file)
    print(f"Renamed file exists: {renamed_file.exists()}")  # Check if new.txt exists after renaming

except FileNotFoundError:
    print("Error: The file to rename was not found.")
except PermissionError:
    print("Error: Permission denied while renaming the file.")
except Exception as e:
    print(f"Unexpected error occurred while renaming: {e}")

try:
    # Step 3: Remove the renamed file
    # This deletes 'new.txt' from the filesystem
    os.remove(renamed_file)
    print(f"File still exists after delete: {renamed_file.exists()}")  # Should print: False

except FileNotFoundError:
    print("Error: The file to delete was not found.")
except PermissionError:
    print("Error: Permission denied while deleting the file.")
except Exception as e:
    print(f"Unexpected error occurred while deleting: {e}")

File Created. True
Renamed file exists: True
File still exists after delete: False


**CSV files: csv module**

In [29]:
import csv  # Import the built-in CSV module to handle CSV file operations

# --- Data to persist ---
# One dictionary per CSV line; the keys become the column names.
rows = [
    {"id": 1, "name": "dhiraj"},
    {"id": 2, "name": "pooja"},
]

csv_path = "sourcefile/people.csv"
columns = ["id", "name"]  # column order used for the header and every line

# --- Write: header first, then one line per dictionary ---
# newline="" hands line-ending control to the csv module, which avoids the
# blank lines that otherwise appear between rows on Windows.
with open(csv_path, mode="w", newline="", encoding="utf-8") as out_file:
    writer = csv.DictWriter(out_file, fieldnames=columns)
    writer.writeheader()
    for record in rows:
        writer.writerow(record)

# --- Read the file back ---
# DictReader takes the column names from the header line and yields one
# dictionary per data line.
with open(csv_path, mode="r", newline="", encoding="utf-8") as in_file:
    reader = csv.DictReader(in_file)
    for row in reader:
        print(row["id"], row["name"])


1 dhiraj
2 pooja


**JSON files: json module**

In [37]:
import json  # Import the built-in json module to work with JSON data in Python

# --- Structured sample data: an int, a string and a list (a JSON array) ---
data = {
    "id": 1,
    "name": "dhiraj",
    "skills": ["python", "AI"],
}

json_path = "sourcefile/user.json"

# --- Serialize to disk ---
# ensure_ascii=False keeps non-ASCII characters readable instead of \u-escaping
# them; indent=2 pretty-prints the document. UTF-8 is stated explicitly so the
# file does not depend on the platform default encoding.
with open(json_path, mode="w", encoding="utf-8") as json_out:
    json.dump(data, json_out, ensure_ascii=False, indent=2)

# --- Deserialize from disk ---
with open(json_path, mode="r", encoding="utf-8") as json_in:
    obj = json.load(json_in)

# 'obj' now holds a dict with the same content as 'data';
# read individual values with obj["id"], obj["skills"], and so on.


**Binary formats & compression**

In [42]:
import gzip  # For working with .gz (GZIP) compressed files
import bz2   # For working with .bz2 (Bzip2) compressed files

gzip_path = "sourcefile/data.txt.gz"
bz2_path = "sourcefile/data.txt.bz2"

# --- gzip: write text ---
# Mode 'wt' means "write text" (the binary counterpart is 'wb'); the string is
# encoded as UTF-8 and compressed on the way to disk.
with gzip.open(gzip_path, mode="wt", encoding="utf-8") as gz_out:
    gz_out.write("Compressed text")

# --- gzip: read text back ---
# Mode 'rt' decompresses and decodes in one step.
with gzip.open(gzip_path, mode="rt", encoding="utf-8") as gz_in:
    restored_text = gz_in.read()
print(restored_text)  # Output: Compressed text

# --- bz2: write text ---
# Same file-like interface as gzip; bzip2 typically trades speed for a smaller file.
with bz2.open(bz2_path, mode="wt", encoding="utf-8") as bz_out:
    bz_out.write("hello bz2")


Compressed text


**Zip files**

In [51]:
import zipfile  # Module for handling ZIP archive files
from pathlib import Path  # For working with filesystem paths in a clean, cross-platform way

archive_path = "sourcefile/bundle.zip"
# Files to bundle; both must already exist on disk.
members = ("sourcefile/people.csv", "sourcefile/user.json")

# --- Step 1: Build the archive ---
# Mode 'w' starts a new archive (replacing an existing one);
# ZIP_DEFLATED turns on standard ZIP compression.
with zipfile.ZipFile(archive_path, mode="w", compression=zipfile.ZIP_DEFLATED) as z:
    for member in members:
        z.write(member)

# --- Step 2: Unpack everything ---
# The target folder 'unzipped' is created when it is missing.
with zipfile.ZipFile(archive_path) as z:
    z.extractall("unzipped")

# --- Step 3: Inspect the archive ---
# Opening without a mode reads the archive; namelist() returns the stored names.
with zipfile.ZipFile(archive_path) as z:
    archived_names = z.namelist()
print(archived_names)  # Output: ['sourcefile/people.csv', 'sourcefile/user.json']


['sourcefile/people.csv', 'sourcefile/user.json']


**Tar files**

In [57]:
# NOTE: this cell is inert. The tarfile example is wrapped in a triple-quoted
# string, so nothing is executed and no archive is written; as the only
# expression in the cell, the string itself is what the notebook displays.
# The quoted snippet opens "logs.tar.gz" with mode "w:gz" and adds the path
# "logs" under the archive name "logs".
"""import tarfile
with tarfile.open("logs.tar.gz", "w:gz") as t:
    t.add("logs", arcname="logs")"""

'import tarfile\nwith tarfile.open("logs.tar.gz", "w:gz") as t:\n    t.add("logs", arcname="logs")'

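**Pickle (Python object serialization) — caution**

Caution: only unpickle data you trust. Loading a pickle can execute arbitrary code, so never call `pickle.load` on files from an untrusted source.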

In [66]:
import pickle  # Import the 'pickle' module to handle object serialization and deserialization

# Sample object to round-trip: a dict holding a list and a nested dict
data = {"a": [1, 2], "b": {"x": 3}}

pickle_path = "sourcefile/obj.pkl"

# Serialize: pickles are bytes, so the file is opened in binary write mode.
# HIGHEST_PROTOCOL selects the newest pickle format this interpreter supports.
with open(pickle_path, mode="wb") as pickle_out:
    pickle.dump(data, pickle_out, protocol=pickle.HIGHEST_PROTOCOL)

# Deserialize: read the bytes back and rebuild the object.
# This is safe here only because the file was written by this very cell.
with open(pickle_path, mode="rb") as pickle_in:
    loaded = pickle.load(pickle_in)

# 'loaded' should compare equal to 'data'; print it to confirm.
print(loaded)


{'a': [1, 2], 'b': {'x': 3}}


**Temporary files & dirs: tempfile**

In [72]:
import tempfile  # Import the tempfile module to create temporary files and directories
import pathlib   # Import the pathlib module for easy path manipulation

# --- Temporary file ---
# mode="w+"        : text mode, readable and writable
# delete=True      : the file is removed as soon as it is closed
# encoding="utf-8" : explicit text encoding
with tempfile.NamedTemporaryFile(mode="w+", delete=True, encoding="utf-8") as tf:
    tf.write("temp content")

    # Writing left the position at the end of the file; rewind before reading.
    tf.seek(0)
    temp_text = tf.read()
    print(temp_text)  # Output: 'temp content'

    # .name is the full path of the temporary file on disk
    print("Temp path:", tf.name)

# --- Temporary directory ---
# The directory and everything inside it is removed when the block ends.
with tempfile.TemporaryDirectory() as td:
    temp_dir = pathlib.Path(td)

    # Path of a file 'a.txt' inside the temporary directory
    p = temp_dir / "a.txt"
    p.write_text("hi", encoding="utf-8")

    # Show what the temporary directory currently contains
    dir_entries = [entry for entry in p.parent.iterdir()]
    print("Dir contents:", dir_entries)


temp content
Temp path: C:\Users\dhira\AppData\Local\Temp\tmpn5l7_s7n
Dir contents: [WindowsPath('C:/Users/dhira/AppData/Local/Temp/tmpwuunt5zx/a.txt')]


**Memory-mapped files: mmap (advanced, huge files)**

In [76]:
import mmap  # Import the 'mmap' module for memory-mapped file access

# Open 'sourcefile/big.bin' in read-and-write binary mode ('r+b').
# The file must already exist and must not be empty (an empty file cannot be mapped).
with open("sourcefile/big.bin", "r+b") as f:
    # f.fileno() is the OS-level file descriptor; a length of 0 maps the whole file.
    # Using the mapping as a context manager guarantees it is closed even if
    # one of the statements below raises (a trailing mm.close() would be skipped).
    with mmap.mmap(f.fileno(), 0) as mm:
        # Slicing a mapping returns a bytes object, exactly like slicing bytes;
        # e.g. mm[0:4] would be bytes 0-3. Print the first 10 bytes of the file.
        print(mm[:10])

b'\xa3\x1c\x06\xbdF>9#\xbc\x1a'


**File locking (advanced, OS-dependent)**
- Windows: msvcrt.locking
- Unix: fcntl.flock
- Cross-platform: use a library such as portalocker.

In [82]:
import portalocker  # Import the portalocker module to handle file locking
import time  # Import time module for simulating delays

# Open 'shared.log' in append mode ('a'): new lines are added, nothing is overwritten
with open("sourcefile/shared.log", "a", encoding="utf-8") as f:
    # Take an exclusive lock (LOCK_EX): only one process can hold it at a time
    portalocker.lock(f, portalocker.LOCK_EX)

    try:
        # Write a line to the file (this only fills Python's internal buffer)
        f.write("line\n")

        # Simulate a time-consuming operation (e.g., network I/O)
        time.sleep(1)

        # Push the buffered text to the OS while the lock is still held.
        # Without this, the data would only be written when the file is
        # closed, i.e. after the lock has already been released, so another
        # process could interleave its own output.
        f.flush()

    finally:
        # Release the lock so other processes/threads can access the file
        portalocker.unlock(f)

**Logging file operations (robust apps)**

In [83]:
import logging
from pathlib import Path

# Root-logger settings, gathered in one place and handed to basicConfig().
log_settings = {
    # Destination file, relative to the current working directory
    "filename": "app.log",
    # Threshold: INFO and everything more severe (WARNING, ERROR, CRITICAL)
    "level": logging.INFO,
    # Line layout: timestamp, level name, then the message itself
    "format": "%(asctime)s %(levelname)s %(message)s",
}
logging.basicConfig(**log_settings)

def save_report(text: str, path: Path) -> None:
    """Write ``text`` to ``path`` as UTF-8, creating missing parent folders.

    A successful write is recorded with an INFO log entry. Any exception
    raised along the way (for example a permission problem or an invalid
    path) is caught, logged together with its traceback, and not re-raised,
    so the function always returns ``None``.

    Args:
        text: Content of the report.
        path: Destination file; its parent folders are created on demand.
    """
    try:
        # parents=True builds the whole folder chain; exist_ok=True tolerates
        # folders that are already present.
        report_dir = path.parent
        report_dir.mkdir(parents=True, exist_ok=True)

        path.write_text(text, encoding="utf-8")

        logging.info("Saved report to %s", path)
    except Exception as err:
        # logging.exception logs at ERROR level and appends the stack trace.
        logging.exception("Failed to save report: %s", err)

# Write the text "Hello" to reports/2025-10-17.txt.
# save_report creates the 'reports' folder itself when it is missing.
report_path = Path("reports") / "2025-10-17.txt"
save_report("Hello", report_path)
