In [3]:
#Deepdive into what the python standard library has to offer with respect to file/folder I/O and manipulation.

#List a directory in python
import os

def list_directory(fld):
    print(os.getcwd())
    for fn in os.listdir(fld):
        print(fn)

list_directory('.')


c:\Users\chitrarthsahai\Documents\Repos\Workbench
.venv
a2a-python
agent-definition-registry-service
agent-runtime-service
AI Workbench.code-workspace
crew
DataPreProcessing.ipynb
digital-matrix-app
digital-matrix-infra
digital-matrix-infra.wiki
digital-matrix-infra1
digital-matrix-microservices
digital-matrix-regression
digital-matrix-utilities
FilesInPy.ipynb
InvokeLLMFromTransformers.ipynb
Java
LangGraphExample.ipynb
LangGraphExample_MultiAgent.ipynb
OOPsInPy.ipynb
semantic-kernel
Test.txt
wb-core


In [7]:
#String Methods to look for file name within folder.

# def ends_with(fld, search):
#     print(os.getcwd())
#     for fn in os.listdir(fld):
#         if fn.endswith(search):
#             print(fn)

# def starts_with(fld, search):
#     print(os.getcwd())
#     for fn in os.listdir(fld):
#         if fn.startswith(search):
#             print(fn)

#lambda implementation of the above

ends_with = lambda fld, search: [print(fn) for fn in os.listdir(fld) if fn.endswith(search)]
starts_with = lambda fld, search: [print(fn) for fn in os.listdir(fld) if fn.startswith(search)]

ends_with('.', '.ipynb')
starts_with('.', 'test')

DataPreProcessing.ipynb
FilesInPy.ipynb
InvokeLLMFromTransformers.ipynb
LangGraphExample.ipynb
LangGraphExample_MultiAgent.ipynb
OOPsInPy.ipynb


[]

In [17]:
#Pattern matching with fnmatch
import fnmatch

list_directory_with_pattern = lambda fld, pattern: [print(fn) for fn in os.listdir(fld) if fnmatch.fnmatch(fn, pattern)]

list_directory_with_pattern('.', '*[-]*[-]*')

agent-definition-registry-service
agent-runtime-service
digital-matrix-app
digital-matrix-infra
digital-matrix-infra.wiki
digital-matrix-infra1
digital-matrix-microservices
digital-matrix-regression
digital-matrix-utilities


[None, None, None, None, None, None, None, None, None]

In [19]:
#Advanced pattern matching with fnmatch

match = lambda fld, pattern: [print(fn) for fn in os.listdir(fld) if fnmatch.fnmatchcase(fn, pattern)]
match('.', '*-*-*')

agent-definition-registry-service
agent-runtime-service
digital-matrix-app
digital-matrix-infra
digital-matrix-infra.wiki
digital-matrix-infra1
digital-matrix-microservices
digital-matrix-regression
digital-matrix-utilities


[None, None, None, None, None, None, None, None, None]

In [None]:
#Pattern matching with glob
from pathlib import Path
import glob

#This approach is different from the one above, instead of listing contents of the folder and then filtering using fnmatch, we are using the glob module to match the pattern directly.
#We are creating a Path object for the given path and then using the glob method to match the pattern. Notice unlike, fnmatch that returns a bool of whether filename is equal to the pattern, glob returns a list of Path objects that match the pattern.
glob_match = lambda fld, search: [print(n) for n in Path(fld).glob(search)]

glob_match('.', '*.ipynb')

#You avoid iterating over the entire directory and then filtering it. Instead, you are directly getting the list of files that match the pattern.

DataPreProcessing.ipynb
FilesInPy.ipynb
InvokeLLMFromTransformers.ipynb
LangGraphExample.ipynb
LangGraphExample_MultiAgent.ipynb
OOPsInPy.ipynb


[None, None, None, None, None, None]

In [36]:
#Get File Attributes.

from datetime import datetime

get_date = lambda timestamp: datetime.fromtimestamp(timestamp).strftime('%d %b %Y')

def get_file_attributes(fld):
    print(os.getcwd())
    with os.scandir(fld) as dir:
        for f in dir:
            if f.is_file():
                inf = f.stat()
                print(f'Modified {get_date(inf.st_mtime)} {f.name} {inf.st_size} bytes')
            elif f.is_dir():
                print(f.name, 'is a directory')
            else:
                print(f.name, 'is not a file or directory')

get_file_attributes('.')

c:\Users\chitrarthsahai\Documents\Repos\Workbench
.venv is a directory
a2a-python is a directory
agent-definition-registry-service is a directory
agent-runtime-service is a directory
Modified 04 May 2025 AI Workbench.code-workspace 76 bytes
crew is a directory
Modified 04 May 2025 DataPreProcessing.ipynb 255 bytes
digital-matrix-app is a directory
digital-matrix-infra is a directory
digital-matrix-infra.wiki is a directory
digital-matrix-infra1 is a directory
digital-matrix-microservices is a directory
digital-matrix-regression is a directory
digital-matrix-utilities is a directory
Modified 21 May 2025 FilesInPy.ipynb 8986 bytes
Modified 04 May 2025 InvokeLLMFromTransformers.ipynb 9018 bytes
Java is a directory
Modified 17 May 2025 LangGraphExample.ipynb 15617 bytes
Modified 17 May 2025 LangGraphExample_MultiAgent.ipynb 76988 bytes
Modified 20 May 2025 OOPsInPy.ipynb 493737 bytes
semantic-kernel is a directory
wb-core is a directory


In [None]:
#Traversing a directory tree

def traverse_directory(fld):
    print(os.getcwd())
    for dirpath, dirnames, filenames in os.walk(fld):
        print(f'Current directory: {dirpath}')
        print(f'Directories: {dirnames}')
        print(f'Files: {filenames}')
        print('---')

traverse_directory('.')

In [38]:
#Copying files

import shutil

def copy_file(src, dst):
    print(os.getcwd())
    if os.path.isfile(src):
        shutil.copy(src, dst)
        print(f'Copied {src} to {dst}')
    else:
        print(f'{src} does not exist')

def copy_directory(src, dst):
    print(os.getcwd())
    if os.path.isdir(src):
        shutil.copytree(src, dst)
        print(f'Copied {src} to {dst}')
    else:
        print(f'{src} does not exist')

In [39]:
def move_file(src, dst): #can move both files and directories
    #shutil.move(src, dst) #this will move the file or directory to the destination
    print(os.getcwd())
    if os.path.isfile(src):
        shutil.move(src, dst)
        print(f'Copied {src} to {dst}')
    else:
        print(f'{src} does not exist')

In [44]:
#Rename files

def rename_file(src, dst):
    os.rename(src, dst) #one way to rename

def rename_file_2(src, dst):
    Path(src).rename(dst) #another way to rename

#rename_file('LangGraphExample.ipynb', 'LangGraph_Example.ipynb')
rename_file_2('LangGraph_Example.ipynb', 'LangGraphExample.ipynb')
#Deleting files and directories

In [4]:
#deleting files and directories
def delete_file(src):
    if os.path.isfile(src):
        try:
            os.remove(src)
            print(f'Deleted {src}')
        except OSError as e:
            print(f'Error deleting {src}: {e}')
    else:
            print(f'{src} does not exist')

delete_file("./Test.txt")


Deleted ./Test.txt


In [5]:
#Archiving files and directories
import zipfile

def create_zip(zipf:str, files:list[str], opt):
    with zipfile.ZipFile(zipf, opt) as zf:
        for f in files:
            zf.write(f)
            print(f'Added {f} to {zipf}')

to_zip = [
    'test.txt',
    "test copy.txt",
    "test copy 2.txt",
    "test copy 3.txt",
    "test copy 4.txt",
]

create_zip('test_1.zip', to_zip, 'w')

Added test.txt to test_1.zip
Added test copy.txt to test_1.zip
Added test copy 2.txt to test_1.zip
Added test copy 3.txt to test_1.zip
Added test copy 4.txt to test_1.zip


In [12]:
#adding files to an existing zip file

def add_to_zip(zipf:str, files:list[str], opt):
    with zipfile.ZipFile(zipf, opt) as zf:
        for f in files:
            lst = zf.namelist()
            if f not in lst:
                zf.write(f)
                print(f'Added {f} to {zipf}')
            else:
                print(f'{f} already exists in {zipf}')

to_zip = [
    'test.txt',
    "test copy.txt",
    "test copy 2.txt",
    "test copy 3.txt",
    "test copy 4.txt",
    "test copy 5.txt",
    "test copy 6.txt",
    "test copy 7.txt"
]

add_to_zip('test_1.zip', to_zip, 'a')

test.txt already exists in test_1.zip
test copy.txt already exists in test_1.zip
test copy 2.txt already exists in test_1.zip
test copy 3.txt already exists in test_1.zip
test copy 4.txt already exists in test_1.zip
Added test copy 5.txt to test_1.zip
Added test copy 6.txt to test_1.zip
Added test copy 7.txt to test_1.zip


In [13]:
#Reading a zip file

def read_zip(zipf:str):
    with zipfile.ZipFile(zipf, 'r') as zf:
        for f in zf.namelist():
            print(f'Info of {f}: {zf.getinfo(f)}')
            print(f'{f} => {zf.getinfo(f).file_size} bytes, {zf.getinfo(f).compress_size} bytes compressed')

read_zip('test_1.zip')

Info of test.txt: <ZipInfo filename='test.txt' filemode='-rw-rw-rw-' file_size=0>
test.txt => 0 bytes, 0 bytes compressed
Info of test copy.txt: <ZipInfo filename='test copy.txt' filemode='-rw-rw-rw-' file_size=0>
test copy.txt => 0 bytes, 0 bytes compressed
Info of test copy 2.txt: <ZipInfo filename='test copy 2.txt' filemode='-rw-rw-rw-' file_size=0>
test copy 2.txt => 0 bytes, 0 bytes compressed
Info of test copy 3.txt: <ZipInfo filename='test copy 3.txt' filemode='-rw-rw-rw-' file_size=0>
test copy 3.txt => 0 bytes, 0 bytes compressed
Info of test copy 4.txt: <ZipInfo filename='test copy 4.txt' filemode='-rw-rw-rw-' file_size=0>
test copy 4.txt => 0 bytes, 0 bytes compressed
Info of test.txt: <ZipInfo filename='test.txt' filemode='-rw-rw-rw-' file_size=0>
test.txt => 0 bytes, 0 bytes compressed
Info of test copy.txt: <ZipInfo filename='test copy.txt' filemode='-rw-rw-rw-' file_size=0>
test copy.txt => 0 bytes, 0 bytes compressed
Info of test copy 2.txt: <ZipInfo filename='test copy

In [16]:
#Extracting files from a zip file

def extract_zip(zipf:str, dst:str):
    with zipfile.ZipFile(zipf, 'r') as zf:
        zf.extractall(dst)
        print(f'Extracted {zipf} to {dst}')

extract_zip('test_1.zip', './extracted')


Extracted test_1.zip to ./extracted


In [None]:
#working with text files

def read_text(fn):
    with open(fn) as f:
        print(f.read())


def read_text_line(fn):
    with open(fn) as f:
        lines = f.readlines()
        for line in lines:
            print(line.strip(), end='')
            print('---------------------------------------')


def write_new_text(fn, text):
    with open(fn, 'w', encoding='utf-8') as f:
        f.write(text)
        print(f'Wrote {text} to {fn}')

def append_text(fn, text):
    with open(fn, 'a', encoding='utf-8') as f:
        f.write('\n')
        f.write(text)
        print(f'Appended {text} to {fn}')

# read_text('InvokeLLMFromTransformers.ipynb')
# read_text_line('InvokeLLMFromTransformers.ipynb')




{---------------------------------------
"cells": [---------------------------------------
{---------------------------------------
"cell_type": "code",---------------------------------------
"execution_count": 2,---------------------------------------
"id": "87e462aa",---------------------------------------
"metadata": {},---------------------------------------
"outputs": [],---------------------------------------
"source": [---------------------------------------
"%%capture --no-stderr\n",---------------------------------------
"%pip install --quiet transformers torch --verbose"---------------------------------------
]---------------------------------------
},---------------------------------------
{---------------------------------------
"cell_type": "code",---------------------------------------
"execution_count": null,---------------------------------------
"id": "625fcaa7",---------------------------------------
"metadata": {},---------------------------------------
"outputs": [-

In [24]:
#Working with CSV files
import csv
from typing import Iterable, Any

def read_csv(fn, delimiter):
    with open(fn) as csv_f:
        cnt = -1
        rows = csv.reader(csv_f,delimiter=delimiter)
        for row in rows:
                print(f'{" | ".join(row)}')
                cnt += 1
        print(f'Total rows: {cnt}')

read_csv('SubnetDetails.csv', ',')


def write_csv(fn, header:Iterable[Any], row:Iterable[Any]):
     with open(fn, mode='w', newline='') as csv_f:
        writer = csv.writer(csv_f)
        writer.writerow(header)
        writer.writerow(row)
        print(f'Wrote {header} and {row} to {fn}')

header = ['Name', 'Age', 'City']
row = ['John', '30', 'New York']
write_csv('test.csv', header, row)

SubnetName | CIDRRange | ShortDescription
sub-core-pe-wb-gac-use-ua | 10.56.6.0/25 | Core PE subnet
AzureFirewallSubnet | 10.56.0.0/26 | Firewall subnet
sub-dpr-outbound-wb-gac-use-ua | 10.56.1.16/28 | Outbound DPR subnet
sub-dpr-inbound-wb-gac-use-ua | 10.56.1.32/28 | Inbound DPR subnet
sub-core-buildagent-wb-gac-use-ua | 10.56.0.128/25 | Build agent subnet
AzureBastionSubnet | 10.56.0.64/26 | Bastion service subnet
sub-pe-wb-gac-use-ua | 10.56.5.0/25 | PE subnet
sub-asp-wb-gac-use-ua | 10.56.6.192/26 | ASP subnet
sub-apim-wb-gac-use-ua | 10.56.5.192/26 | APIM subnet
Total rows: 9
Wrote ['Name', 'Age', 'City'] and ['John', '30', 'New York'] to test.csv


In [35]:
#Working with XML files

import xml.etree.ElementTree as ET

def parse_xml_et(fn):
    tree = ET.parse(fn)
    root = tree.getroot()
    print('Domains for:' + root.tag)
    for items in root.iter():
        print(f'{items.tag} => {[f'{k}:{v}' for k,v in items.attrib.items()]} having values {items.text}')

parse_xml_et('sample.xml')

def add_xml_et(fn, el, attr, val):
    tree = ET.parse(fn)
    root = tree.getroot()
    new_el = ET.Element(el, attr) #instantiating a new element
    new_el.text = val
    root.append(new_el)
    tree.write(fn)
    print(f'Added {el} with attributes {attr} and value {val} to {fn}')

def change_xml_er(fn, el, attr, val):
    tree = ET.parse(fn)
    root = tree.getroot()
    root.find(el).set(attr, val) #changing the value of the attribute
    tree.write(fn)

change_xml_er('sample.xml', 'neighbor', 'name', 'example.org')

Domains for:data
data => [] having values 
    
country => ['name:Liechtenstein'] having values 
        
rank => [] having values 1
year => [] having values 2008
gdppc => [] having values 141100
neighbor => ['name:Austria', 'direction:E'] having values None
neighbor => ['name:Switzerland', 'direction:W'] having values None
country => ['name:Singapore'] having values 
        
rank => [] having values 4
year => [] having values 2011
gdppc => [] having values 59900
neighbor => ['name:Malaysia', 'direction:N'] having values None
country => ['name:Panama'] having values 
        
rank => [] having values 68
year => [] having values 2011
gdppc => [] having values 13600
neighbor => ['name:Costa Rica', 'direction:W'] having values None
neighbor => ['name:Colombia', 'direction:E'] having values None


AttributeError: 'NoneType' object has no attribute 'set'

In [37]:
#Working with JSON files

import json

def read_print_json(fn, pretty, sort):
    with open(fn) as json_f:
        data = json.load(json_f)
        if pretty:
            print(json.dumps(data, indent=4, sort_keys=sort))
        else:
            print(json.dumps(data))

def update_author_json(fn, arr_name, pos, key, value):
       with open(fn) as json_f:
            data = json.load(json_f)
            data[arr_name][pos][key] = value
            with open(fn, 'w') as json_f:
                json.dump(data, json_f, indent=4)
                print(f'Updated {key} to {value} in {arr_name} at position {pos} in {fn}')

read_print_json('authors.json', True, True)

update_author_json('authors.json', 'authors', 0, 'name', 'Mark Twain')

{
    "authors": [
        {
            "courses": 10,
            "name": "John Doe"
        },
        {
            "courses": 10,
            "name": "Jane Smith"
        },
        {
            "courses": 5,
            "name": "Foo Fighter"
        }
    ]
}
Updated name to Mark Twain in authors at position 0 in authors.json


In [50]:
#Persisting data with pickle

import pickle as p


class Person:
    age = 45
    name = 'John Doe'
    kids = ['Pete', 'Mary', 'Tom']
    employers = {'AWS': 2022, 'Google': 2023, 'Microsoft': 2024}
    shoe_sizes = (10, 11, 12)

person = Person()

def serialize(obj):
    pickled = p.dumps(obj, protocol=p.HIGHEST_PROTOCOL)
    print(f'Pickled object: {pickled}')
    return pickled

def deserialize(pickled):
    obj = p.loads(pickled)
    print(f'Unpickled object: {obj}')
    return obj

serialized = serialize(person)

deserialized = deserialize(serialized)


Pickled object: b'\x80\x05\x95\x1a\x00\x00\x00\x00\x00\x00\x00\x8c\x08__main__\x94\x8c\x06Person\x94\x93\x94)\x81\x94.'
Unpickled object: <__main__.Person object at 0x0000022A3C923B10>
