In [23]:
# data reduction and feature selection
import csv
input_file = "people.csv"
output_file = "updated_people.csv"

keep = ["Index","Customer Id","First Name","Last Name","Country"]
with open(input_file,"r") as file1, open(output_file,"w") as file2:
    reader = csv.DictReader(file1)
    writer = csv.DictWriter(file2, fieldnames=keep)
    writer.writeheader()
    for row in reader:
        filtered_row = {col: row[col] for col in keep}
        writer.writerow(filtered_row)

print("Reduced CSV created successfully!")

Reduced CSV created successfully!


In [35]:
# .csv file manupilation operations using import csv
with open("people.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Index", "Customer Id", "First Name", "Last Name", "Country"])
    writer.writerow(["1", "CUST001", "Akshita", "Mishra", "India"])
    writer.writerow(["2", "CUST002", "Abhi", "Sharma", "USA"])
    writer.writerow(["3", "CUST003", "Rahul", "Sharma", "Australia"])
    writer.writerow(["4", "CUST004", "Ansh", "Jain", "India"])

print("CSV written using csv.writer")

print("\n--- Reading with csv.reader ---")
with open("people.csv", "r") as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

print("\n--- Reading with csv.DictReader ---")
with open("people.csv", "r") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row["First Name"], row["Country"])

print("\n--- Filtering rows (Country == 'India') ---")
with open("people.csv", "r") as f:
    reader = csv.DictReader(f)
    for row in reader:
        if row["Country"] == "India":
            print("Matched:", row)

# register a dialect (custom CSV format)
csv.register_dialect(
    "mydialect",
    delimiter=";", 
    quotechar="'", 
    skipinitialspace=True
)

csv.register_dialect(
    "dialect2",
    delimiter="|",
    quotechar="`",
    skipinitialspace=True
)

# Write using dialect
with open("people_dialect.csv", "w", newline="") as f:
    writer = csv.writer(f, dialect="mydialect")
    writer.writerow(["Name", "City"])
    writer.writerow(["Akshita", "Jaipur"])
    writer.writerow(["Riya", "Delhi"])

print("\nCSV written using custom dialect 1")

print("\n--- Reading CSV using Dialect 1 ---")
with open("people_dialect.csv", "r") as f:
    reader = csv.reader(f, dialect="mydialect")
    for row in reader:
        print(row)

with open("people_dialect.csv", "w", newline="") as f:
    writer = csv.writer(f, dialect="dialect2")
    writer.writerow(["Name", "City"])
    writer.writerow(["Akshita", "Jaipur"])
    writer.writerow(["Riya", "Delhi"])

print("\nCSV written using custom dialect 2")

# read using custom dialect
print("\n--- Reading CSV using Dialect 2 ---")
with open("people_dialect.csv", "r") as f:
    reader = csv.reader(f, dialect="mydialect")
    for row in reader:
        print(row)

# sniffer → Auto-detect delimiter
print("\n--- Using csv.Sniffer to detect delimiter ---")
with open("people_dialect.csv", "r") as f:
    sample = f.read(1024)           # read small part
    dialect = csv.Sniffer().sniff(sample)
    f.seek(0)                

    print("Detected delimiter:", dialect.delimiter)

    reader = csv.reader(f, dialect)
    for row in reader:
        print(row)



CSV written using csv.writer

--- Reading with csv.reader ---
['Index', 'Customer Id', 'First Name', 'Last Name', 'Country']
['1', 'CUST001', 'Akshita', 'Mishra', 'India']
['2', 'CUST002', 'Abhi', 'Sharma', 'USA']
['3', 'CUST003', 'Rahul', 'Sharma', 'Australia']
['4', 'CUST004', 'Ansh', 'Jain', 'India']

--- Reading with csv.DictReader ---
Akshita India
Abhi USA
Rahul Australia
Ansh India

--- Filtering rows (Country == 'India') ---
Matched: {'Index': '1', 'Customer Id': 'CUST001', 'First Name': 'Akshita', 'Last Name': 'Mishra', 'Country': 'India'}
Matched: {'Index': '4', 'Customer Id': 'CUST004', 'First Name': 'Ansh', 'Last Name': 'Jain', 'Country': 'India'}

CSV written using custom dialect 1

--- Reading CSV using Dialect 1 ---
['Name', 'City']
['Akshita', 'Jaipur']
['Riya', 'Delhi']

CSV written using custom dialect 2

--- Reading CSV using Dialect 2 ---
['Name|City']
['Akshita|Jaipur']
['Riya|Delhi']

--- Using csv.Sniffer to detect delimiter ---
Detected delimiter: |
['Name', 'Ci

In [29]:
import pandas as pd

df = pd.read_csv("people.csv")
print("\n--- ORIGINAL DATA ---")
print(df.head())

print("\n--- DATAFRAME INFO ---")
print(df.info())

print("\n--- SHAPE (ROWS, COLUMNS) ---")
print(df.shape)

print("\n--- SELECTED COLUMNS: First Name, Last Name, Country ---")
selected_cols = df[["First Name", "Last Name", "Country"]]
print(selected_cols.head())

print("\n--- FILTER WHERE First Name == 'Sheryl' ---")
filtered = df[df["First Name"] == "Sheryl"]
print(filtered)

print("\n--- FILTER: Country == 'India' AND Index > 5 ---")
multi_filter = df[(df["Country"] == "India") & (df["Index"] > 5)]
print(multi_filter)

print("\n--- SORT BY First Name (Ascending) ---")
sorted_df = df.sort_values("First Name")
print(sorted_df.head())

df["Full Name"] = df["First Name"] + " " + df["Last Name"]
print("\n--- ADDED 'Full Name' COLUMN ---")
print(df[["First Name", "Last Name", "Full Name"]].head())

df["Customer Id"] = df["Customer Id"].str.upper()
print("\n--- UPDATED Customer Id TO UPPERCASE ---")
print(df["Customer Id"].head())

columns_to_drop = ["Phone 1", "Phone 2", "Website"]
df = df.drop(columns=columns_to_drop, axis=1, errors="ignore")
print("\n--- COLUMNS AFTER DROPPING Phone & Website ---")
print(df.head())

new_row = {
    "Index": 999,
    "Customer Id": "NEW001",
    "First Name": "Akshita",
    "Last Name": "Mishra",
    "Company": "Future Coders",
    "City": "Jaipur",
    "Country": "India",
    "Subscription Date": "2025-01-01",
    "Full Name": "Akshita Mishra"
}

df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
print("\n--- NEW ROW ADDED ---")
print(df.tail())

print("\n--- HANDLING MISSING VALUES (fill with 'Unknown') ---")
df = df.fillna("Unknown")
print(df.head())

df.to_csv("updated_people_pandas.csv", index=False)
print("\nCSV SAVED AS updated_people_pandas.csv")


--- ORIGINAL DATA ---
   Index Customer Id First Name Last Name    Country
0      1     CUST001    Akshita    Mishra      India
1      2     CUST002       Abhi    Sharma        USA
2      3     CUST003      Rahul    Sharma  Australia
3      4     CUST004       Ansh      Jain      India

--- DATAFRAME INFO ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Index        4 non-null      int64 
 1   Customer Id  4 non-null      object
 2   First Name   4 non-null      object
 3   Last Name    4 non-null      object
 4   Country      4 non-null      object
dtypes: int64(1), object(4)
memory usage: 292.0+ bytes
None

--- SHAPE (ROWS, COLUMNS) ---
(4, 5)

--- SELECTED COLUMNS: First Name, Last Name, Country ---
  First Name Last Name    Country
0    Akshita    Mishra      India
1       Abhi    Sharma        USA
2      Rahul    Sharma  Australia
3       A

In [32]:
import pandas as pd
import json

print("\n--- 1. Read Simple JSON File ---")
df_simple = pd.read_json("simple.json")
print(df_simple)

print("\n--- 2. Read JSON Lines (.jsonl) ---")
df_jsonl = pd.read_json("lines.jsonl", lines=True)
print(df_jsonl)

print("\n--- 3. Read & Flatten Nested JSON ---")
with open("nested.json") as f:
    nested_data = json.load(f)

df_nested = pd.json_normalize(nested_data, max_level=2)
print(df_nested)

print("\n--- 4. Convert DataFrame to JSON (records) ---")
json_records = df_simple.to_json(orient="records", indent=4)
print(json_records)

with open("output_records.json", "w") as f:
    f.write(json_records)

print("\n--- 5. Convert JSON String to Python Object ---")
json_str = '{"name": "Akshita", "age": 22}'
obj = json.loads(json_str)
print(obj)

print("\n--- 6. Convert Python Object to JSON String ---")
py_obj = {"city": "Jaipur", "skills": ["Python", "AI"]}
json_output = json.dumps(py_obj, indent=4)
print(json_output)

print("\n--- 7. Load JSON File Using json.load() ---")
with open("simple.json") as f:
    simple_obj = json.load(f)
print(simple_obj)

print("\n--- 8. Save Python Dict → JSON File ---")
data_to_save = {"message": "Hello Akshita", "status": "success"}

with open("saved.json", "w") as f:
    json.dump(data_to_save, f, indent=4)

print("saved.json created!")

print("\n--- 9. Convert CSV → JSON ---")

df_csv = pd.read_csv("people.csv")
df_csv.to_json("converted.json", orient="records", indent=4)
print("CSV converted to JSON and saved as converted.json")

print("\n--- 10. Convert JSON → CSV ---")

df_from_json = pd.read_json("converted.json")
df_from_json.to_csv("converted_back.csv", index=False)
print("JSON converted back to CSV!")

print("\n--- 11. Deep JSON Flattening ---")

deep_nested = {
    "student": {
        "name": "Akshita",
        "info": {
            "age": 22,
            "address": {
                "city": "Jaipur",
                "pincode": 302012
            }
        }
    }
}

df_flat = pd.json_normalize(deep_nested, sep="_")
print(df_flat)



--- 1. Read Simple JSON File ---
      name  age    city
0  Akshita   21  Jaipur
1     Riya   25   Delhi
2    Rahul   27  Mumbai

--- 2. Read JSON Lines (.jsonl) ---
      name  age    city
0  Akshita   21  Jaipur
1     Riya   25   Delhi
2    Rahul   27  Mumbai

--- 3. Read & Flatten Nested JSON ---
                                            students
0  [{'name': 'Akshita', 'details': {'age': 21, 'c...

--- 4. Convert DataFrame to JSON (records) ---
[
    {
        "name":"Akshita",
        "age":21,
        "city":"Jaipur"
    },
    {
        "name":"Riya",
        "age":25,
        "city":"Delhi"
    },
    {
        "name":"Rahul",
        "age":27,
        "city":"Mumbai"
    }
]

--- 5. Convert JSON String to Python Object ---
{'name': 'Akshita', 'age': 22}

--- 6. Convert Python Object to JSON String ---
{
    "city": "Jaipur",
    "skills": [
        "Python",
        "AI"
    ]
}

--- 7. Load JSON File Using json.load() ---
[{'name': 'Akshita', 'age': 21, 'city': 'Jaipur'}, 

In [None]:
# create GZIP file from people.csv
import pandas as pd

df = pd.read_csv("people.csv")
df.to_csv("people.csv.gz", index=False, compression="gzip")

print("GZIP file created: people.csv.gz")

In [None]:
import pandas as pd
import gzip

print("\n--- Reading GZIP CSV ---")
df = pd.read_csv("people.csv.gz", compression="gzip")
print(df.head())

print("\n--- Reading GZIP via gzip module ---")
with gzip.open("people.csv.gz", "rt") as f:
    df2 = pd.read_csv(f)

print(df2.head())

print("\n--- Selected Columns ---")
print(df[["First Name", "Country"]].head())

print("\n--- Filter First Name == 'Akshita' ---")
print(df[df["First Name"] == "Akshita"])

df["Full Name"] = df["First Name"] + " " + df["Last Name"]
print("\n--- Added Full Name Column ---")
print(df[["First Name", "Last Name", "Full Name"]].head())

print("\n--- Sorted by First Name ---")
print(df.sort_values("First Name").head())

print("\n--- Grouping by Country ---")
print(df.groupby("Country")["Customer Id"].count())


df.to_csv("updated_people.csv.gz", index=False, compression="gzip")
print("\nSaved updated GZIP CSV as updated_people.csv.gz")

df.to_json("people.json.gz", orient="records", compression="gzip")
print(" Saved JSON GZIP as people.json.gz")


In [None]:
# manipulation using tar file
import tarfile

# Step 1: Create sample files
with open("file1.txt", "w") as f:
    f.write("This is file 1")

with open("file2.csv", "w") as f:
    f.write("name,age\nAkshita,22\nRiya,25")

with open("notes.json", "w") as f:
    f.write('{"message": "Hello from JSON"}')

print("Sample files created!")

# Step 2: Create TAR file
with tarfile.open("sample.tar", "w") as tar:
    tar.add("file1.txt")
    tar.add("file2.csv")
    tar.add("notes.json")

print("sample.tar created successfully!")


In [None]:
# manupilation using tar file + pandas
import tarfile
import pandas as pd

# 1. List files in tar
tar = tarfile.open("sample.tar", "r")
print(tar.getnames())

# 2. Extract all
tar.extractall("output_folder")

# 3. Extract one file
tar.extract("file2.csv", "output_folder")

# 4. Read CSV inside tar without extracting
file_obj = tar.extractfile("file2.csv")
df = pd.read_csv(file_obj)
print(df.head())

tar.close()


In [11]:
# g=(i for i in range(2))
# while True:
#     value = next(g)
#     print(g)

# generator function using Stopiterator using exception handling
g = (i for i in range(5))

while True:
    try:
        value = next(g)
        print(value)
    except StopIteration:
        print("Generator finished!")
        break

# Auto handle Stopiterator using for loop
for value in (i for i in range(2)):
    print(value)

# chaining multiple generators
def numbers():
    for i in range(1, 10):
        yield i

def squares(nums):
    for n in nums:
        yield n*n

sqr = squares(numbers())
print(list(sqr))

0
1
2
3
4
Generator finished!
0
1
[1, 4, 9, 16, 25, 36, 49, 64, 81]


In [None]:
# iterator using email body content 
class EmailIterator:
    def __init__(self, emails):
        self.emails = emails
        self.index = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.index >= len(self.emails):
            raise StopIteration
        
        email = self.emails[self.index]
        self.index += 1
        return email
    
emails = [
    "Welcome!",
    "Your OTP is 1234",
    "Your statement is ready"
]

email_iter = EmailIterator(emails)

for email in email_iter:
    print(email)


In [None]:
# iterator + generator
class Passbook:
    def __init__(self, transactions):
        self.transactions = transactions

    def __iter__(self):
        # iterate only transactions above 5000
        for tx in self.transactions:
            if tx["amount"] > 5000:
                yield tx
transactions = [
    {"type": "deposit", "amount": 3000},
    {"type": "withdraw", "amount": 7000},
    {"type": "deposit", "amount": 12000},
]

high_value = Passbook(transactions)

for tx in high_value:
    print("High value:", tx)
    print("hello")


High value: {'type': 'withdraw', 'amount': 7000}
hello
High value: {'type': 'deposit', 'amount': 12000}
hello


In [None]:
# parsing text
import re
from collections import defaultdict

def detect_bruteforce(logfile, threshold=4):
    pattern = re.compile(
        r"Failed password for (?P<user>\w+) from (?P<ip>\d+\.\d+\.\d+\.\d+)"
    )

    ip_counter = defaultdict(int)

    with open(logfile, "r") as f:
        for line in f:
            m = pattern.search(line)
            if m:
                ip = m.group("ip")
                ip_counter[ip] += 1

    for ip, count in ip_counter.items():
        if count >= threshold:
            print(f"[BRUTE-FORCE ALERT] IP {ip} attempted {count} failed logins!")


# RUN IT
detect_bruteforce("auth.log", threshold=2)


[BRUTE-FORCE ALERT] IP 192.168.1.10 attempted 4 failed logins!
[BRUTE-FORCE ALERT] IP 192.168.1.20 attempted 2 failed logins!
