In [4]:
# #1. You’re working on a text processing tool that extracts all phone numbers from a document. The phone numbers can vary in format
# # (e.g., with or without country code, spaces, dashes). How would you write a regular expression to handle these variations and
# # extract all the phone numbers efficiently?

# # regex = (?:\+?\d{1,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,9}

# Explanation:
#1. \+?\d{1,3}: Matches an optional country code (e.g., +1, +44), which can be 1 to 3 digits long.
#2. [-.\s]?: Matches an optional separator (hyphen, dot, or space) after the country code.
#3. \(?\d{1,4}\)?: Matches an optional area code, which can be enclosed in parentheses and 1 to 4 digits long.
#4. [-.\s]?: Matches an optional separator after the area code.
#5. \d{1,4}: Matches the first part of the local number, which can be 1 to 4 digits long.
#6. [-.\s]?: Matches an optional separator between parts of the local number.
#7. \d{1,4}: Matches the second part of the local number, which can be 1 to 4 digits long.
#8. [-.\s]?: Matches an optional separator between parts of the local number.
#9. \d{1,9}: Matches the final part of the local number, which can be 1 to 9 digits long.

In [5]:
import re

text = """
Here are some phone numbers: +1-800-555-1234, +44 20 7946 0958, (123) 456-7890, 123.456.7890, and 1234567890.
"""

# Define the regex pattern
pattern = r'(?:\+?\d{1,3}[-.\s]?)?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,9}'

# Find all matches
phone_numbers = re.findall(pattern, text)

# Print the extracted phone numbers
print(phone_numbers)

['+1-800-555-1234', '+44 20 7946 0958', '(123) 456-7890', '123.456.7890', '1234567890']


In [6]:
# 2. Imagine you're cleaning a large dataset that contains addresses in different formats, some with unnecessary spaces and inconsistent capitalization.
# How would you use regular expressions to standardize and clean the address data, ensuring you only capture relevant information like street names and zip codes?



In [10]:
import re

def clean_address(address):
    # Remove extra spaces
    address = re.sub(r'\s+', ' ', address.strip())
    # Standardize capitalization
    address = address.title()
    # Remove unnecessary punctuation (except for #, -, and ,)
    address = re.sub(r'[^a-zA-Z0-9,\s\-#]', '', address)
    # Ensure a single comma space format
    address = re.sub(r'\s*,\s*', ', ', address)

    return address

# Example dataset
addresses = [
    "  123 main st. , new york , NY   10001  ",
    "456 BROADWAY,, Los Angeles, ca 90012 ",
    "789 Elm-street Apt#5, Boston, MA 02108-1234",
    "PO Box 567, Houston ,TX 77001"
]

# Clean addresses
cleaned_addresses = [clean_address(addr) for addr in addresses]

# Print results
for original, cleaned in zip(addresses, cleaned_addresses):
    print(f"Original: {original}\nCleaned:  {cleaned}\n")

Original:   123 main st. , new york , NY   10001  
Cleaned:  123 Main St, New York, Ny 10001

Original: 456 BROADWAY,, Los Angeles, ca 90012 
Cleaned:  456 Broadway, , Los Angeles, Ca 90012

Original: 789 Elm-street Apt#5, Boston, MA 02108-1234
Cleaned:  789 Elm-Street Apt#5, Boston, Ma 02108-1234

Original: PO Box 567, Houston ,TX 77001
Cleaned:  Po Box 567, Houston, Tx 77001



In [14]:
# # 3. You have a string containing multiple sentences, and you need to extract all the words that are capitalized.
# # How would you write a regular expression to find these words and ensure that the pattern works for different punctuation marks and sentence structures?

# # regex = \b[A-Z][a-zA-Z]*\b

# Explanation:
# 1. \b: Matches a word boundary to ensure we capture whole words.
# 2. [A-Z]: Matches an uppercase letter at the beginning of the word.
# 3. [a-zA-Z]*: Matches zero or more lowercase or uppercase letters following the initial capital letter.
# 4. \b: Ensures the match ends at a word boundary.

# Handling Edge Cases:
# If you want to include words with apostrophes (e.g., "John's") or hyphens (e.g., "New-York"), you can modify the regex slightly:
# regex = \b[A-Z][a-zA-Z'’-]*\b


In [13]:
import re

text = """
Hello! This is a Test. The Quick Brown Fox jumps over the Lazy Dog. 
In New-York, you can visit the Statue of Liberty. 
Dr. Smith's and Mr. Johnson's meeting is at 5 PM.
"""

# Define the regex pattern
pattern = r'\b[A-Z][a-zA-Z\'’-]*\b'

# Find all matches
capitalized_words = re.findall(pattern, text)

# Print the extracted capitalized words
print(capitalized_words)

['Hello', 'This', 'Test', 'The', 'Quick', 'Brown', 'Fox', 'Lazy', 'Dog', 'In', 'New-York', 'Statue', 'Liberty', 'Dr', "Smith's", 'Mr', "Johnson's", 'PM']


In [18]:
# 4.Suppose you're building a system that needs to create or modify several files in a directory.
# How would you handle cases where the file or directory doesn’t exist or the program doesn't have permission to write to it, while providing meaningful error messages to the user?

# Steps:
# Check if the directory exists: If it doesn't, create it.
# Check for write permissions: Ensure the program has permission to write to the directory.
# Handle file operations: Create or modify files, catching and handling exceptions.
# Provide meaningful error messages: Inform the user about what went wrong.

In [19]:
import os

def safe_write_file(directory, filename, content):
    try:
        # Create directory if it doesn't exist
        os.makedirs(directory, exist_ok=True)
        # Define full file path
        file_path = os.path.join(directory, filename)
        # Write to the file
        with open(file_path, "w") as file:
            file.write(content)
        print(f"File saved: {file_path}")

    except PermissionError:
        print(f"Error: No permission to write to '{directory}'.")

    except OSError as e:
        print(f"Error: {e}")

safe_write_file("my_directory", "example.txt", "Hello, world!")
safe_write_file("/restricted/path", "example.txt", "Test content.")

File saved: my_directory\example.txt
File saved: /restricted/path\example.txt


In [21]:
# 5. You're working with a CSV file that contains customer data with  fields like Name, Email, Phone, and Address.
# How would you read  this file and print the names and email addresses of all customers,
# ensuring that each row is processed correctly and that any missing  values are handled gracefully?

In [1]:
import csv

def read_customer_data(file_path):
    try:
        with open(file_path, mode='r', encoding='utf-8') as file:
            reader = csv.DictReader(file)  # Read CSV as dictionary
            
            for row in reader:
                name = row["Name"] if row["Name"] else "Unknown"
                email = row["Email"] if row["Email"] else "No Email"
                print(f"Name: {name}, Email: {email}")

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")

    except Exception as e:
        print(f"An error occurred: {e}")

read_customer_data("Assessment 19/c.csv")

Error: File 'Assessment 19/c.csv' not found.


In [4]:
# 6. You're tasked with updating a CSV file where each row represents an order, and the Status field needs to be updated based on
# whether the order was shipped or not. How would you update the status for each order in the file, and write the modified data back to the CSV file, making sure that no data is lost?


In [6]:
import csv

def update_order_status(file_path):
    rows = []

    try:
        # Read the CSV file
        with open(file_path, mode='r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file) #Read the CSV file using csv.DictReader()
            fieldnames = reader.fieldnames

            for row in reader:
                row["Status"] = "Shipped" if row.get("Shipped") == "Yes" else "Pending"  #Modify the Status field based on conditions (e.g., "Shipped" or "Pending").
                rows.append(row)

        # Write the updated data back to the file
        with open(file_path, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames) #Write the updated data back to the file using csv.DictWriter()
            writer.writeheader()
            writer.writerows(rows)

        print("Order statuses updated.")

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")

update_order_status("orders.csv")

Order statuses updated.
