In [None]:
# 1. In a scenario where you're building a tool that interacts with a web API,
# you need to accept an API key, endpoint, and parameters as command-line arguments.
# How would you implement this and ensure that the provided values are valid before making the API request?

import argparse
import requests
import sys

def _exit_with_error(message):
    """Print ``message`` to stderr and terminate the process with exit status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def main(argv=None):
    """Validate CLI arguments and issue an authenticated GET request.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behaviour.
            Passing a list allows the function to be called from a notebook
            cell or a test, e.g.
            ``main(["--api-key", "k", "--endpoint", "https://host/x"])``.

    Exits with status 1 (message on stderr) when:
        * the API key is empty or whitespace only,
        * the endpoint is not an http(s) URL with a host,
        * a ``--params`` entry is not of the form ``key=value``,
        * the HTTP request fails or returns an error status.
    """
    # Local import so the block does not depend on a file-level import.
    from urllib.parse import urlparse

    # Upper bound (seconds) so a dead server cannot hang the tool forever;
    # requests waits indefinitely when no timeout is given.
    timeout_seconds = 10

    # Parse command-line arguments
    parser = argparse.ArgumentParser(description="Interact with web API")
    parser.add_argument('--api-key', required=True, help="API key for authentication")
    parser.add_argument('--endpoint', required=True, help="API endpoint URL")
    parser.add_argument('--params', nargs='*', help="Parameters as key-value pairs")
    args = parser.parse_args(argv)

    # Validate API key: `--api-key ""` and whitespace-only keys are rejected.
    if not args.api_key.strip():
        _exit_with_error("Error:API key cannot be empty.")

    # Validate endpoint: must be http(s) and must actually name a host
    # (a bare "http://" passes a startswith() check but is not usable).
    try:
        parsed_endpoint = urlparse(args.endpoint)
        endpoint_ok = parsed_endpoint.scheme in ('http', 'https') and bool(parsed_endpoint.netloc)
    except ValueError:
        # urlparse raises ValueError for some malformed inputs (e.g. bad IPv6 literals).
        endpoint_ok = False
    if not endpoint_ok:
        _exit_with_error("Error: Endpoint must be valid URL starting with http:// or https://")

    # Parse parameters into a dictionary. Only the first '=' splits, so values
    # may themselves contain '=' (e.g. "filter=a=b").
    params = {}
    for param in args.params or []:
        key, separator, value = param.partition('=')
        if not separator or not key:
            _exit_with_error(f"Error: Invalid parameter format: {param}. Expected key=value.")
        params[key] = value

    # Make the API request
    try:
        headers = {'Authorization': f'Bearer {args.api_key}'}
        response = requests.get(
            args.endpoint, headers=headers, params=params, timeout=timeout_seconds
        )
        response.raise_for_status()
        print("API Response:", response.json())
    except requests.exceptions.RequestException as e:
        # RequestException is the base class of connection, timeout and
        # HTTP-status errors raised by requests.
        _exit_with_error(f"Error: Failed to make API request. {e}")


if __name__ == "__main__":
    main()

In [None]:
# 2. Your task is to create a command-line tool that reads a list of integers passed as arguments and computes their sum and average.
# How would you handle potential errors, such as non-numeric values or incorrect number formats, and ensure that the program functions as expected?

import argparse
import sys

def main(argv=None):
    """Print the sum and average of integers given as command-line arguments.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behaviour.
            Passing a list lets the function run from a notebook cell, where
            ``sys.argv`` holds the kernel's own arguments, e.g.
            ``main(["1", "2", "3"])``.

    Exits with status 1 and a message on stderr if any argument cannot be
    converted with ``int()``. argparse itself exits with status 2 when no
    numbers are supplied, because ``nargs='+'`` requires at least one.
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(description="Compute the sum and average of a list of integers.")
    parser.add_argument('numbers', nargs='+', help="List of integers to process")

    # Parse arguments
    args = parser.parse_args(argv)

    # Validate and convert input to integers
    try:
        numbers = [int(num) for num in args.numbers]
    except ValueError:
        print("Error: All arguments must be valid integers.", file=sys.stderr)
        sys.exit(1)

    # Compute sum and average; len(numbers) >= 1 is guaranteed by nargs='+',
    # so the division cannot hit an empty list.
    total = sum(numbers)
    average = total / len(numbers)

    # Output results
    print(f"Sum: {total}")
    print(f"Average: {average}")


if __name__ == "__main__":
    main()

In [3]:
# 3. You are working with a log file containing system event data, and each log entry has a timestamp, log level (INFO, WARN, ERROR), and a message.
# How would you use regular expressions to extract all ERROR messages from the logs and sort them by timestamp?

import re
from datetime import datetime

# One log line looks like "<YYYY-MM-DD HH:MM:SS> <LEVEL> <message>"; the named
# groups expose the three fields.
log_pattern = re.compile(r'(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (?P<level>INFO|WARN|ERROR) (?P<message>.*)')

# Load every line of the log file into memory
with open('logfile.txt', 'r') as file:
    log_data = file.readlines()

# Keep only ERROR entries as (parsed timestamp, message) pairs; lines that do
# not match the pattern are skipped.
error_entries = [
    (
        datetime.strptime(hit.group('timestamp'), '%Y-%m-%d %H:%M:%S'),
        hit.group('message'),
    )
    for hit in map(log_pattern.match, log_data)
    if hit and hit.group('level') == 'ERROR'
]

# Order chronologically (stable, so equal timestamps keep file order)
error_entries = sorted(error_entries, key=lambda entry: entry[0])

# Output the sorted ERROR messages
for timestamp, message in error_entries:
    print(f"{timestamp}: {message}")

2023-10-01 12:10:00: Failed to connect to database.
2023-10-01 12:20:00: Unable to read configuration file.


In [7]:
# 4. You're tasked with writing a function that takes an array of integers and returns the indices of two numbers that add up to a specific target sum.
# How can you improve the time complexity to O(n) while ensuring that the array is traversed only once?

def two_sum(nums, target):
    """Return the indices of the first pair in ``nums`` that sums to ``target``.

    The input is scanned once. For each value, a dictionary of previously
    seen values is checked for the complement ``target - value``.

    Returns:
        ``[earlier_index, later_index]`` for the first pair found, or ``[]``
        when no two elements add up to ``target``.
    """
    index_of = {}  # value -> index of its most recent occurrence so far
    for position, value in enumerate(nums):
        partner = index_of.get(target - value)
        # Indices are never None, so None reliably means "complement not seen"
        # (a plain truthiness test would wrongly reject index 0).
        if partner is not None:
            return [partner, position]
        index_of[value] = position
    return []


nums = [2, 7, 11, 15]
target = 13
print(two_sum(nums, target))


[0, 2]


In [None]:
# 5. You have a large matrix representing a grid of values. You are asked to rotate this grid by 90 degrees clockwise in place.
# What algorithm would you use to accomplish this, and how would you handle the array in a memory efficient manner?

def rotate_matrix(matrix):
    """Rotate a square matrix 90 degrees clockwise, in place.

    The rotation is done as a transpose followed by reversing every row.
    Both steps mutate the existing row lists, so no new rows are allocated
    and any other reference to a row observes the rotated contents.

    Args:
        matrix: A list of ``n`` row lists, each of length ``n``.

    Returns:
        None. ``matrix`` is modified in place.

    Raises:
        ValueError: If any row's length differs from the number of rows.
            The transpose step only makes sense for a square matrix.
    """
    n = len(matrix)
    if any(len(row) != n for row in matrix):
        raise ValueError("rotate_matrix requires a square (n x n) matrix")

    # Step 1: Transpose. Start at i + 1: diagonal elements stay where they are.
    for i in range(n):
        for j in range(i + 1, n):
            matrix[i][j], matrix[j][i] = matrix[j][i], matrix[i][j]

    # Step 2: Reverse each row in place (list.reverse allocates nothing,
    # unlike slicing with [::-1], which builds a new list per row).
    for row in matrix:
        row.reverse()

matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
]

rotate_matrix(matrix)

# Print the rotated matrix
for row in matrix:
    print(row)

In [None]:
# Extension for Non-Square Matrices:
def rotate_non_square_matrix(matrix):
    """Return a new matrix that is ``matrix`` rotated 90 degrees clockwise.

    An ``m x n`` input yields an ``n x m`` result, so the rotation cannot be
    done in place; the input is left untouched.

    Args:
        matrix: A sequence of ``m`` rows. The width ``n`` is taken from the
            first row; every row must have at least ``n`` elements.

    Returns:
        A list of ``n`` lists of length ``m``. An empty input returns ``[]``.
    """
    m = len(matrix)
    if m == 0:
        # Without this guard, matrix[0] would raise IndexError.
        return []
    n = len(matrix[0])

    # Output row j is input column j read bottom-to-top:
    # rotated[j][c] == matrix[m - 1 - c][j].
    return [[matrix[m - 1 - i][j] for i in range(m)] for j in range(n)]

matrix = [
    [1, 2, 3],
    [4, 5, 6]
]

rotated = rotate_non_square_matrix(matrix)

# Print the rotated matrix
for row in rotated:
    print(row)

In [8]:
# 6. In a large web scraping project, you need to track the frequency of visited URLs and filter out URLs that are visited less than a certain number of times.
# How would you leverage collections.defaultdict to accomplish this efficiently?
# Provide an example of when this might be useful in practice.

from collections import defaultdict

# Initialize a defaultdict to track URL frequencies; missing keys start at 0,
# so no "is this URL already present?" check is needed when counting.
url_frequencies = defaultdict(int)

# List of visited URLs
visited_urls = [
    "https://example.com/page1",
    "https://example.com/page2",
    "https://example.com/page1",
    "https://example.com/page3",
    "https://example.com/page2",
    "https://example.com/page1",
]

# Count the frequency of each URL
for url in visited_urls:
    url_frequencies[url] += 1

# Keep only URLs visited at least `threshold` times (drops the rarer ones)
threshold = 2
filtered_urls = {url: count for url, count in url_frequencies.items() if count >= threshold}

# Build the header from `threshold` so the message stays correct if it changes.
print(f"Filtered URLs (visited at least {threshold} times):")
for url, count in filtered_urls.items():
    print(f"{url}: {count} times")

Filtered URLs (visited at least 2 times):
https://example.com/page1: 3 times
https://example.com/page2: 2 times


In [9]:
# 7. Imagine you're working with a dataset of users, where each user has a unique ID and a list of preferences.
# How can you efficiently group these users based on common preferences using the collections module?
# Explain why this approach would be more optimal than other methods.

from collections import defaultdict

# Sample dataset: user ID -> list of that user's preferences
users = {
    1: ["python", "java", "c"],
    2: ["java", "sql"],
    3: ["python", "c"],
    4: ["sql", "java"],
}

# Step 1: Invert the mapping so each preference lists the users who hold it
preference_to_users = defaultdict(list)
for user_id in users:
    for preference in users[user_id]:
        preference_to_users[preference].append(user_id)

# Step 2: Preferences held by exactly the same set of users fall into one
# group. The sorted tuple of user IDs is hashable and order-independent, so
# it serves as the group key.
user_groups = defaultdict(list)
for preference in preference_to_users:
    members = tuple(sorted(preference_to_users[preference]))
    user_groups[members].append(preference)

# Output the groups
for group, preferences in user_groups.items():
    print(f"Users {group} share preferences: {preferences}")

Users (1, 3) share preferences: ['python', 'c']
Users (1, 2, 4) share preferences: ['java']
Users (2, 4) share preferences: ['sql']


In [16]:
# 8. In a scenario where you're analyzing web page data, you need to extract all the URLs from an HTML page stored in a text file.
# How would you write a regular expression to capture URLs while considering variations in domain names and protocols?

# https?:\/\/[^\s"']+|ftp:\/\/[^\s"']+|www\.[^\s"']+

In [10]:
import re

# Regular expression to match URLs.
# A URL starts with http://, https://, ftp:// or a bare "www." and runs until
# the first character that cannot belong to it in HTML: whitespace, a quote,
# ']' or an angle bracket. Excluding '<' and '>' keeps a URL written as
# element text (e.g. <a ...>https://example.com</a>) from swallowing the
# closing tag.
url_pattern = re.compile(r'(?:https?://|ftp://|www\.)[^\s"\'<>\]]+')

# Read the HTML content from a file
with open('sample_web.html', 'r', encoding='utf-8') as file:
    html_content = file.read()

# Find all URLs in the HTML content (the group is non-capturing, so findall
# returns the full matched URL strings)
urls = url_pattern.findall(html_content)

# Print the extracted URLs
for url in urls:
    print(url)

https://example.com
http://sub.example.com/path?query=param
ftp://files.example.com
www.example.com
https://example.com/image.png


In [1]:
import re

# Regular expression to match URLs.
# A URL starts with http://, https://, ftp:// or a bare "www." and runs until
# the first character that cannot belong to it in HTML: whitespace, a quote,
# ']' or an angle bracket. Excluding '<' and '>' keeps a URL written as
# element text (e.g. <a ...>https://example.com</a>) from swallowing the
# closing tag.
url_pattern = re.compile(r'(?:https?://|ftp://|www\.)[^\s"\'<>\]]+')

# Read the HTML content from a file
with open('sample_web.html', 'r', encoding='utf-8') as file:
    html_content = file.read()

# Find all URLs in the HTML content (the group is non-capturing, so findall
# returns the full matched URL strings)
urls = url_pattern.findall(html_content)

# Print the extracted URLs
for url in urls:
    print(url)


In [11]:
# 9. You need to implement a system that tracks the maximum value of a sliding window of a fixed size within a stream of integers.
# For each new number added to the stream, you must calculate the maximum value within the window that contains that number, efficiently updating the result as the window slides.
# How would you implement this with collections.deque, and why is this method more efficient than simply iterating over the window each time?

from collections import deque

def sliding_window_maximum(nums, k):
    """Return the maximum of every length-``k`` window of ``nums``.

    A deque of indices is maintained whose values decrease from front to
    back, so the front always indexes the current window's maximum. Each
    index is appended once and removed at most once.

    Returns:
        A list with one maximum per full window (``len(nums) - k + 1``
        entries), or ``[]`` when ``nums`` is empty, ``k <= 0``, or ``k``
        exceeds ``len(nums)``.
    """
    if not nums or k <= 0:
        return []

    candidates = deque()  # indices into nums; nums[...] decreases front -> back
    maxima = []

    for right, value in enumerate(nums):
        # Anything smaller than the incoming value can never be a window
        # maximum again, so drop it from the back.
        while candidates and nums[candidates[-1]] < value:
            candidates.pop()
        candidates.append(right)

        # The front index has just slid out of the window ending at `right`.
        if candidates[0] == right - k:
            candidates.popleft()

        # Emit a result only once the first full window has been seen.
        if right >= k - 1:
            maxima.append(nums[candidates[0]])

    return maxima

nums = [1, 3, -1, -3, 5, 3, 6, 7]
k = 3
print(sliding_window_maximum(nums, k))  # Output: [3, 3, 5, 5, 6, 7]

[3, 3, 5, 5, 6, 7]
