In [15]:
# 1. Imagine you're building a recommendation system that uses permutations of items to suggest bundles to users.
# How would you use itertools to generate all possible 3-item combinations from a list of 5 items?
# Ensure that the combinations don't repeat across multiple iterations.

import itertools

# Six sample items (note: one more than the five mentioned in the exercise text)
items = list("ABCDEF")

# itertools.combinations yields each unordered 3-item selection exactly once,
# in lexicographic order of the input positions; collect them into a list.
combinations = [*itertools.combinations(items, 3)]

# Show one combination per line
for combo in combinations:
    print(combo)


# The list() function is used to convert the iterator returned by itertools.combinations into a list of tuples,
# where each tuple represents a unique combination.

('A', 'B', 'C')
('A', 'B', 'D')
('A', 'B', 'E')
('A', 'B', 'F')
('A', 'C', 'D')
('A', 'C', 'E')
('A', 'C', 'F')
('A', 'D', 'E')
('A', 'D', 'F')
('A', 'E', 'F')
('B', 'C', 'D')
('B', 'C', 'E')
('B', 'C', 'F')
('B', 'D', 'E')
('B', 'D', 'F')
('B', 'E', 'F')
('C', 'D', 'E')
('C', 'D', 'F')
('C', 'E', 'F')
('D', 'E', 'F')


In [4]:
# 2. Using itertools.cycle(), create a function that takes a list of keywords and rotates through them endlessly.
# The function should print each keyword every 3 seconds.
# How would you handle situations where the program needs to be safely interrupted?

import itertools
import time

def rotate_keyword(keywords, interval=3, limit=None):
    """Print the given keywords in rotation, pausing between each one.

    Args:
        keywords: Iterable of keywords to rotate through. An empty iterable
            prints nothing and returns immediately.
        interval: Seconds to sleep after each printed keyword (default 3).
        limit: Optional non-negative number of keywords to print before
            returning. ``None`` (the default) rotates endlessly until the
            program is interrupted.

    A ``KeyboardInterrupt`` (Ctrl+C, or "Interrupt kernel" in Jupyter) is
    caught so the rotation stops with a short message instead of a traceback.
    """
    rotation = itertools.cycle(keywords)  # Endless iterator over the keywords
    if limit is not None:
        rotation = itertools.islice(rotation, limit)  # Bound the rotation

    try:
        # Iterating with ``for`` (rather than calling next()) means an empty
        # keyword list simply ends the loop instead of raising StopIteration.
        for keyword in rotation:
            print(keyword)
            time.sleep(interval)
    except KeyboardInterrupt:
        print("\n Stopped")  # Stop the program gracefully

# Sample keywords to rotate through. The call below runs until it is
# interrupted (Ctrl+C in a terminal, or "Interrupt kernel" in Jupyter).
keywords = ["Python", "JavaScript", "Java", "C++", "Go"]
rotate_keyword(keywords)


Python

 Stopped


In [None]:
# 3. You’re tasked with processing a massive log file to extract meaningful insights.
# How would you implement a solution using multiprocessing that splits the file into chunks, processes them concurrently,
# and then combines the results at the end?

import multiprocessing

def process_chunk(chunk):
    """Return the lines of ``chunk`` that contain "ERROR", whitespace-stripped.

    Args:
        chunk: Iterable of text lines.

    Returns:
        A list with the stripped text of every line containing the substring
        "ERROR", in their original order.
    """
    error_lines = []
    for raw_line in chunk:
        if "ERROR" not in raw_line:
            continue
        error_lines.append(raw_line.strip())
    return error_lines

def read_chunks(filename, chunk_size=1024*1024):
    """Lazily yield successive lists of lines from a UTF-8 text file.

    Args:
        filename: Path of the file to read.
        chunk_size: Value passed to ``readlines`` as its size hint for each
            batch of lines.

    Yields:
        Non-empty lists of lines (line endings kept) until the file is
        exhausted.
    """
    with open(filename, "r", encoding="utf-8") as handle:
        while True:
            lines = handle.readlines(chunk_size)
            if not lines:
                # An empty batch means end of file
                break
            yield lines

def process_log_file(filename, num_workers=4):
    """Extract error lines from a log file using a pool of worker processes.

    The file is read in chunks by ``read_chunks``; each chunk is handed to
    ``process_chunk`` in a worker process, and the per-chunk results are
    concatenated in file order.

    Args:
        filename: Path of the log file to process.
        num_workers: Number of worker processes in the pool (default 4).

    Returns:
        A flat list of the lines returned by ``process_chunk`` for every chunk.
    """
    with multiprocessing.Pool(num_workers) as pool:
        # imap feeds chunks to the workers as they are read and yields results
        # in input order. Pool.map would first turn the generator into a list,
        # loading the entire file into this process before any work starts.
        results = pool.imap(process_chunk, read_chunks(filename))
        # Consume the results while the pool is still alive: leaving the
        # ``with`` block terminates the workers.
        return [line for sublist in results for line in sublist]
# Guard the entry point: with the "spawn" start method worker processes
# re-import the main module, and unguarded pool-starting code would run again
# in every worker.
if __name__ == "__main__":
    errors = process_log_file("server.log")
    # Show only the first ten extracted error lines
    for line in errors[:10]:
        print(line)

In [2]:
# 4. In your application, you need to simulate multiple users interacting with a system simultaneously.
# How would you use multiprocessing to create and run parallel tasks that simulate user interactions,
# ensuring that each user’s actions are handled independently?

import multiprocessing
import time
import random

def simulate_user(user_id):
    """Simulate one user performing three actions with random pauses.

    Prints a start message, then for each of three actions sleeps for a random
    whole number of seconds between 1 and 5 and reports the action, and
    finally prints a finish message.

    Args:
        user_id: Identifier included in every printed message.
    """
    print(f"User {user_id} started interacting with the system.")

    for action_number in range(1, 4):  # Actions are reported as 1, 2, 3
        pause = random.randint(1, 5)  # Random delay before this action
        time.sleep(pause)
        print(f"User {user_id} performed action {action_number} after {pause} seconds.")

    print(f"User {user_id} finished interacting with the system.")

if __name__ == "__main__":
    num_users = 5  # Number of users to simulate

    # One independent process per simulated user
    processes = [
        multiprocessing.Process(target=simulate_user, args=(user_id,))
        for user_id in range(num_users)
    ]

    # Launch every user so they all run concurrently
    for process in processes:
        process.start()

    # Block until each user process has exited
    for process in processes:
        process.join()

    print("All users have finished interacting with the system.")



All users have finished interacting with the system.


In [1]:
# 5. You need to clean up a CSV file where some rows contain missing or corrupted data.
# What’s the most efficient way to use Python to identify and remove or fix these rows,
# while preserving as much data as possible for further analysis?

import pandas as pd

# Read the raw CSV into a DataFrame
input_file = "Diabetes Missing Data.csv"
df = pd.read_csv(input_file)

# Step 1: Identify missing data
# Boolean mask with True wherever a cell is missing
missing_data = df.isnull()
# Number of missing cells in each column
missing_summary = missing_data.sum()

print("Missing data per column:")
print(missing_summary)

# Step 2: Handle missing or corrupted data
# Option 1: drop every row that has at least one missing value.
df_cleaned = df.dropna()

# Option 2: keep every row and fill the gaps with each column's mean.
# numeric_only=True restricts the means to numeric columns, so the call does
# not fail if the frame also contains text columns (those are left unfilled).
# Use df.fillna(value=0) instead to fill with a constant.
df_filled = df.fillna(df.mean(numeric_only=True))

# Step 3: Write the rows that survived cleaning to a new CSV (no index column)
output_file = "cleaned_data.csv"
df_cleaned.to_csv(output_file, index=False)
print(f"\nCleaned data saved to {output_file}")

# Report how many rows the cleaning step removed
rows_before = len(df)
rows_after = len(df_cleaned)

print("\nSummary of cleaning:")
print(f"Original number of rows: {rows_before}")
print(f"Number of rows after cleaning: {rows_after}")
print(f"Rows removed: {rows_before - rows_after}")

Missing data per column:
Pregnant               0
Glucose                5
Diastolic_BP          35
Skin_Fold            227
Serum_Insulin        374
BMI                   11
Diabetes_Pedigree      0
Age                    0
Class                  0
dtype: int64

Cleaned data saved to cleaned_data.csv

Summary of cleaning:
Original number of rows: 768
Number of rows after cleaning: 392
Rows removed: 376


In [None]:
# 6. You are tasked with scraping product data from a website that loads content dynamically with JavaScript.
# How would you handle scraping such a site where the content is not readily available in the static HTML,
# and what tools would you use to capture the data efficiently?

import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode (no GUI)
chrome_options.add_argument("--disable-gpu")  # Disable GPU acceleration

# Path to your ChromeDriver
chrome_driver_path = "/path/to/chromedriver"

# URL of the dynamic website
url = "https://example.com/products"

# Maximum number of seconds to wait for the JavaScript-rendered products
wait_timeout = 10

# Initialize the WebDriver
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

try:
    # Open the website
    driver.get(url)

    # Wait explicitly until the product elements exist in the DOM instead of
    # sleeping for a fixed time: this returns as soon as the content is ready
    # and tolerates slower page loads up to wait_timeout.
    try:
        products = WebDriverWait(driver, wait_timeout).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "product"))  # Replace with the correct class name
        )
    except TimeoutException:
        # No product appeared in time; fall through with nothing to print
        products = []

    # Extract product data
    for product in products:
        name = product.find_element(By.CLASS_NAME, "product-name").text  # Replace with the correct class name
        price = product.find_element(By.CLASS_NAME, "product-price").text  # Replace with the correct class name
        print(f"Product: {name}, Price: {price}")
finally:
    # Always close the browser, even if loading or extraction raised,
    # so no headless Chrome process is left running.
    driver.quit()