In [31]:
# 1. You are given a list of tuples, where each tuple contains a student's name and their grades for a semester.
# Using the collections module, implement a solution that groups students by the grade they received, and return
# the highest grade in each group (e.g., A, B, C). Can you efficiently retrieve this data using a defaultdict or Counter?

# Import defaultdict from the collections module
from collections import defaultdict

# Each record pairs a student's name with the grade earned this semester
students = [
    ("Amar", "A"),
    ("Tushar", "C"),
    ("Sarthak", "A"),
    ("Pankaj", "B"),
    ("Smit", "B"),
]

# Bucket the student names under the grade each one received
high_grades = defaultdict(list)
for student_name, letter in students:
    high_grades[letter].append(student_name)

# For every grade bucket keep the greatest name under string comparison (max)
highest_students = {}
for letter, bucket in high_grades.items():
    highest_students[letter] = max(bucket)

print("Students grouped by grades:", dict(high_grades))
print("Highest name in each grade group:", highest_students)

Students grouped by grades: {'A': ['Amar', 'Sarthak'], 'C': ['Tushar'], 'B': ['Pankaj', 'Smit']}
Highest name in each grade group: {'A': 'Sarthak', 'C': 'Tushar', 'B': 'Smit'}


In [13]:
# 2. You need to design a deque-based caching system that allows fast addition and removal of elements from both ends.
# Implement this system in Python, and demonstrate how it can efficiently manage a fixed-size
# cache where the least recently used (LRU) item is removed when the cache exceeds its capacity.

from collections import deque

class SimpleLRUCache:
    """Fixed-capacity cache that keeps items ordered by how recently they were used.

    Items live in a deque whose front (left) is the most recently used entry
    and whose back (right) is the least recently used one. A set mirrors the
    deque's contents so membership checks do not have to scan the deque;
    items therefore have to be hashable.
    """

    def __init__(self, capacity):
        """Create an empty cache holding at most `capacity` items.

        A capacity of zero or less produces a cache that never stores anything.
        """
        self.cache = deque()  # Items, most recently used first
        self.lookup = set()   # Same items as `cache`, for quick membership tests
        self.capacity = capacity  # Maximum number of items kept

    def access(self, item):
        """Record a use of `item`, making it the most recently used entry.

        If the item is already cached it is moved to the front. Otherwise it
        is inserted at the front, evicting the least recently used item first
        when the cache is full.
        """
        # Nothing can be stored in a zero-sized cache; without this guard the
        # eviction branch would pop from an empty deque and raise IndexError.
        if self.capacity <= 0:
            return

        if item in self.lookup:
            # deque.remove scans for the item, so a hit costs a linear search
            self.cache.remove(item)
        elif len(self.cache) >= self.capacity:
            removed = self.cache.pop()  # Back of the deque = least recently used
            self.lookup.remove(removed)

        self.cache.appendleft(item)  # Front of the deque = most recently used
        self.lookup.add(item)

    def show(self):
        """Print the cached items from most to least recently used."""
        print("Cache:", list(self.cache))


# Fill a three-slot cache, then watch eviction and re-ordering happen.
lru = SimpleLRUCache(3)
for key in ("A", "B", "C"):
    lru.access(key)
lru.show()  # Cache: ['C', 'B', 'A']

lru.access("D")  # Cache is full, so 'A' (least recently used) is evicted
lru.show()  # Cache: ['D', 'C', 'B']

lru.access("B")  # 'B' is already cached and moves to the front
lru.show()  # Cache: ['B', 'D', 'C']

Cache: ['C', 'B', 'A']
Cache: ['D', 'C', 'B']
Cache: ['B', 'D', 'C']


In [32]:
# 3. Imagine you have a list of sales transactions, each containing a product name and a quantity sold.
# Using Counter, calculate the total sales for each product, and return the three products with the highest total sales.

from collections import Counter

# Sample sales transactions as (product, quantity sold) pairs
sales_transactions = [
    ("iphone", 5),
    ("redmi", 3),
    ("realme", 2),
    ("poco", 7),
    ("samsung", 4),
    ("mi", 1),
    ("nokia", 2),
    ("LG", 8),
    ("micromax", 1),
    ("gonee", 3),
]

# Step 1: accumulate the quantity sold per product in a Counter
total_sales = Counter()
for item_name, units in sales_transactions:
    total_sales.update({item_name: units})

# Step 2: the three products with the largest totals, biggest first
top_products = total_sales.most_common(3)

# Report every product's total, in the order the products were first seen
print("Total sales for each product:")
for product in total_sales:
    total = total_sales[product]
    print(f"{product}: {total}")

# Report the top three
print("\nTop 3 products with the highest sales:")
for entry in top_products:
    product, total = entry
    print(f"{product}: {total}")

Total sales for each product:
iphone: 5
redmi: 3
realme: 2
poco: 7
samsung: 4
mi: 1
nokia: 2
LG: 8
micromax: 1
gonee: 3

Top 3 products with the highest sales:
LG: 8
poco: 7
iphone: 5


In [33]:
# 4. Given a list of nested dictionaries with data about employees in a company (name, role, department),
# use defaultdict from the collections module to create a solution that returns a dictionary mapping each department to the list of employees working in that department.
# Ensure the solution handles edge cases where no employees are present in some departments.
    
from collections import defaultdict

# Sample employee records, one dictionary per person
employees = [
    {"name": "Amar", "role": "Developer", "department": "Engineering"},
    {"name": "Badal", "role": "Manager", "department": "Engineering"},
    {"name": "Chanakya", "role": "Designer", "department": "Design"},
    {"name": "Dev", "role": "Analyst", "department": "Data Science"},
    {"name": "Tejas", "role": "Developer", "department": "Engineering"},
    {"name": "Farukh", "role": "Manager", "department": "HR"},
    {"name": "Ganesh", "role": "Designer", "department": "Design"},
    {"name": "Himesh", "role": "Analyst", "department": "Data Science"},
]

# Steps 1-2: collect each record under its department; the defaultdict
# creates the empty list the first time a department is seen
by_department = defaultdict(list)
for record in employees:
    by_department[record["department"]].append(record)

# Step 3: expose the grouping as a plain dictionary (prints more readably)
department_employees = dict(by_department)

# Print one section per department, followed by a blank line
for department in department_employees:
    employees_in_dept = department_employees[department]
    print(f"Department: {department}")
    for employee in employees_in_dept:
        print(f"  - {employee['name']} ({employee['role']})")
    print()

Department: Engineering
  - Amar (Developer)
  - Badal (Manager)
  - Tejas (Developer)

Department: Design
  - Chanakya (Designer)
  - Ganesh (Designer)

Department: Data Science
  - Dev (Analyst)
  - Himesh (Analyst)

Department: HR
  - Farukh (Manager)



In [35]:
# 5. You are tasked with processing a large CSV file containing user information, including their name, email, and purchase history.
# Write a Python script that reads the file and calculates the total amount spent by each user.
# Ensure the script can handle missing values and invalid formats, and generate a summary report for the top 5 highest spenders.

import csv
import math

def process_csv(file_path, top_n=5):
    """Print a report of the users with the largest total spending in a CSV file.

    The file is read with csv.DictReader, so its first line must be a header.
    The "name" and "amount_spent" columns are used; rows are skipped when
    either value is missing or blank, or when the amount is not a finite number.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.
        top_n: How many of the highest spenders to list (default 5).

    Returns:
        None. The report is written to standard output.

    Raises:
        OSError: If the file cannot be opened.
    """
    user_spending = {}  # Total spending per user name

    # newline="" leaves line-ending handling to the csv module, as its
    # documentation recommends for files passed to a reader.
    with open(file_path, mode="r", encoding="utf-8", newline="") as file:
        reader = csv.DictReader(file)

        for row in reader:
            # DictReader fills the columns a short row lacks with None, so
            # the default of .get() is not enough: coalesce None to "" too.
            name = (row.get("name") or "").strip()
            amount_spent = (row.get("amount_spent") or "").strip()

            if not name or not amount_spent:  # Skip missing values
                continue

            try:
                amount = float(amount_spent)
            except ValueError:
                continue  # Skip text that is not a number

            # float() also accepts "nan" and "inf"; adding those would
            # corrupt the user's total and the ranking below.
            if not math.isfinite(amount):
                continue

            user_spending[name] = user_spending.get(name, 0) + amount

    # Highest totals first; users with equal totals keep first-seen order
    top_spenders = sorted(user_spending.items(), key=lambda x: x[1], reverse=True)[:top_n]

    # Print results
    print(f"\nTop {top_n} Highest Spenders:")
    for i, (user, total) in enumerate(top_spenders, start=1):
        print(f"{i}. {user}: ${total:.2f}")

# Example usage: print the spending report for one CSV file.
# The relative path below is resolved against the current working directory.
file_path = "u_data.csv"  # Replace with your actual file
process_csv(file_path)


Top 5 Highest Spenders:
1. Bob: $520.75
2. Eve: $500.00
3. Grace: $420.30
4. Alice: $350.75
5. Helen: $320.75


In [29]:
# 6. Consider a list of employee records, each represented as a dictionary containing the employee's name, department, and years of experience.
# Use list comprehension to filter out employees who have less than 5 years of experience and work in the HR department.
# How would you perform this task in one line of code?

# Keep everyone except HR staff with fewer than five years of experience.
# Every HR record in `employees` must carry a "years_of_experience" key.
# NOTE(review): the `employees` list built for task 4 has no such key, so this
# line only works once the sample-data cell that follows it has been run.
filtered_employees = [
    emp
    for emp in employees
    if emp["department"] != "HR" or emp["years_of_experience"] >= 5
]

In [37]:
# Sample employee records: name, department and years of experience
employees = [
    {"name": "Amit", "department": "HR", "years_of_experience": 6},
    {"name": "Banny", "department": "Engineering", "years_of_experience": 4},
    {"name": "Chetan", "department": "HR", "years_of_experience": 3},
    {"name": "Dhaval", "department": "HR", "years_of_experience": 7},
    {"name": "Emamul", "department": "Marketing", "years_of_experience": 10},
]

# One-line filter: a record stays unless it is an HR employee with fewer
# than five years of experience
filtered_employees = [
    emp
    for emp in employees
    if emp["department"] != "HR" or emp["years_of_experience"] >= 5
]

# Show the records that survived the filter
print(filtered_employees)

[{'name': 'Amit', 'department': 'HR', 'years_of_experience': 6}, {'name': 'Banny', 'department': 'Engineering', 'years_of_experience': 4}, {'name': 'Dhaval', 'department': 'HR', 'years_of_experience': 7}, {'name': 'Emamul', 'department': 'Marketing', 'years_of_experience': 10}]


In [44]:
# 7. Suppose you have a log file where each entry contains the date and an associated action, like a user login or logout.
# Using the Counter from the collections module, write a function that identifies the most frequent action for each day.
# Can you handle cases where multiple actions have the same frequency?

from collections import Counter, defaultdict

def most_frequent_action(log_entries):
    """Find the most frequent action(s) on each date of a log.

    Args:
        log_entries: Iterable of (date, action) pairs.

    Returns:
        A dict mapping each date to the list of actions whose count equals
        that date's highest count. Several actions appear when they tie.
        Dates, and tied actions within a date, keep first-seen order.
    """
    # One Counter of actions per date, created on first sight of the date
    per_day = defaultdict(Counter)
    for day, act in log_entries:
        per_day[day][act] += 1

    winners = {}
    for day, tally in per_day.items():
        # most_common() ranks by count, highest first; equal counts stay in
        # the order the actions were first encountered
        ranked = tally.most_common()
        peak = ranked[0][1]
        winners[day] = [label for label, hits in ranked if hits == peak]

    return winners

# Example log: (date, action) pairs covering three days
log_data = [
    ("2024-02-10", "login"),
    ("2024-02-10", "logout"),
    ("2024-02-10", "login"),
    ("2024-02-11", "login"),
    ("2024-02-11", "view_page"),
    ("2024-02-11", "view_page"),
    ("2024-02-12", "logout"),
    ("2024-02-12", "logout"),
    ("2024-02-12", "purchase"),
]

# Summarise the log, then print the per-day winners
daily_top_actions = most_frequent_action(log_data)
print(daily_top_actions)

{'2024-02-10': ['login'], '2024-02-11': ['view_page'], '2024-02-12': ['logout']}
