# Lists
- Ordered, mutable (can change), and allow duplicates.
- Useful when you want to store a collection of related items (e.g., groceries or scores).


In [None]:
# Lists in Python: Essential for Data Science & ML

# 1. Creating lists
fruits = ["apple", "banana", "cherry"]

# 2. Indexing & slicing
first = fruits[0]         # 'apple'
last_two = fruits[-2:]    # ['banana', 'cherry']

# 3. Adding elements
fruits.append("orange")   # Add to end
fruits.insert(1, "kiwi")  # Insert at position 1 (later items shift right)

# 4. Removing elements
fruits.remove("banana")   # Remove first match by value (ValueError if absent)
popped = fruits.pop()     # Remove last item and return it ('orange')

# 5. List length
count = len(fruits)       # Number of items

# 6. Iterating (common in data processing)
for fruit in fruits:
    print(fruit)

# 7. List comprehensions (powerful for feature engineering)
lengths = [len(f) for f in fruits]  # Length of each fruit name

# 8. Filtering with comprehensions
long_fruits = [f for f in fruits if len(f) > 5]

# 9. Sorting (in-place and copy)
fruits.sort()                                 # In place, alphabetical; returns None
sorted_fruits = sorted(fruits, reverse=True)  # New list, descending order

# 10. Checking membership
if "apple" in fruits:
    print("Apple is in the list")

# 11. Combining lists
more_fruits = ["grape", "melon"]
all_fruits = fruits + more_fruits  # New list; neither operand is modified

# 12. Removing duplicates while keeping first-seen order
# list(set(all_fruits)) also drops duplicates, but a set is unordered, so the
# result can come back in a different order on every run. Dict keys are unique
# and keep insertion order, which makes the result reproducible.
unique_fruits = list(dict.fromkeys(all_fruits))

# 13. Enumerate for index & value (useful for ML feature mapping)
for idx, fruit in enumerate(fruits):
    print(idx, fruit)

# 14. List of lists (e.g., dataset rows)
dataset = [
    ["Alice", 24, "F"],
    ["Bob", 30, "M"],
]
ages = [row[1] for row in dataset]  # Extract the age column

# 15. Copying lists (avoid reference bugs)
# .copy() is a shallow copy: the new list is independent, but nested objects
# inside it would still be shared with the original.
copy_fruits = fruits.copy()

## Tuples
- Ordered, immutable (cannot change after creation), and allow duplicates.
- Useful for fixed collections like coordinates or config values.

In [None]:
# Tuples in Python: fixed-size, immutable records

# 1. Creating tuples (ordered; contents cannot change after creation)
coords = (40.7128, -74.0060)  # (latitude, longitude), e.g. New York City
person = ("Alice", 25, "F")   # (name, age, gender)

# 2. Accessing elements (indexing & slicing)
lat, lon = coords[0], coords[1]
first_two = person[0:2]       # ('Alice', 25)

# 3. Unpacking: one target name per element (common in ML for splitting data)
(name, age, gender) = person

# 4. Tuple length
n = len(coords)

# 5. Immutability: item assignment is not supported, so a statement such as
#    coords[0] = 41.0 raises TypeError.

# 6. Useful for returning multiple values from functions
def min_max(values):
    """Return the smallest and largest item of *values* as a 2-tuple.

    Args:
        values: A non-empty iterable of mutually comparable items. One-shot
            iterators (generators, ``map`` objects, ...) are accepted too.

    Returns:
        ``(minimum, maximum)``.

    Raises:
        ValueError: If *values* is empty.
    """
    # Materialize once: calling min() and then max() directly on a one-shot
    # iterator would exhaust it in min() and leave nothing for max().
    items = list(values)
    return min(items), max(items)

data = [3, 7, 2, 9]
(minimum, maximum) = min_max(data)  # unpack the returned pair in one step

# 7. Tuples as dictionary keys (keys must be hashable; tuples are, lists are not)
location_data = {
    (40.7128, -74.0060): "New York",
    (34.0522, -118.2437): "Los Angeles",
}

# 8. Nested tuples (e.g., dataset rows)
dataset = [("Alice", 24, "F"), ("Bob", 30, "M")]
for name, age, gender in dataset:  # each row is unpacked into three names
    print(f"{name} is {age} years old.")

# 9. Enumerate yields (index, value) tuples
for idx, row in enumerate(dataset):
    print(idx, row)

# 10. Swapping variables without a temporary
a, b = 1, 2
a, b = b, a  # a is now 2, b is now 1

# 11. There is no tuple-comprehension syntax: feed a generator expression
#     to tuple() instead.
squared = tuple(k ** 2 for k in range(5))

# 12. Checking membership
if "Alice" in person:
    print("Alice is in the tuple")

## Sets
- Unordered, mutable, and **do not allow duplicates**.
- Great for checking membership or removing duplicates.

In [None]:
# Sets in Python: unique-element collections for fast lookups and set algebra

# 1. Creating sets (duplicates are dropped; iteration order is arbitrary)
ingredients = {"salt", "pepper", "onion"}
print("Ingredients:", ingredients)

# 2. Adding elements
ingredients.add("tomato")
print("After add:", ingredients)

# 3. Removing elements
ingredients.remove("pepper")   # raises KeyError when the value is absent
ingredients.discard("garlic")  # does nothing when the value is absent

# 4. Set length
print("Number of ingredients:", len(ingredients))

# 5. Membership test (hash lookup, O(1) on average)
if "salt" in ingredients:
    print("Salt is in the recipe")

# 6. Set operations (great for feature engineering)
recipe_1 = {"pepper", "onion", "salt"}
recipe_2 = {"tomato", "onion", "salt"}

# Intersection: ingredients found in both recipes
common = recipe_1.intersection(recipe_2)  # operator form: recipe_1 & recipe_2
print("Common:", common)

# Union: every ingredient from either recipe, once each
unique_ingredients = recipe_1.union(recipe_2)  # operator form: recipe_1 | recipe_2
print("All unique:", unique_ingredients)

# Difference: in recipe_1 but not in recipe_2
diff = recipe_1.difference(recipe_2)  # operator form: recipe_1 - recipe_2
print("Only in recipe_1:", diff)

# Symmetric difference: in exactly one of the two recipes
sym_diff = recipe_1.symmetric_difference(recipe_2)  # operator form: recipe_1 ^ recipe_2
print("In one recipe only:", sym_diff)

# 7. Subset & superset checks (method forms: .issubset() / .issuperset())
print("recipe_1 is subset of unique_ingredients:", recipe_1 <= unique_ingredients)
print("unique_ingredients is superset of recipe_2:", unique_ingredients >= recipe_2)

# 8. Removing duplicates from a list (common in data cleaning)
shopping_list = ["eggs", "bread", "butter", "coffee", "eggs"]
unique_shopping = set(shopping_list)  # the repeated "eggs" collapses to one entry
print("Unique shopping items:", unique_shopping)

# 9. Set comprehension (for feature extraction)
lengths = {len(ingredient) for ingredient in ingredients}
print("Lengths of ingredient names:", lengths)

# 10. Frozen sets (immutable and hashable, so usable as dict keys)
frozen = frozenset(recipe_1)
print("Frozen set:", frozen)

## Dictionaries
- Insertion-ordered since Python 3.7 (unordered in earlier versions), mutable, and store data as key-value pairs.
- Extremely useful for structured data like user profiles, settings, etc.

In [None]:
from collections import Counter

# Dictionaries: key-value fundamentals and everyday operations

# 1. Creating dictionaries
empty_dict = {}
person = {
    "name": "Alice",
    "age": 25,
    "city": "New York",
}

# 2. Accessing values (subscript access raises KeyError for a missing key)
print(person["name"])  # 'Alice'

# 3. Adding or updating values: assignment creates the key or overwrites it
person["email"] = "alice@example.com"
person["age"] = 26

# 4. Removing items
del person["city"]                   # delete by key, nothing returned
removed_value = person.pop("email")  # delete by key and hand back the value

# 5. Checking if a key exists
if "name" in person:
    print("Name is present")

# 6. Iterating over keys, values, and items
for key in person:  # iterating a dict yields its keys
    print(key, person[key])

for value in person.values():
    print(value)

for key, value in person.items():
    print(f"{key}: {value}")

# 7. Dictionary comprehension
squared_numbers = {n: n ** 2 for n in range(5)}  # {0:0, 1:1, 2:4, 3:9, 4:16}

# 8. Merging dictionaries (Python 3.5+); on duplicate keys the right-most wins
dict_a = {"a": 1, "b": 2}
dict_b = {"b": 3, "c": 4}
merged = {**dict_a, **dict_b}  # {'a': 1, 'b': 3, 'c': 4}

# 9. Getting values safely: .get() returns the fallback instead of raising
value = person.get("name", "Unknown")  # 'Alice'
missing = person.get("salary", 0)      # 0, because "salary" is not a key

# 10. Setting default values
person.setdefault("country", "USA")  # inserts "country" only when it is absent

# 11. Clearing a dictionary
temp = {"x": 1}
temp.clear()  # temp is now {}

# 12. Useful for counting (collections.Counter)
words = ["apple", "banana", "apple", "orange"]
word_counts = Counter(words)  # apple: 2, banana: 1, orange: 1

# 13. Nested dictionaries (for structured data)
students = {
    "Ayesha": {"age": 29, "score": 88},
    "Bob": {"age": 22, "score": 91},
}
print(students["Ayesha"]["score"])  # 88

# 14. Dictionary as a lookup table (fast access)
lookup = {"cat": "meow", "dog": "bark"}
sound = lookup.get("cat")  # 'meow'

{'name': 'Ayesha', 'age': 29, 'is_active': True, 'purchases': ['notebook', 'pen'], 'membership': 'Gold'}
