# Python collection data types

## Python Lists

A list is a collection that is ordered and changeable. In Python, lists are written with square brackets.

In [None]:
# create a list and print its items one by one, in list order
thislist = ["apple", "banana", "cherry"]
for x in thislist:
    print(x)

In [None]:
# find all elements in the list that contain the letter "a"
# (explicit loop version: start from an empty list and append each match)
fruits = ["banana", "apple", "kiwi", "cherry", "mango"]
newlist = []

for x in fruits:
    if "a" in x:  # substring test on the string x
        newlist.append(x)

print(newlist)

In [None]:
# the same filter written as a list comprehension:
# [expression for item in iterable if condition]
newlist = [x for x in fruits if "a" in x]

print(newlist)

In [None]:
import time

# Generate a large list of numbers
large_list = list(range(10000000))

# Timing uses time.perf_counter(): a monotonic, high-resolution clock meant for
# measuring elapsed intervals. time.time() is the wall clock and can jump if
# the system time is adjusted while the measurement is running.

# Traditional for loop: append one squared value per iteration
start_time = time.perf_counter()
squared_numbers_loop = []
for number in large_list:
    squared_numbers_loop.append(number**2)
loop_time = time.perf_counter() - start_time
print(f"Time taken using traditional for loop: {loop_time:.4f} seconds")

# List comprehension: builds the same list in a single expression
start_time = time.perf_counter()
squared_numbers_comprehension = [number**2 for number in large_list]
comprehension_time = time.perf_counter() - start_time
print(f"Time taken using list comprehension: {comprehension_time:.4f} seconds")

In [None]:
# only accept items that are not "apple"
# (the condition after `if` acts as a filter on the items)

newlist = [x for x in fruits if x != "apple"]

print(newlist)

In [None]:
# set the values in the new list to upper case
# (the expression before `for` is evaluated for every item)
newlist = [x.upper() for x in fruits]

print(newlist)

In [None]:
# set all values in the new list to 'apple'
# (the expression does not have to use the loop variable; the result still
# has one entry per item in fruits)
newlist = ["apple" for x in fruits]

print(newlist)

In [None]:
# return "orange" instead of "banana"
# (here if/else is a conditional expression that chooses the output value;
# it is not a filter, so no items are dropped)

newlist = [x if x != "banana" else "orange" for x in fruits]

print(newlist)

In [None]:
# the iterable can be any iterable object, e.g. a range;
# keep only the numbers below 5
newlist = [x for x in range(10) if x < 5]

print(newlist)

In [None]:
# sorted() returns a new sorted list, so `fruits` itself is left unchanged
mylist = sorted(fruits)
print(mylist)

## Python Tuples

A tuple is a collection that is ordered and unchangeable. In Python, tuples are written with round brackets.

**Note:** Tuples are immutable, meaning that you can't change the values in a tuple once it's created.

In [None]:
# create a tuple; duplicate values are allowed ("apple" and "cherry" repeat)
fruitstuple = ("apple", "banana", "cherry", "apple", "cherry")
print(fruitstuple)

In [None]:
# len() returns the number of items in the tuple (duplicates are counted)
print(len(fruitstuple))

In [None]:
# one item tuple
# the trailing comma is required: ("apple") without it is just a string
newtuple = ("apple",)
print(type(newtuple))

In [None]:
# tuple constructor
# tuple() takes a single iterable argument, hence the double round brackets
newtuple = tuple(("apple", "banana", "cherry"))
print(newtuple)

In [None]:
# accessing tuple items works the same way as for lists
# the slice [2:5] returns the items at index 2, 3 and 4 (index 5 is excluded)
fruitstuple = ("apple", "banana", "cherry", "orange", "kiwi", "melon", "mango")
print(fruitstuple[2:5])


In [None]:
# unpacking a tuple
# the number of variables on the left must match the number of items,
# otherwise a ValueError is raised
fruits = ("apple", "banana", "cherry")
(green, yellow, red) = fruits

print(green)
print(yellow)
print(red)

In [None]:
# using asterisk to assign the rest of the values to a variable
# the starred variable always receives a list (here the last three items)

fruits = ("apple", "banana", "cherry", "strawberry", "raspberry")
(green, yellow, *red) = fruits

print(green)
print(yellow)
print(red)

In [None]:
# loop through a tuple exactly like a list
for x in fruitstuple:
    print(x)

In [None]:
# joining two tuples is similar to joining two lists
# the + operator creates a new tuple; tuple1 and tuple2 are not modified
tuple1 = ("a", "b", "c")
tuple2 = (1, 2, 3)

tuple3 = tuple1 + tuple2
print(tuple3)

In [None]:
# multiplying tuples is similar to multiplying lists
# `* 2` creates a new tuple with the items repeated twice
fruits = ("apple", "banana", "cherry")
mytuple = fruits * 2

print(mytuple)


In [None]:
# generator expression
mygenerator = (x for x in fruitstuple if "a" in x)
# mylist = [x for x in fruitstuple if "a" in x]

# a generator is a special type of iterator:
# each value is produced lazily, only when it is requested,
# so the full result is never stored in memory at once
print(mygenerator)

# to print all the values in the generator
print(list(mygenerator))

# a generator can be consumed only once: list() above has exhausted it,
# so create a fresh generator before iterating again with a for loop
mygenerator = (x for x in fruitstuple if "a" in x)
for x in mygenerator:
    print(x)

In [None]:
# imported here so the cell can be run on its own
import time

# Generate a large list of numbers
large_list = list(range(10000000))

# Timing uses time.perf_counter(), a monotonic high-resolution clock intended
# for measuring elapsed intervals (time.time() is the adjustable wall clock).

# List comprehension: every square is computed and stored immediately
start_time = time.perf_counter()
squared_numbers_comprehension = [number**2 for number in large_list]
comprehension_time = time.perf_counter() - start_time
print(f"Time taken using list comprehension: {comprehension_time:.4f} seconds")

# Generator expression: only the generator object is created here; no square
# is computed until the generator is iterated, so this measures the cost of
# creating it, not the cost of producing the values
start_time = time.perf_counter()
squared_numbers_generator = (number**2 for number in large_list)
generator_time = time.perf_counter() - start_time
print(f"Time taken to create generator expression: {generator_time:.4f} seconds")


In [None]:
# a set is written with curly brackets; it is unordered and holds no
# duplicates, so the printed order may differ from the order written here
fruitsset = {"apple", "banana", "cherry", "orange", "kiwi", "melon", "mango"}

print(fruitsset)
print(type(fruitsset))

In [None]:
# set constructor
# set() takes a single iterable argument, hence the double round brackets
myset = set(("apple", "banana", "cherry"))
print(myset)

In [None]:
# loop through a set; items cannot be accessed by index and the
# iteration order is not something to rely on
for x in fruitsset:
    print(x)

In [None]:
# membership test: `in` checks whether an item is present in the set
print("banana" in fruitsset)
# print("watermelon" not in fruitsset)

In [None]:
# add an item to a set
# add() inserts a single item; adding an item that is already present
# leaves the set unchanged
thisset = {"apple", "banana", "cherry"}
thisset.add("orange")
print(thisset)


In [None]:
# add multiple items to a set
# update() modifies thisset in place by adding every item of the other set
thisset = {"apple", "banana", "cherry"}
tropical = {"pineapple", "mango", "papaya"}
thisset.update(tropical)
print(thisset)


In [None]:
# or add the items of any iterable (here a list) to a set
thisset = {"apple", "banana", "cherry"}
mylist = ["kiwi", "orange"]
thisset.update(mylist)
print(thisset)

In [None]:
# union method
# union() returns a new set with the items of both sets;
# unlike update(), it does not modify thisset
thisset = {"apple", "banana", "cherry"}
tropical = {"pineapple", "mango", "papaya"}

newset = thisset.union(tropical)
print(newset)

In [None]:
# the | operator is the shorthand for union();
# it requires both operands to be sets, while union() accepts any iterable
newset = thisset | tropical

print(newset)

In [None]:
# set intersection
# intersection() returns a new set with only the items present in both sets
set1 = {"apple", "banana", "cherry"}
set2 = {"google", "microsoft", "apple"}

set3 = set1.intersection(set2)
print(set3)


In [None]:
# the & operator is the shorthand for intersection()
set3 = set1 & set2
print(set3)

In [None]:
# True == 1 and False == 0 (and they have the same hash), so a set treats
# True and 1 as the same element, and likewise False and 0
# the intersection below therefore contains "apple" plus one element equal
# to 1 and one element equal to 0
set1 = {"apple", 1, "banana", 0, "cherry"}
set2 = {False, "google", 1, "apple", 2, True}

set3 = set1.intersection(set2)

print(set3)


In [None]:
# remove an item from a set
# remove() raises a KeyError if the item does not exist
thisset = {"apple", "banana", "cherry"}
thisset.remove("banana")
# thisset.remove("watermelon")  # would raise KeyError
print(thisset)

In [None]:
# discard() also removes an item, but
# if the item does not exist, it will not raise an error
thisset = {"apple", "banana", "cherry"}
thisset.discard("banana")
# thisset.discard("watermelon")  # silently does nothing
print(thisset)

In [None]:
# remove and return an arbitrary item from the set
# sets are unordered, so there is no "last" item and you cannot
# predict which item pop() removes
thisset = {"apple", "banana", "cherry"}
thisset.pop()
print(thisset)

In [None]:
# clear() removes every item; the set object still exists but is empty,
# which prints as set()
thisset = {"apple", "banana", "cherry"}
thisset.clear()
print(thisset)

In [None]:
# set difference
# difference() returns a new set with the items of set1 that are not in set2
set1 = {"apple", "banana", "cherry"}
set2 = {"google", "microsoft", "apple"}

set3 = set1.difference(set2)

print(set3)


In [None]:
# the - operator is the shorthand for difference()
set3 = set1 - set2
print(set3)


In [None]:
# set symmetric difference
# keep the items that are in exactly one of the two sets
# (items present in both sets are dropped)
set3 = set1.symmetric_difference(set2)

print(set3)


In [None]:
# the ^ operator is the shorthand for symmetric_difference()
set3 = set1 ^ set2
print(set3)


In [None]:
# imported here so the cell can be run on its own
import time

# Generate a large list of numbers
large_list = list(range(10000000))

# Timing uses time.perf_counter(), a monotonic high-resolution clock intended
# for measuring elapsed intervals (time.time() is the adjustable wall clock).

# Set comprehension: computes every square and stores it in a set
start_time = time.perf_counter()
squared_numbers_set = {number**2 for number in large_list}
set_time = time.perf_counter() - start_time
print(f"Time taken using set comprehension: {set_time:.4f} seconds")

# Generator expression
start_time = time.perf_counter()
squared_numbers_generator = (number**2 for number in large_list)
# A generator does no work until it is consumed, so materialize it into a
# list to include the cost of computing every value in the measurement
squared_numbers_generator = list(squared_numbers_generator)
generator_time = time.perf_counter() - start_time
print(f"Time taken using generator expression: {generator_time:.4f} seconds")

In [None]:
# dictionary: a collection of key: value pairs written with curly brackets
thisdict = {"brand": "Ford", "model": "Mustang", "year": 1964}
print(thisdict)


In [None]:
# duplicate keys are not allowed: when a key is repeated in a literal,
# the last value silently overwrites the earlier one ("year" ends up as 2020)
thisdict = {"brand": "Ford", "model": "Mustang", "year": 1964, "year": 2020}
print(thisdict)

In [None]:
# the values in the dictionary can be of any data type
# (here: a string, a boolean, an integer and a list)
thisdict = {
    "brand": "Ford",
    "electric": False,
    "year": 1964,
    "colors": ["red", "white", "blue"],
}
print(thisdict)

In [None]:
# dict constructor
# each keyword argument becomes a string key, so this form only works
# for keys that are valid Python identifiers
thisdict = dict(
    brand="Ford",
    model="Mustang",
    year=1964,
    electric=False,
    colors=["red", "white", "blue"],
)
print(thisdict)

In [None]:
# keys() returns a view object that reflects later changes to the dictionary
# add a new item to the dictionary and see that the keys view also updates
# values() and items() return views that behave the same way
car = {"brand": "Ford", "model": "Mustang", "year": 1964}
x = car.keys()
print(x)  # before the change
car["color"] = "white"
print(x)  # after the change


In [None]:
# `in` on a dictionary tests the keys, not the values
print("model" in car)

In [None]:
# change items
# assigning to an existing key replaces its value;
# if the key does not exist, it will add the key-value pair
thisdict = {"brand": "Ford", "model": "Mustang", "year": 1964}
thisdict["year"] = 2018
print(thisdict)

In [None]:
# update() merges the given key-value pairs into the dictionary:
# existing keys are overwritten and new keys are added
thisdict = {"brand": "Ford", "model": "Mustang", "year": 1964}
thisdict.update({"year": 2020})
print(thisdict)

In [None]:
# remove an item
# pop(key) removes the key and returns its value; it raises a KeyError
# if the key is missing and no default is given
thisdict = {"brand": "Ford", "model": "Mustang", "year": 1964}
thisdict.pop("model")
print(thisdict)

In [None]:
# or use the del keyword
# del raises a KeyError if the key does not exist
thisdict = {"brand": "Ford", "model": "Mustang", "year": 1964}
del thisdict["model"]
print(thisdict)

In [None]:
# remove the last inserted item
# dictionaries preserve insertion order (guaranteed since Python 3.7), so
# popitem() removes and returns the most recently added (key, value) pair
thisdict = {"brand": "Ford", "model": "Mustang", "year": 1964}
thisdict.popitem()
print(thisdict)

In [None]:
# clear() removes every item; the dictionary still exists but is empty,
# which prints as {}
thisdict = {"brand": "Ford", "model": "Mustang", "year": 1964}
thisdict.clear()
print(thisdict)


In [None]:
# loop through a dictionary using keys
# iterating over a dictionary directly yields its keys

thisdict = {"brand": "Ford", "model": "Mustang", "year": 1964}

for x in thisdict:
    print(x)

# equivalent, with the keys() view made explicit:
# for x in thisdict.keys():
#     print(x)

In [None]:
# loop through a dictionary using values
# values() yields only the values, without their keys
for x in thisdict.values():
    print(x)

In [None]:
# loop through both keys and values
# items() yields (key, value) pairs, unpacked here into x and y
for x, y in thisdict.items():
    print(x, y)

In [None]:
# nested dictionaries
# a dictionary value can itself be a dictionary
myfamily = {
    "child1": {"name": "Emil", "year": 2004},
    "child2": {"name": "Tobias", "year": 2007},
    "child3": {"name": "Linus", "year": 2011},
}

print(myfamily)

In [None]:
# build each inner dictionary first, then combine them into the outer one
child1 = dict(name="Emil", year=2004)
child2 = dict(name="Tobias", year=2007)
child3 = dict(name="Linus", year=2011)

myfamily = dict(child1=child1, child2=child2, child3=child3)

print(myfamily)

In [None]:
# access the items in the nested dictionary
# chain the keys: the first selects the inner dictionary, the second its item
print(myfamily["child1"]["name"])

In [None]:
# walk the outer dictionary, then print every "key: value" of each inner one
for x in myfamily:
    obj = myfamily[x]
    print(x)

    for y in obj.keys():
        print(f"{y}:", obj[y])


In [64]:
import time
import random

# Generate a large dataset
num_students = 100000
# random.sample draws without replacement, so every ID is unique. Independent
# randint() draws repeat values, and repeated IDs silently collapse into a
# single entry as soon as the IDs are used as dictionary keys.
# The range covers the same six-digit IDs as before: 100000..999999 inclusive.
student_ids = random.sample(range(100000, 1000000), num_students)
student_names = [f"Student_{i}" for i in range(num_students)]

# List of (student_id, student_name) tuples for student data
student_data_list = list(zip(student_ids, student_names))
# student_data_list

In [65]:
# Dictionary for fast lookups
# maps each student ID to the name at the same position; if an ID occurs
# more than once, the name paired with its last occurrence is kept
student_data_dict = dict(zip(student_ids, student_names))
# student_data_dict

In [None]:
# Timing uses time.perf_counter(), a monotonic high-resolution clock intended
# for measuring elapsed intervals (time.time() is the adjustable wall clock).

# Performance test for list
# `in` on a list scans it item by item, so one lookup per student makes the
# total work grow quadratically with the number of students (this is slow)
start_time = time.perf_counter()
for student in student_data_list:
    if student[0] in student_ids:
        pass  # Simulate processing
list_time = time.perf_counter() - start_time
print(f"Time taken for list processing: {list_time:.4f} seconds")

# Performance test for dictionary
# `in` on a dictionary is a hash lookup on the keys, so each test takes
# roughly constant time regardless of the dictionary size
start_time = time.perf_counter()
for student_id in student_ids:
    if student_id in student_data_dict:
        pass  # Simulate processing
dict_time = time.perf_counter() - start_time
print(f"Time taken for dictionary processing: {dict_time:.4f} seconds")


In [None]:
# Generate a large dataset: one dict per purchase with a random customer
# (1..1000) and a random item
large_purchases = [
    {
        "customer_id": random.randint(1, 1000),
        "item": random.choice(["apple", "banana", "orange"]),
    }
    for _ in range(100000)
]

# Each measurement stops the clock right after the work being measured, so
# the elapsed time does not include len() or building the output string.
# time.perf_counter() is a monotonic clock intended for measuring intervals.

# Measure time for unique customers using a set
start_time = time.perf_counter()
unique_customers_large = {purchase["customer_id"] for purchase in large_purchases}
unique_set_time = time.perf_counter() - start_time
print(
    f"Unique customers ({len(unique_customers_large)}) (set) time: {unique_set_time:.4f} seconds"
)

# Measure time for unique customers using a dictionary
# (the keys hold the unique IDs; the True values are placeholders)
start_time = time.perf_counter()
unique_customers_dict_large = {
    purchase["customer_id"]: True for purchase in large_purchases
}
unique_dict_time = time.perf_counter() - start_time
print(
    f"Unique customers ({len(unique_customers_dict_large)}) (dict) time: {unique_dict_time:.4f} seconds"
)
