# Python Set

In [1]:
# A set stores each distinct value once, so the repeated 4 and 2 collapse.
my_set = {
    1, 2, 3, 4,
    4, 2,  # repeats of values already listed above
}
print(my_set)  # Output: {1, 2, 3, 4}

{1, 2, 3, 4}


Creating Sets:

In [3]:
# Curly-brace literal: the usual way to write a set that already has members
chai = {
    'green', 'black', 'herbal', 'chai',
    'masala', 'ginger', 'lemon',
}

# An empty set has to come from set(); a bare {} is an empty dict, not a set
empty_set = set()

Why Use Sets?

In [4]:
# Two small sets that overlap on 4 and 5
setOne = {1, 2, 3, 4, 5}
setTwo = {4, 5, 6, 7, 8, 9}

# How many elements each set holds
print("Length of Set One:", len(setOne))  # Output: 5
print("Length of Set Two:", len(setTwo))  # Output: 6

# Everything found in either set (method form of setOne | setTwo)
print("Union:", setOne.union(setTwo))  # Output: {1, 2, 3, 4, 5, 6, 7, 8, 9}

# Only what both sets share (method form of setOne & setTwo)
print("Intersection:", setOne.intersection(setTwo))  # Output: {4, 5}

# What is left of one set after removing the other's elements (method form of -)
print("Difference (SetOne - SetTwo):", setOne.difference(setTwo))  # Output: {1, 2, 3}
print("Difference (SetTwo - SetOne):", setTwo.difference(setOne))  # Output: {8, 9, 6, 7}

# Elements that belong to exactly one of the two sets (method form of ^)
print("Symmetric Difference:", setOne.symmetric_difference(setTwo))  # Output: {1, 2, 3, 6, 7, 8, 9}

# Membership test with `in`
print("Is 1 in Set One:", 1 in setOne)  # Output: True
print("Is 1 in Set Two:", 1 in setTwo)  # Output: False

Length of Set One: 5
Length of Set Two: 6
Union: {1, 2, 3, 4, 5, 6, 7, 8, 9}
Intersection: {4, 5}
Difference (SetOne - SetTwo): {1, 2, 3}
Difference (SetTwo - SetOne): {8, 9, 6, 7}
Symmetric Difference: {1, 2, 3, 6, 7, 8, 9}
Is 1 in Set One: True
Is 1 in Set Two: False


Example: List vs Set Lookup Time

In [5]:
# A list is scanned element by element, so `in` costs O(n)
nums_list = [*range(1000000)]
print(999999 in nums_list)  # Slower: the match is the very last element

# A set hashes the value and looks it up directly: O(1) on average
nums_set = {*range(1000000)}
print(999999 in nums_set)  # Much faster

True
True


Important Notes About Hashing in Sets

In [6]:
# Hashable (immutable) values such as ints, strings and tuples are valid elements.
my_set = {1, "hello", (2, 3)}

# A list is mutable and therefore unhashable, so it cannot be a set element.
# The error is caught and printed so that the cells after this one still run
# on a fresh "Restart & Run All"; my_set keeps the value assigned above.
try:
    my_set = {[1, 2]}
except TypeError as error:
    print("TypeError:", error)  # TypeError: unhashable type: 'list'

TypeError: unhashable type: 'list'

# Common Use Cases for Python Sets

1. Removing Duplicates from a List

In [2]:
# Raw data in which several varieties appear more than once
tea_varities = [
    'green', 'black', 'chai', 'green', 'ginger',
    'black', 'masala', 'ginger', 'lemon',
]
# Building a set from the list keeps a single copy of each variety
unique_names = {*tea_varities}
print(unique_names)  # the order of elements in the printed set is arbitrary


{'ginger', 'green', 'black', 'lemon', 'masala', 'chai'}


2. Fast Membership Testing



In [3]:
# Keep the varieties in a set rather than a list: each `in` check is then a
# hash lookup (O(1) on average) instead of a scan over every element (O(n)).
check_varities = {'green', 'black', 'chai', 'ginger', 'masala', 'lemon'}
print("chai" in check_varities)   # True
print("coffee" in check_varities)  # False


True
False


3. Set Operations: Union, Intersection, Difference



In [4]:
# Two sets whose only common element is 3
a, b = {1, 2, 3}, {3, 4, 5}

print(a | b)  # Union (in a or b): {1, 2, 3, 4, 5}
print(a & b)  # Intersection (in both): {3}
print(a - b)  # Difference (in a, not in b): {1, 2}
print(a ^ b)  # Symmetric Difference (in exactly one): {1, 2, 4, 5}


{1, 2, 3, 4, 5}
{3}
{1, 2}
{1, 2, 4, 5}



4. Filtering Data Efficiently



In [5]:
# Pages already seen live in a set so each membership check is a hash lookup
visited_pages = {"home", "about", "contact"}
# Candidate pages stay in a list, so their original order is kept in the result
all_pages = ["home", "products", "about", "blog"]

# Keep only the candidates that have not been visited yet
new_pages = [
    candidate
    for candidate in all_pages
    if candidate not in visited_pages
]
print(new_pages)  # Output: ['products', 'blog']

['products', 'blog']


5. Finding Duplicates

In [7]:
items = [1, 2, 3, 2, 4, 3, 5]
seen = set()        # every value encountered at least once
duplicates = set()  # values encountered a second (or later) time

for item in items:
    # First encounter: remember the value and move on
    if item not in seen:
        seen.add(item)
        continue
    # Any later encounter means the value is repeated
    duplicates.add(item)

print(duplicates)  # Output: {2, 3}

{2, 3}


6. Set Comprehensions

In [8]:
# Same syntax as a list comprehension, but curly braces produce a set
squares = {n ** 2 for n in range(5)}
print(squares)  # Output: {0, 1, 4, 9, 16}

{0, 1, 4, 9, 16}


# How Are Sets Used in the Industry?

1. Web Development & APIs

In [10]:
# Roles that are permitted to perform the protected action
allowed_roles = {"admin", "editor", "moderator"}


def grant_access():
    """Stand-in for the application's real access-granting logic."""
    print("Access granted")


# Sample input so the cell runs on a fresh kernel; a real application would
# take this from the authenticated user.
user_role = "editor"

# Set membership makes the permission check a single hash lookup
if user_role in allowed_roles:
    grant_access()  # prints: Access granted

2. Cybersecurity

In [11]:
# Addresses that must be rejected
blocked_ips = {"192.168.1.100", "10.0.0.5"}


def block_request():
    """Stand-in for the application's real request-blocking logic."""
    print("Request blocked")


# Sample input so the cell runs on a fresh kernel; a real service would read
# this from the incoming request.
request_ip = "10.0.0.5"

# One hash lookup decides whether the address is on the blocklist
if request_ip in blocked_ips:
    block_request()  # prints: Request blocked

3. Data Science & Analytics

In [12]:
# Customers who bought in each year
sales_2023 = {"Alice", "Bob", "Charlie"}
sales_2024 = {"Bob", "Diana"}

# Customers present in both years (method form of sales_2023 & sales_2024)
repeat_customers = sales_2023.intersection(sales_2024)

4. E-commerce & Recommendation Engines

In [13]:
# Items each user has liked
user1_likes = {"shoes", "jeans", "jackets"}
user2_likes = {"jeans", "hats", "sneakers"}

# Shared tastes: the items liked by both users
similar_items = user1_likes.intersection(user2_likes)

5. Email and Communication Systems

In [14]:
# Words that mark a message as suspicious
spam_keywords = {"win", "free", "urgent", "money"}


def flag_as_spam():
    """Stand-in for the mail system's real spam-flagging logic."""
    print("Flagged as spam")


# Sample input so the cell runs on a fresh kernel; a real system would
# tokenize the incoming message body.
email_words = "Win a free prize today".lower().split()

# Each word is checked against the set with one hash lookup; any() stops at
# the first hit.
if any(word in spam_keywords for word in email_words):
    flag_as_spam()  # prints: Flagged as spam

6. Supply Chain and Inventory Systems

In [15]:
# Identifiers of the items scanned so far
scanned_items = set()

# Sample input so the cell runs on a fresh kernel; a real system would
# receive this from the scanner.
item_id = "SKU-1001"

# Record the item only the first time it is seen
if item_id not in scanned_items:
    scanned_items.add(item_id)

# When Not to Use Sets in Python

1. When You Need to Preserve Order

In [16]:
# Lists keep their elements in positions, so an index picks one out
my_list = ["a", "b", "c"]
print(my_list[0])  # 'a'

# The same three elements as a set: there are no positions to index into
my_set = set(my_list)
# print(my_set[0])  # Error: sets are unordered and unindexed

a


2. When You Need Duplicates

In [17]:
# The list deliberately holds "apple" twice
items = [
    "apple",
    "apple",
    "banana",
]
# Converting to a set cannot keep the repeat
my_set = {*items}
print(my_set)  # Output: {'apple', 'banana'} — one "apple" is removed!

{'banana', 'apple'}


3. When You Need to Store Unhashable (Mutable) Types

In [18]:
my_set = set()

# A list is mutable and therefore unhashable, so add() rejects it.
# The failure is caught and printed, which makes this a live demonstration
# that does not stop the notebook; my_set stays empty.
try:
    my_set.add([1, 2, 3])
except TypeError as error:
    print("TypeError:", error)  # TypeError: unhashable type: 'list'