In [None]:
# Import required libraries
import pandas as pd
import random
from mlxtend.frequent_patterns import apriori, association_rules

# Catalogue of the eight action titles that transactions are drawn from.
# Order matters: it fixes the column order of the one-hot encoded frame.
movies = [
    'John Wick',
    'Mad Max',
    'The Dark Knight',
    'Avengers',
    'Gladiator',
    'Inception',
    'Die Hard',
    'Mission Impossible',
]


In [None]:
# Create 10 fake transactions with 2–5 randomly chosen movies.
# A dedicated, seeded generator makes the transactions (and therefore every
# itemset and rule derived from them) identical on each Restart & Run All and
# on each re-run of this cell, without reseeding the global `random` state.
rng = random.Random(42)
transactions = [
    # The basket size is drawn first, then that many distinct titles are sampled.
    rng.sample(movies, rng.randint(2, 5))
    for _ in range(10)
]

# Display the transactions
print("Transactions:")
for i, t in enumerate(transactions, 1):
    print(f"Transaction {i}: {t}")


In [None]:
# One-hot encode the transactions: one row per transaction, one boolean
# column per title in `movies` (True when the title is in that transaction).
encoded = [
    {title: title in basket for title in movies}
    for basket in transactions
]
df = pd.DataFrame(encoded)

# Print a header followed by the encoded frame
print("\nOne-Hot Encoded Data:", df, sep="\n")


In [None]:
# Mine every itemset that appears in at least 30% of the transactions;
# use_colnames=True reports itemsets by movie title rather than column index.
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.3)

# Print a header followed by the itemset table
print("\nFrequent Itemsets:", frequent_itemsets, sep="\n")


In [None]:
# Derive rules whose confidence is at least 70%, keeping only the columns
# that are displayed below.
rules = association_rules(
    frequent_itemsets, min_threshold=0.7, metric="confidence"
).loc[:, ['antecedents', 'consequents', 'support', 'confidence']]

# Print a header followed by the rule table
print("\nAssociation Rules (confidence >= 0.7):", rules, sep="\n")


In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Step 1: Hand-written sample of 10 movie transactions (fixed, so results are reproducible)
dataset = [
    ['John Wick', 'Die Hard'],
    ['Avengers', 'The Dark Knight', 'Inception'],
    ['John Wick', 'Mission Impossible', 'Die Hard'],
    ['Gladiator', 'Mad Max', 'Inception'],
    ['Avengers', 'Mad Max'],
    ['John Wick', 'Mission Impossible', 'Avengers'],
    ['The Dark Knight', 'Inception', 'Die Hard'],
    ['Gladiator', 'Die Hard', 'John Wick'],
    ['Mission Impossible', 'The Dark Knight', 'Avengers'],
    ['Inception', 'Mission Impossible', 'Mad Max']
]

# Step 2: One-hot encode — alphabetically sorted unique titles become the
# columns, and each transaction becomes a row of membership booleans
all_movies = sorted({title for basket in dataset for title in basket})
encoded_data = [
    {title: title in basket for title in all_movies}
    for basket in dataset
]
df = pd.DataFrame(encoded_data)

# Step 3: Keep itemsets present in at least 30% of the transactions
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.3)

# Step 4: Keep rules whose confidence is at least 0.7
rules = association_rules(frequent_itemsets, min_threshold=0.7, metric='confidence')

# Step 5: Display results (lift is included alongside support and confidence)
print("Frequent Itemsets:\n", frequent_itemsets)
print(
    "\nAssociation Rules:\n",
    rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']],
)


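# Sample rule explanation
# With min support 0.3 and min confidence 0.7, this dataset yields exactly two rules,
# both from the single frequent pair {Die Hard, John Wick} (3 of 10 transactions).
#
# Rule 1: If someone watches Die Hard → 75% chance they also watch John Wick (lift 1.875).
# Meaning: Fans of the older action classic are also likely to enjoy its modern counterpart.
#
# Rule 2: If someone watches John Wick → 75% chance they also watch Die Hard (lift 1.875).
# Meaning: The association is symmetric here because both movies appear in 4 of 10 transactions.
#
# Note: {John Wick, Mission Impossible} → Die Hard and {Inception, The Dark Knight} → Avengers
# are NOT generated: each antecedent pair occurs in only 2 of 10 transactions (support 0.2 < 0.3),
# and each rule would hold in just 1 of those 2 (confidence 0.5 < 0.7).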



Frequent Itemsets:
    support               itemsets
0      0.4             (Avengers)
1      0.4             (Die Hard)
2      0.4            (Inception)
3      0.4            (John Wick)
4      0.3              (Mad Max)
5      0.4   (Mission Impossible)
6      0.3      (The Dark Knight)
7      0.3  (Die Hard, John Wick)

Association Rules:
    antecedents  consequents  support  confidence   lift
0   (Die Hard)  (John Wick)      0.3        0.75  1.875
1  (John Wick)   (Die Hard)      0.3        0.75  1.875
