In [49]:
#To avoid warnings
import warnings
warnings.filterwarnings("ignore")

## Loading the dataset

In [50]:
transactions = []
with open('TV Shows - Association Rule Learning.csv', 'r', encoding='utf-8') as file:
    for i, line in enumerate(file):
        # Split by ',' and remove empty/space-only entries
        items = [show.strip() for show in line.strip().split(',') if show.strip()]
        transactions.append(items)
        if i < 10:
            print(transactions[i])


['Cobra Kai', 'Lupin', '12 Monkeys', 'Sherlock']
['Lost', 'Jack Ryan', 'The Flash', 'Game of thrones', 'House of Cards', '12 Monkeys', 'Vikings', 'Fringe', 'The Mentalist', 'The Alienist', 'Big Little Lies', 'Chernobyl']
['Sex Education', 'Dr. House', 'Kingdom', 'The Walking Dead']
['Ozark', 'Sex Education', 'Constantine', 'Preacher', 'Vikings', 'The Tick']
['Naruto']
['Sex Education']
['Locke & Key']
['Lupin']
['Fringe', 'Shooter', 'How to get away with murder']
['Queen of the South', 'Better Call Saul', 'Peaky Blinders']


## Convert the transactions to One-Hot Encoded DataFrame
### This step prepares the data in a format required by the Apriori algorithm in mlxtend—all TV shows become columns (True/False for each transaction).

In [58]:
import pandas as pd

# All unique shows
all_items = set(item for t in transactions for item in t)

# Transaction to one-hot DataFrame
df = pd.DataFrame([{item: (item in t) for item in all_items} for t in transactions])

print(df.head())
print(df.shape)


   Altered Carbon  The Innocent    You  House of Cards  Succession  Riverdale  \
0           False         False  False           False       False      False   
1           False         False  False            True       False      False   
2           False         False  False           False       False      False   
3           False         False  False           False       False      False   
4           False         False  False           False       False      False   

   Game of thrones  Elementary  Family Guy  Designated Survivor  ...  \
0            False       False       False                False  ...   
1             True       False       False                False  ...   
2            False       False       False                False  ...   
3            False       False       False                False  ...   
4            False       False       False                False  ...   

   The Originals   Loki  Atypical  Mirzapur  Supernatural  Heros  Travellers  \


## Find frequent itemsets with apriori algorithm

In [59]:
import pandas as pd

# All unique shows
all_items = set(item for t in transactions for item in t)

# Transaction to one-hot DataFrame
df = pd.DataFrame([{item: (item in t) for item in all_items} for t in transactions])

print(df.head())
print(df.shape)


   Altered Carbon  The Innocent    You  House of Cards  Succession  Riverdale  \
0           False         False  False           False       False      False   
1           False         False  False            True       False      False   
2           False         False  False           False       False      False   
3           False         False  False           False       False      False   
4           False         False  False           False       False      False   

   Game of thrones  Elementary  Family Guy  Designated Survivor  ...  \
0            False       False       False                False  ...   
1             True       False       False                False  ...   
2            False       False       False                False  ...   
3            False       False       False                False  ...   
4            False       False       False                False  ...   

   The Originals   Loki  Atypical  Mirzapur  Supernatural  Heros  Travellers  \


## Mine Association Rules from Frequent Itemsets
### This will give the insights into which combinations of shows reliably predict the presence of other shows—useful for recommendations and understanding show clusters

In [60]:
from mlxtend.frequent_patterns import association_rules

rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.1)
print(rules[['antecedents', 'consequents', 'support', 'confidence']].sort_values('confidence', ascending=False).head(10))


            antecedents           consequents   support  confidence
5            (Atypical)       (Sex Education)  0.056140    0.401180
3               (Ozark)       (Sex Education)  0.075129    0.387853
1  (Two and a half men)       (Sex Education)  0.056553    0.308038
2       (Sex Education)               (Ozark)  0.075129    0.293904
0       (Sex Education)  (Two and a half men)  0.056553    0.221235
4       (Sex Education)            (Atypical)  0.056140    0.219621


## Make Recommendations Based on Association Rules
### *For example*: Suppose the user watched 'Sex Education'. Here’s how to recommend additional shows:

In [62]:
watched = {'Sex Education'}
candidate_rules = rules[rules['antecedents'].apply(lambda x: watched.issuperset(x))]
recommendations = set()
for s in candidate_rules['consequents']:
    recommendations.update(s)
recommendations -= watched
print("Recommended shows:", recommendations)


Recommended shows: {'Ozark', 'Two and a half men', 'Atypical'}


## Find Show Clusters (Frequent itemsets with size ≥ 2)

In [63]:
# Show clusters: itemsets with 2 or more shows, sorted by support
clusters = frequent_itemsets[frequent_itemsets['itemsets'].apply(lambda x: len(x) >= 2)]
print(clusters.sort_values('support', ascending=False).head(10))

     support                             itemsets
30  0.075129               (Sex Education, Ozark)
29  0.056553  (Sex Education, Two and a half men)
31  0.056140            (Sex Education, Atypical)


## Analyze User Preferences/Trends

In [64]:
# Find most popular shows and combinations
popular_shows = df.sum().sort_values(ascending=False)
print(popular_shows.head(10))

# Show top frequent pair/group clusters again
print(clusters.head(10))


Sex Education         2477
Ozark                 1877
Two and a half men    1779
Outer Banks           1696
Atypical              1356
Stranger Things       1070
Mr. Robot             1060
The Blacklist         1016
Lucifer                949
Daredevil              910
dtype: int64
     support                             itemsets
29  0.056553  (Sex Education, Two and a half men)
30  0.075129               (Sex Education, Ozark)
31  0.056140            (Sex Education, Atypical)


### Top single shows

In [66]:
popular_shows = df.sum().sort_values(ascending=False)
print("Most Popular Shows:")
print(popular_shows.head(10))

Most Popular Shows:
Sex Education         2477
Ozark                 1877
Two and a half men    1779
Outer Banks           1696
Atypical              1356
Stranger Things       1070
Mr. Robot             1060
The Blacklist         1016
Lucifer                949
Daredevil              910
dtype: int64


### Top Show Combinations (Clusters)

In [67]:
# Frequent itemsets with ≥2 shows, sorted by support
clusters = frequent_itemsets[frequent_itemsets['itemsets'].apply(lambda x: len(x) >= 2)]
print("Most Common Combinations:")
print(clusters.sort_values('support', ascending=False).head(10))


Most Common Combinations:
     support                             itemsets
30  0.075129               (Sex Education, Ozark)
29  0.056553  (Sex Education, Two and a half men)
31  0.056140            (Sex Education, Atypical)
