# Association Rules 

# Apriori 

#### Import libraries 

In [1]:
import pandas as pd 
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

## Preprocessing Phase : 

### Reading the data : 

In [2]:
df = pd.read_csv("Data/TV shows.csv", header = None )
df.describe

<bound method NDFrame.describe of                  0              1                   2                 3   \
0         Cobra Kai          Lupin          12 Monkeys          Sherlock   
1              Lost      Jack Ryan           The Flash   Game of thrones   
2     Sex Education      Dr. House             Kingdom  The Walking Dead   
3             Ozark  Sex Education         Constantine          Preacher   
4            Naruto            NaN                 NaN               NaN   
...             ...            ...                 ...               ...   
9685      One Piece  The Blacklist  Two and a half men             Lupin   
9686      One Piece      Mr. Robot          Succession             Ozark   
9687      Daredevil       Atypical               Heros               NaN   
9688       Absentia   The Newsroom        The Alienist               NaN   
9689       Atypical            NaN                 NaN               NaN   

                  4                      5           

In [3]:
df.isna().sum()

0        0
1     2133
2     3743
3     5023
4     6012
5     6853
6     7493
7     8026
8     8459
9     8803
10    9046
11    9226
12    9342
13    9418
14    9494
15    9549
16    9595
17    9624
18    9638
19    9652
20    9661
21    9672
22    9676
23    9682
24    9683
25    9683
26    9684
27    9685
28    9686
29    9689
30    9689
31    9689
dtype: int64

In [4]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,Cobra Kai,Lupin,12 Monkeys,Sherlock,,,,,,,...,,,,,,,,,,
1,Lost,Jack Ryan,The Flash,Game of thrones,House of Cards,12 Monkeys,Vikings,Fringe,The Mentalist,The Alienist,...,,,,,,,,,,
2,Sex Education,Dr. House,Kingdom,The Walking Dead,,,,,,,...,,,,,,,,,,
3,Ozark,Sex Education,Constantine,Preacher,Vikings,The Tick,,,,,...,,,,,,,,,,
4,Naruto,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9685,One Piece,The Blacklist,Two and a half men,Lupin,Dark,How I met your mother,,,,,...,,,,,,,,,,
9686,One Piece,Mr. Robot,Succession,Ozark,12 Monkeys,Vikings,The Vampire Diaries,,,,...,,,,,,,,,,
9687,Daredevil,Atypical,Heros,,,,,,,,...,,,,,,,,,,
9688,Absentia,The Newsroom,The Alienist,,,,,,,,...,,,,,,,,,,


### Convert data into a list of lists: 

In [5]:
TVshows = df.values.astype(str).tolist()

### Dropping null values 

In [6]:
def remove_items(test_list, item): ## a function that deletes a given element from a list 
    res = [i for i in test_list if i != item] 
    return res 

In [7]:
## Dropping all null values from each list 
for i in range(len(TVshows)):
    TVshows[i] = remove_items(TVshows[i],'nan')
    TVshows[i] = remove_items(TVshows[i],'Sex Education')
TVshows

[['Cobra Kai', 'Lupin', '12 Monkeys', 'Sherlock'],
 ['Lost',
  'Jack Ryan',
  'The Flash',
  'Game of thrones',
  'House of Cards',
  '12 Monkeys',
  'Vikings',
  'Fringe',
  'The Mentalist',
  'The Alienist',
  'Big Little Lies',
  'Chernobyl'],
 ['Dr. House', 'Kingdom', 'The Walking Dead'],
 ['Ozark', 'Constantine', 'Preacher', 'Vikings', 'The Tick'],
 ['Naruto'],
 [],
 ['Locke & Key'],
 ['Lupin'],
 ['Fringe', 'Shooter', 'How to get away with murder'],
 ['Queen of the South', 'Better Call Saul', 'Peaky Blinders'],
 [],
 ['Atypical', 'Stranger Things', 'Homeland', 'Brooklyn Nine Nine'],
 ['Daredevil', 'Atypical', 'The Walking Dead', 'The Witcher'],
 ['One Piece', 'Queen of the South'],
 ['Demon Slayer',
  'Atypical',
  'Stranger Things',
  'Absentia',
  'The Alienist',
  'Chernobyl'],
 ['Queen of the South',
  'The Blacklist',
  'Succession',
  'Berlin Station',
  'Atypical',
  'Two and a half men',
  'Kingdom',
  'Orphan Black',
  'Shooter',
  'Stranger Things',
  'The Mentalist',
  

### Recreating the dataframe : 

In [8]:
shows = pd.DataFrame({"Watched":TVshows})
pd.options.display.max_colwidth=1000

In [9]:
shows

Unnamed: 0,Watched
0,"[Cobra Kai, Lupin, 12 Monkeys, Sherlock]"
1,"[Lost, Jack Ryan, The Flash, Game of thrones, House of Cards, 12 Monkeys, Vikings, Fringe, The Mentalist, The Alienist, Big Little Lies, Chernobyl]"
2,"[Dr. House, Kingdom, The Walking Dead]"
3,"[Ozark, Constantine, Preacher, Vikings, The Tick]"
4,[Naruto]
...,...
9685,"[One Piece, The Blacklist, Two and a half men, Lupin, Dark, How I met your mother]"
9686,"[One Piece, Mr. Robot, Succession, Ozark, 12 Monkeys, Vikings, The Vampire Diaries]"
9687,"[Daredevil, Atypical, Heros]"
9688,"[Absentia, The Newsroom, The Alienist]"


In [10]:
## In this section we selected the lists that contain at leasy 19 elemnts so we can get proper results : 
shows = shows[shows['Watched'].apply(lambda x: len(x) >= 19)]
shows 

Unnamed: 0,Watched
85,"[Daredevil, The Stranger, Demon Slayer, The Boys, Rick And Morty, Mr. Robot, Succession, Ozark, Berlin Station, Family Guy, The Outsider, Two and a half men, 12 Monkeys, Sopranos, Sherlock, Stranger Things, Absentia, You, Big Little Lies]"
361,"[Hanna, Into the night, The Blacklist, The Boys, Mr. Robot, Ozark, Deception, Doctor Who, The man in the high castle, The Walking Dead, Lost in Space, Mindhunter, Outer Banks, Mare of Easttown, How to get away with murder, Haven, Shadow and Bone, Perception, Startup]"
601,"[Daredevil, The Stranger, Succession, Ozark, Better Call Saul, Berlin Station, Atypical, Emily in Paris, The Flash, Deception, Dr. House, The Wire, Stranger Things, Outer Banks, The Mentalist, Dark, Absentia, Friends, Spartacus, Shadow and Bone, The Night Manager, Bojack Horseman, The Wheel of Time]"
733,"[Daredevil, The Blacklist, Demon Slayer, Grey's Anatomy, Breaking Bad, The Wire, The Walking Dead, Lupin, Only Murders in the building, Sopranos, Sons of Anarchy, Outer Banks, Mare of Easttown, Banshee, Spartacus, Shadow and Bone, Designated Survivor, Perception, Chernobyl, Startup]"
959,"[Daredevil, The Wire, Into the night, Cobra Kai, The Blacklist, Demon Slayer, Mr. Robot, Succession, Person of Interest, Ozark, Jack Ryan, Atypical, Family Guy, Grey's Anatomy, Preacher, Deception, Kingdom, The Walking Dead, Lupin, 12 Monkeys]"
1189,"[Inhumans, Arrow, Demon Slayer, Mr. Robot, Ozark, Jack Ryan, Constantine, Travellers, Atypical, True Detective, The Wire, The Walking Dead, Locke & Key, Vikings, The Umbrella Academy, Archer, Outer Banks, The Mentalist, How to get away with murder, Banshee, Chernobyl]"
1229,"[The Wire, Into the night, The Blacklist, Mr. Robot, Succession, Ozark, Berlin Station, Atypical, Family Guy, Alice in Borderland, The Flash, Doctor Who, Upload, Dr. House, The Wire, The Innocent, Locke & Key, Only Murders in the building, 12 Monkeys, Sopranos, Punisher, Nine Perfect Strangers, Stranger Things, The Mentalist, The Queen's Gambit, Banshee, Shadow and Bone, American Gods]"
1255,"[The Blacklist, Ozark, Atypical, Dr. House, Two and a half men, The Walking Dead, Lupin, Mirzapur, Only Murders in the building, 12 Monkeys, Death Note, Archer, Stranger Things, Outer Banks, Squid Game, Spartacus, Outlander, The Night Manager, Ragnarok]"
1382,"[Daredevil, Demon Slayer, Mr. Robot, Succession, Berlin Station, Constantine, Grey's Anatomy, Preacher, Dr Stone, Dr. House, The Innocent, House of Cards, X-Files, Only Murders in the building, Sopranos, Sons of Anarchy, Shooter, Lost in Space, Black Clover, Dexter, Homeland, Mare of Easttown, Dark, Rome, Shadow and Bone, Schitts Creek, Big Little Lies]"
1812,"[Hanna, The Stranger, Mr. Robot, Ozark, Constantine, Travellers, Atypical, Family Guy, Alice in Borderland, The Flash, The Walking Dead, Lupin, Game of thrones, House of Cards, Shooter, Designated Survivor, Westworld, How I met your mother, Lucifer]"


### Encoding : 

In [11]:
shows_encoded = shows['Watched'].str.join(',').str.get_dummies(',')

In [12]:
shows_encoded

Unnamed: 0,12 Monkeys,24,Absentia,Alice in Borderland,American Gods,Archer,Arrow,Atypical,Banshee,Berlin Station,...,The man in the high castle,Travellers,True Detective,Two and a half men,Upload,Vikings,Westworld,White Collar,X-Files,You
85,1,0,1,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,1
361,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
601,0,0,1,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
733,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
959,1,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1189,0,0,0,0,0,1,1,1,1,0,...,0,1,1,0,0,1,0,0,0,0
1229,1,0,0,1,1,0,0,1,1,1,...,0,0,0,0,1,0,0,0,0,0
1255,1,0,0,0,0,1,0,1,0,0,...,0,0,0,1,0,0,0,0,0,0
1382,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
1812,0,0,0,1,0,0,0,1,0,0,...,0,1,0,0,0,0,1,0,0,0


In [13]:
shows_encoded.columns.tolist()

['12 Monkeys',
 '24',
 'Absentia',
 'Alice in Borderland',
 'American Gods',
 'Archer',
 'Arrow',
 'Atypical',
 'Banshee',
 'Berlin Station',
 'Better Call Saul',
 'Big Little Lies',
 'Black Clover',
 'Bojack Horseman',
 'Breaking Bad',
 'Brooklyn Nine Nine',
 'Chernobyl',
 'Cobra Kai',
 'Constantine',
 'Daredevil',
 'Dark',
 'Death Note',
 'Deception',
 'Demon Slayer',
 'Designated Survivor',
 'Dexter',
 'Doctor Who',
 'Dr Stone',
 'Dr. House',
 'Elementary',
 'Emily in Paris',
 'Family Guy',
 'Friends',
 'Fringe',
 'Game of thrones',
 'Goliath',
 "Grey's Anatomy",
 'Grimm',
 'Hanna',
 'Haven',
 'Heros',
 'Homeland',
 'House of Cards',
 'How I met your mother',
 'How to get away with murder',
 'Hunters',
 'Inhumans',
 'Inside Job',
 'Into the night',
 'Jack Ryan',
 'Kingdom',
 'Locke & Key',
 'Loki',
 'Lost',
 'Lost in Space',
 'Lucifer',
 'Lupin',
 'Madmen',
 'Mare of Easttown',
 'Mindhunter',
 'Mirzapur',
 'Mr. Robot',
 'Narcos',
 'Nine Perfect Strangers',
 'One Piece',
 'Only Murde

## Training Phase : 

### Selecting the most frequently apprearing item sets using the Apriori algorithm :

In [14]:
frequent_itemsets = (apriori(shows_encoded,min_support=0.2, use_colnames=True)).sort_values(by='support', ascending=False)
frequent_itemsets



Unnamed: 0,support,itemsets
26,0.738095,(Ozark)
34,0.642857,(The Blacklist)
3,0.571429,(Atypical)
22,0.500000,(Mr. Robot)
119,0.500000,"(Ozark, The Blacklist)"
...,...,...
44,0.214286,"(12 Monkeys, Stranger Things)"
40,0.214286,(Vikings)
76,0.214286,"(Ozark, Chernobyl)"
133,0.214286,"(The Mentalist, The Walking Dead)"


### Applying the association rules modele : 

In [15]:
rules = association_rules(frequent_itemsets, metric='lift')
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Ozark),(The Blacklist),0.738095,0.642857,0.500000,0.677419,1.053763,0.025510,1.107143,0.194805
1,(The Blacklist),(Ozark),0.642857,0.738095,0.500000,0.777778,1.053763,0.025510,1.178571,0.142857
2,(Atypical),(Ozark),0.571429,0.738095,0.452381,0.791667,1.072581,0.030612,1.257143,0.157895
3,(Ozark),(Atypical),0.738095,0.571429,0.452381,0.612903,1.072581,0.030612,1.107143,0.258373
4,(Ozark),(Mr. Robot),0.738095,0.500000,0.428571,0.580645,1.161290,0.059524,1.192308,0.530303
...,...,...,...,...,...,...,...,...,...,...
459,(Chernobyl),(Ozark),0.285714,0.738095,0.214286,0.750000,1.016129,0.003401,1.047619,0.022222
460,(The Mentalist),(The Walking Dead),0.428571,0.476190,0.214286,0.500000,1.050000,0.010204,1.047619,0.083333
461,(The Walking Dead),(The Mentalist),0.476190,0.428571,0.214286,0.450000,1.050000,0.010204,1.038961,0.090909
462,(Deception),(Mr. Robot),0.476190,0.500000,0.214286,0.450000,0.900000,-0.023810,0.909091,-0.175000


In [16]:
# Sort the rules by support in descending order
rules_df = rules.sort_values(by='lift', ascending=False)
rules_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
298,"(The Blacklist, Family Guy)","(Atypical, Berlin Station)",0.333333,0.285714,0.214286,0.642857,2.250000,0.119048,2.000000,0.833333
295,"(Atypical, Berlin Station)","(The Blacklist, Family Guy)",0.285714,0.333333,0.214286,0.750000,2.250000,0.119048,2.666667,0.777778
290,"(Atypical, The Blacklist, Berlin Station)",(Family Guy),0.238095,0.452381,0.214286,0.900000,1.989474,0.106576,5.476190,0.652778
316,(Family Guy),"(Atypical, Ozark, Berlin Station)",0.452381,0.238095,0.214286,0.473684,1.989474,0.106576,1.447619,0.908213
303,(Family Guy),"(Atypical, The Blacklist, Berlin Station)",0.452381,0.238095,0.214286,0.473684,1.989474,0.106576,1.447619,0.908213
...,...,...,...,...,...,...,...,...,...,...
154,(Hanna),(Ozark),0.380952,0.738095,0.238095,0.625000,0.846774,-0.043084,0.698413,-0.226190
448,(Outer Banks),(Ozark),0.357143,0.738095,0.214286,0.600000,0.812903,-0.049320,0.654762,-0.263636
73,(Demon Slayer),(Ozark),0.476190,0.738095,0.285714,0.600000,0.812903,-0.065760,0.654762,-0.305263
449,(Ozark),(Outer Banks),0.738095,0.357143,0.214286,0.290323,0.812903,-0.049320,0.905844,-0.467742


In [17]:
rules_df = rules.sort_values(by='support', ascending=False)
rules_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Ozark),(The Blacklist),0.738095,0.642857,0.500000,0.677419,1.053763,0.025510,1.107143,0.194805
1,(The Blacklist),(Ozark),0.642857,0.738095,0.500000,0.777778,1.053763,0.025510,1.178571,0.142857
2,(Atypical),(Ozark),0.571429,0.738095,0.452381,0.791667,1.072581,0.030612,1.257143,0.157895
3,(Ozark),(Atypical),0.738095,0.571429,0.452381,0.612903,1.072581,0.030612,1.107143,0.258373
4,(Ozark),(Mr. Robot),0.738095,0.500000,0.428571,0.580645,1.161290,0.059524,1.192308,0.530303
...,...,...,...,...,...,...,...,...,...,...
335,(Shadow and Bone),"(Demon Slayer, The Blacklist)",0.476190,0.333333,0.214286,0.450000,1.350000,0.055556,1.212121,0.494949
337,"(Ozark, Demon Slayer)",(The Blacklist),0.285714,0.642857,0.214286,0.750000,1.166667,0.030612,1.428571,0.200000
338,"(Demon Slayer, The Blacklist)",(Ozark),0.333333,0.738095,0.214286,0.642857,0.870968,-0.031746,0.733333,-0.181818
339,(Ozark),"(Demon Slayer, The Blacklist)",0.738095,0.333333,0.214286,0.290323,0.870968,-0.031746,0.939394,-0.361290


def associatonRulesFunction(data=shows_encoded?

### Functions : 

In [18]:
def cleanList(l):
    ## Deleting the useless String parts 
    for i in range(len(l)) :
        l[i]=str(l[i]).replace("frozenset({'",'')
        l[i]=l[i].replace("'})",'')
        l[i]=str(l[i]).replace('frozenset({"','')
        l[i]=l[i].replace('"})','')
        l[i]=l[i].replace('", "',',')
        l[i]=l[i].replace("', '",',')
        l[i]=l[i].replace("', ",',')
        l[i]=l[i].replace(", '",',')
        l[i]=l[i].replace('"','')
    return l;


def Recommended(show,number=4):
    ## A function that will only select the first 4 Series that has the highest support with the desired show 
    desired_antecedents_value = {show}
    desired_rows = rules_df.loc[rules_df['antecedents'] == desired_antecedents_value]

    ## We only choose the lines in the rules table that has only one elemnt 
    filtered_rows = desired_rows[desired_rows['consequents'].apply(lambda x: len(x) == 1)] 
    
    if not filtered_rows.empty:
        first_4_rows = filtered_rows.head(number)
        result_list = first_4_rows['consequents'].tolist()
        
        return cleanList(result_list)


In [19]:
import tkinter as tk
from tkinter import ttk

def on_dropdown_change(event):
    selected_value = dropdown_var.get()
    selected_label.config(text=f"Selected value: {selected_value}")

def apply_function():
    selected_value = dropdown_var.get()
    # Perform your desired operation with the selected value
    result_list = Recommended(selected_value)
    result_label.config(text=f"Result list: {result_list}")
    dropdown['values'] = result_list
    dropdown.set("Select an option")

def reset_values():
    selected_label.config(text="")
    result_label.config(text="")
    dropdown.set("Select an option")
    dropdown['values']=my_list
    
root = tk.Tk()
root.title("Dropdown Menu Example")
my_list =shows_encoded.columns.tolist()

dropdown_var = tk.StringVar()

dropdown = ttk.Combobox(root, textvariable=dropdown_var, values=my_list)
dropdown.set("Select an option")  # Set a default value
dropdown.bind("<<ComboboxSelected>>", on_dropdown_change)


apply_button = tk.Button(root, text="Apply Function", command=apply_function)
reset_button = tk.Button(root, text="Reset", command=reset_values)

# Create labels to display selected value and result list
selected_label = tk.Label(root, text="")
result_label = tk.Label(root, text="")

dropdown.pack(pady=20)
apply_button.pack(pady=10)
reset_button.pack(pady=10)
selected_label.pack()
result_label.pack()

root.mainloop()

### Converting the frequent itemsets into a dataframe and exporting it into a csv 

In [20]:
dataset=frequent_itemsets['itemsets'].tolist()
dataset = cleanList(dataset)
dataset

['Ozark',
 'The Blacklist',
 'Atypical',
 'Mr. Robot',
 'Ozark,The Blacklist',
 'The Walking Dead',
 'Deception',
 'Shadow and Bone',
 'Demon Slayer',
 'Atypical,Ozark',
 'Family Guy',
 'Berlin Station',
 'Ozark,Mr. Robot',
 'Atypical,The Blacklist',
 'The Mentalist',
 'Daredevil',
 'Ozark,Family Guy',
 'Atypical,Family Guy',
 'Ozark,The Walking Dead',
 'Hanna',
 'Stranger Things',
 'The Wire',
 'Ozark,Deception',
 'Ozark,Berlin Station',
 'Queen of the South',
 '12 Monkeys',
 'Outer Banks',
 'Atypical,Ozark,The Blacklist',
 'Demon Slayer,The Blacklist',
 'Cobra Kai',
 'The Blacklist,Family Guy',
 'Mr. Robot,The Blacklist',
 'Ozark,Shadow and Bone',
 'Two and a half men',
 'Atypical,Deception',
 'The Blacklist,Shadow and Bone',
 'The Stranger',
 'Big Little Lies',
 'Dr. House',
 'Atypical,Mr. Robot',
 'Alice in Borderland',
 'Atypical,The Blacklist,Family Guy',
 'Ozark,The Mentalist',
 'The Blacklist,The Walking Dead',
 'Atypical,Ozark,Family Guy',
 'Atypical,The Mentalist',
 'Chernoby

In [21]:
result = (pd.DataFrame({"itemsets": dataset,
                       "support": frequent_itemsets['support']})).reset_index(drop=True)
result.to_csv('./Apriori.csv')
result

Unnamed: 0,itemsets,support
0,Ozark,0.738095
1,The Blacklist,0.642857
2,Atypical,0.571429
3,Mr. Robot,0.500000
4,"Ozark,The Blacklist",0.500000
...,...,...
172,"12 Monkeys,Stranger Things",0.214286
173,Vikings,0.214286
174,"Ozark,Chernobyl",0.214286
175,"The Mentalist,The Walking Dead",0.214286
