# FP-Growth

#### Import libraries 

In [1]:
import pandas as pd 
import numpy as np
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

## Preprocessing Phase : 

### Reading the data : 

In [2]:
## Load the raw file; header=None tells pandas the file has no header row,
## so the first line is read as data.
df = pd.read_csv("Data/TV shows.csv", header = None )

### Convert data into a list of lists: 

In [3]:
## Cast every cell to str (missing cells become the string 'nan'),
## then turn each row into a plain Python list.
TVshows = df.to_numpy().astype(str).tolist()

### Dropping null values 

In [4]:
def remove_items(test_list, item):
    """Return a new list with every element equal to ``item`` left out.

    ``test_list`` itself is not modified; the relative order of the
    remaining elements is preserved.
    """
    kept = []
    for element in test_list:
        if element != item:
            kept.append(element)
    return kept

In [5]:
## Strip the 'nan' placeholders (nulls that were cast to str) from every transaction.
## The title 'Sex Education' is removed as well; the notebook does not state why.
for idx, watched in enumerate(TVshows):
    without_nan = remove_items(watched, 'nan')
    TVshows[idx] = remove_items(without_nan, 'Sex Education')

### Recreating the dataframe : 

In [6]:
## One row per transaction; the single 'Watched' column holds the list of titles.
shows = pd.DataFrame({"Watched": TVshows})
## Widen the column display so long title lists are not truncated.
pd.set_option("display.max_colwidth", 1000)

### Transactions Selection : 

In [7]:
## Keep only the transactions that contain at least 19 elements so we can get proper results:
shows = shows[shows['Watched'].map(len) >= 19]

### Encoding : 

In [8]:
## Dummy-encode the data: each list is joined with ',' and then split back
## into one 0/1 indicator column per comma-separated token.
## NOTE(review): a title that itself contains a comma would be split into
## several columns - confirm the dataset has none.
shows_encoded = (
    shows['Watched']
    .str.join(',')
    .str.get_dummies(sep=',')
)

## Training Phase : 

### Selecting the most frequently appearing item sets using the FPGrowth algorithm:

In [9]:
## Mine itemsets with support >= 0.2 (labelled by column name), most frequent first.
frequent_itemsets = fpgrowth(
    shows_encoded,
    min_support=0.2,
    use_colnames=True,
).sort_values(by='support', ascending=False)



### Applying the association rules model:

In [10]:
## Derive association rules from the frequent itemsets, evaluated on the lift metric.
## No min_threshold is passed, so the library's default threshold applies.
rules = association_rules(frequent_itemsets, metric='lift')

In [11]:
# Sort the rules by lift in descending order
rules_df = rules.sort_values(by='lift', ascending=False)
rules_df

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
396,"(The Blacklist, Family Guy)","(Atypical, Berlin Station)",0.333333,0.285714,0.214286,0.642857,2.250000,0.119048,2.000000,0.833333
397,"(Atypical, Berlin Station)","(The Blacklist, Family Guy)",0.285714,0.333333,0.214286,0.750000,2.250000,0.119048,2.666667,0.777778
386,(Family Guy),"(Ozark, Atypical, Berlin Station)",0.452381,0.238095,0.214286,0.473684,1.989474,0.106576,1.447619,0.908213
390,"(The Blacklist, Atypical, Berlin Station)",(Family Guy),0.238095,0.452381,0.214286,0.900000,1.989474,0.106576,5.476190,0.652778
379,"(Ozark, Atypical, Berlin Station)",(Family Guy),0.238095,0.452381,0.214286,0.900000,1.989474,0.106576,5.476190,0.652778
...,...,...,...,...,...,...,...,...,...,...
192,(Hanna),(Ozark),0.380952,0.738095,0.238095,0.625000,0.846774,-0.043084,0.698413,-0.226190
80,(Demon Slayer),(Ozark),0.476190,0.738095,0.285714,0.600000,0.812903,-0.065760,0.654762,-0.305263
419,(Outer Banks),(Ozark),0.357143,0.738095,0.214286,0.600000,0.812903,-0.049320,0.654762,-0.263636
81,(Ozark),(Demon Slayer),0.738095,0.476190,0.285714,0.387097,0.812903,-0.065760,0.854637,-0.467742


### Functions : 

In [12]:
##  function for cleaning the results : 
def _strip_frozenset_repr(text):
    """Legacy cleanup: strip the ``frozenset({...})`` wrapper and quotes from a repr string."""
    # The substitutions are order-dependent; keep them in this exact sequence.
    for old, new in (
        ("frozenset({'", ''),
        ("'})", ''),
        ('frozenset({"', ''),
        ('"})', ''),
        ('", "', ','),
        ("', '", ','),
        ("', ", ','),
        (", '", ','),
        ('"', ''),
    ):
        text = text.replace(old, new)
    return text


def cleanList(l):
    """Turn each itemset in ``l`` into a comma-separated string of its members.

    The list is updated in place and the same list object is returned.

    Parameters
    ----------
    l : list
        Entries are normally ``frozenset``/``set`` objects of item names.
        Any other entry is converted with ``str()`` and cleaned textually,
        which still handles strings such as ``"frozenset({'A', 'B'})"``.

    Returns
    -------
    list
        ``l`` itself, with every entry replaced by a string such as ``'A,B'``.
    """
    for i, entry in enumerate(l):
        if isinstance(entry, (frozenset, set)):
            # Join the members directly rather than editing the repr text:
            # the textual cleanup deletes every double quote, which corrupts
            # item names that legitimately contain one.
            l[i] = ','.join(str(member) for member in entry)
        else:
            l[i] = _strip_frozenset_repr(str(entry))
    return l


## Function to recommend a certain number of shows
def Recommended(show, number=4):
    """Recommend up to ``number`` shows for someone who watched ``show``.

    Looks up, in the module-level ``rules_df``, the rules whose antecedent is
    exactly ``{show}`` and whose consequent is a single item, then returns the
    consequents of the first ``number`` such rules. Rows are taken in
    ``rules_df`` order, so the ranking is whatever ``rules_df`` is sorted by.

    Parameters
    ----------
    show : str
        Title to look up as the sole antecedent of a rule.
    number : int, default 4
        Maximum number of recommendations to return.

    Returns
    -------
    list of str or None
        Cleaned consequent titles, or ``None`` when no rule matches.
    """
    desired_antecedents_value = {show}
    desired_rows = rules_df.loc[rules_df['antecedents'] == desired_antecedents_value]
    if desired_rows.empty:
        return None

    ## Keep only the rules whose consequent holds exactly one element
    filtered_rows = desired_rows[desired_rows['consequents'].apply(lambda x: len(x) == 1)]
    if filtered_rows.empty:
        return None

    # Use the `number` argument here; the previous code referenced an
    # undefined name `n` and raised NameError whenever a match existed.
    top_rows = filtered_rows.head(number)
    return cleanList(top_rows['consequents'].tolist())


### Converting the frequent itemsets into a dataframe and exporting it into a csv 

In [13]:
## Pull the itemsets out as a plain list and convert each one to a comma-separated string.
dataset = cleanList(frequent_itemsets['itemsets'].tolist())
dataset


['Ozark',
 'The Blacklist',
 'Atypical',
 'The Blacklist,Ozark',
 'Mr. Robot',
 'Deception',
 'The Walking Dead',
 'Shadow and Bone',
 'Demon Slayer',
 'Ozark,Atypical',
 'Family Guy',
 'Berlin Station',
 'The Blacklist,Atypical',
 'The Mentalist',
 'Mr. Robot,Ozark',
 'Hanna',
 'Ozark,The Walking Dead',
 'Family Guy,Ozark',
 'Family Guy,Atypical',
 'Daredevil',
 'Stranger Things',
 'Outer Banks',
 '12 Monkeys',
 'Ozark,Deception',
 'Ozark,Berlin Station',
 'The Wire',
 'Queen of the South',
 'Two and a half men',
 'Demon Slayer,The Blacklist',
 'The Blacklist,Shadow and Bone',
 'The Blacklist,Ozark,Atypical',
 'Cobra Kai',
 'The Blacklist,Mr. Robot',
 'The Blacklist,Family Guy',
 'Atypical,Deception',
 'Ozark,Shadow and Bone',
 'Family Guy,Ozark,Atypical',
 'The Blacklist,The Walking Dead',
 'Mr. Robot,Atypical',
 'Alice in Borderland',
 'The Blacklist,Family Guy,Atypical',
 'The Mentalist,Atypical',
 'Big Little Lies',
 'Ozark,The Mentalist',
 'The Stranger',
 'Dr. House',
 '12 Monke

In [14]:
## Pair each cleaned itemset string with its support. `dataset` was built from
## the same rows of `frequent_itemsets`, in the same order, so the two line up
## positionally. The index is then renumbered from 0.
result = pd.DataFrame(
    {
        "itemsets": dataset,
        "support": frequent_itemsets['support'],
    }
).reset_index(drop=True)
## Export the table to CSV, then display it.
result.to_csv('./FPgrowf.csv')
result

Unnamed: 0,itemsets,support
0,Ozark,0.738095
1,The Blacklist,0.642857
2,Atypical,0.571429
3,"The Blacklist,Ozark",0.500000
4,Mr. Robot,0.500000
...,...,...
172,"Demon Slayer,Deception",0.214286
173,One Piece,0.214286
174,"Mr. Robot,Deception",0.214286
175,"Demon Slayer,Mr. Robot",0.214286
