In [None]:
pip install mlxtend

In [1]:
#Importing the libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import warnings

#Suppressing deprecation warnings
warnings.filterwarnings ("ignore")


In [4]:
#Reading the dataset
df = pd.read_csv(r"C:\Users\DELL\Downloads\Transaction_List.csv")


In [5]:
df.head()

Unnamed: 0,InvoiceId,Country,TrackName
0,1,Germany,Balls to the Wall
1,1,Germany,Restless and Wild
2,2,Norway,Put The Finger On You
3,2,Norway,Inject The Venom
4,2,Norway,Evil Walks


In [6]:
#Inspecting the shape of the data
df.shape

(2240, 3)

In [7]:
#Inspecting the datatype
df.dtypes

InvoiceId     int64
Country      object
TrackName    object
dtype: object

In [8]:
#Checking for presence of missing values
df.isnull().sum()

InvoiceId    0
Country      0
TrackName    0
dtype: int64

In [9]:
#importing re library for regular expressions
import re

In [10]:

#Removing extra white space in track name
df['TrackName'] = df['TrackName'].astype('str').apply(lambda x: re.sub(r'\s+','',x).strip())

In [11]:
df

Unnamed: 0,InvoiceId,Country,TrackName
0,1,Germany,BallstotheWall
1,1,Germany,RestlessandWild
2,2,Norway,PutTheFingerOnYou
3,2,Norway,InjectTheVenom
4,2,Norway,EvilWalks
...,...,...,...
2235,411,Finland,LookingForLove
2236,411,Finland,SweetLadyLuck
2237,411,Finland,FeirinhadaPavuna/LuzdoRepente/BagaçodaLaranja
2238,411,Finland,Sambaprasmoças


In [12]:
#Checking the number of unque countries to decide which to focus on
print('Total number of countries:', df['Country'].nunique())
df['Country'].value_counts(normalize = True).head(24)

Total number of countries: 24


Country
USA               0.220536
Canada            0.135714
France            0.084821
Brazil            0.084821
Germany           0.067857
United Kingdom    0.050893
Czech Republic    0.033929
Portugal          0.033929
India             0.033036
Australia         0.016964
Chile             0.016964
Ireland           0.016964
Norway            0.016964
Netherlands       0.016964
Spain             0.016964
Sweden            0.016964
Belgium           0.016964
Finland           0.016964
Denmark           0.016964
Italy             0.016964
Poland            0.016964
Austria           0.016964
Hungary           0.016964
Argentina         0.016964
Name: proportion, dtype: float64

In [13]:
#Focusing on the entire data
grouped_df = (df.groupby(['InvoiceId', 'TrackName'])
                 .size() #Counts of transaction
                 .unstack() #pivot the TrackName to column
                 .fillna(0) #fills cells with no transaction with zero i.e empty cells
                 .reset_index()
                 .set_index('InvoiceId') #set the InvoiceId as index of dataframe
                 )


In [14]:
#Focusing on one geography at a time to easily identify customer peference by country
USA_grouped_df = (df[df['Country']=='USA']
                 .groupby(['InvoiceId', 'TrackName'])
                 .size() #Counts of transaction
                 .unstack() #pivot the TrackName to column
                 .fillna(0) #fills cells with no transaction with zero i.e empty cells
                 .reset_index()
                 .set_index('InvoiceId') #set the InvoiceId as index of dataframe
                 )
Canada_grouped_df = (df[df['Country']=='Canada']
                 .groupby(['InvoiceId', 'TrackName'])
                 .size() #Counts of transaction
                 .unstack() #pivot the TrackName to column
                 .fillna(0) #fills cells with no transaction with zero i.e empty cells
                 .reset_index()
                 .set_index('InvoiceId') #set the InvoiceId as index of dataframe
                 )
France_grouped_df = (df[df['Country']=='France']
                 .groupby(['InvoiceId', 'TrackName'])
                 .size() #Counts of transaction
                 .unstack() #pivot the TrackName to column
                 .fillna(0) #fills cells with no transaction with zero i.e empty cells
                 .reset_index()
                 .set_index('InvoiceId') #set the InvoiceId as index of dataframe
                 )
Brazil_grouped_df = (df[df['Country']=='Brazil']
                 .groupby(['InvoiceId', 'TrackName'])
                 .size() #Counts of transaction
                 .unstack() #pivot the TrackName to column
                 .fillna(0) #fills cells with no transaction with zero i.e empty cells
                 .reset_index()
                 .set_index('InvoiceId') #set the InvoiceId as index of dataframe
                 )
Germany_grouped_df = (df[df['Country']=='Germany']
                 .groupby(['InvoiceId', 'TrackName'])
                 .size() #Counts of transaction
                 .unstack() #pivot the TrackName to column
                 .fillna(0) #fills cells with no transaction with zero i.e empty cells
                 .reset_index()
                 .set_index('InvoiceId') #set the InvoiceId as index of dataframe
                 )
UK_grouped_df = (df[df['Country']=='United Kingdom']
                 .groupby(['InvoiceId', 'TrackName'])
                 .size() #Counts of transaction
                 .unstack() #pivot the TrackName to column
                 .fillna(0) #fills cells with no transaction with zero i.e empty cells
                 .reset_index()
                 .set_index('InvoiceId') #set the InvoiceId as index of dataframe
                 )

In [15]:
USA_grouped_df.head()

TrackName,"""?""",05-PhantomoftheOpera,14Years,"ABenihanaChristmas,Pts.1&2",AFestaDoSantoReis,AKindOfMagic,AMelhorForma,"AMidsummerNight'sDream,Op.61IncidentalMusic:No.7Notturno",ANovidade(Live),APaz,...,YouKeepOnMoving,YouKnowI'mNoGood,YouKnowI'mNoGood(feat.GhostfaceKillah),YouOughtaKnow,YouReallyGotMe,YourTimeHasCome,Zambação,ZecaVioleiro,Zooropa,[Untitled]
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
Canada_grouped_df.head()

TrackName,A.N.D.R.O.T.A.Z.,AMoçaeaChuva,ANoiteDoMeuBem,Ali,Alive,AllIReallyWant,AllMyLove,AmIEvil?,AmericanWoman,AndoMeioDesligado,...,WildSide,WomanIsTheNiggerOfTheWorld(AoVivo),WonderfulTonight,Wrathchild,You'reMyBestFriend,YouAreTheEverything,YouLearn,YourTimeIsGonnaCome,Álibi,Óculos
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
47,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
France_grouped_df.head()

TrackName,'RoundMidnight,(WishICould)Hideaway,...AndJusticeForAll,ABanda,AManAndAWoman,ASmallVictory,AStatistic,ASua,Acelerou,"ActIV,Symphony",...,WelcometotheJungle,WhatADay,WhiskeyInTheJar,Who'llStopTheRain,WholeLottaLove,WomenInUniform,Xanadu,Yahweh,YesterdayToTomorrow,YourTimeIsGonnaCome
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
74,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [18]:
Brazil_grouped_df.head()

TrackName,1/2Full,2MinutesToMidnight,ACorDoSol,ACura,AMeninaDança,"Abraham,MartinAndJohn",AcesHigh,AdmirávelGadoNovo,AllAlongTheWatchtower,AllIWantIsYou,...,VamoBatêLata,Vavoom:TedTheMechanic,VoceNaoEntendeNada-Cotidiano,WantedDreadAndAlive,WhyGo,X-92001,You'reMyBestFriend,You'veBeenALongTimeComing,YourBlueRoom,ÁguadeBeber
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
57,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
58,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
Germany_grouped_df.head()

TrackName,01-Prowler,ADayIntheLife,ATouchAway,AfraidToShootStrangers,Alberta,AllStar,AmanhãNãoSeSabe,AtrasDaPorta,Azul,Açai,...,VôoSobreoHorizonte,WalkingAfterYou,We'reGonnaGroove,WhenMyLeftEyeJumps,Whiplash,WorldWideSuicide,YeartotheDay,YouCan'tDoitRight(WiththeOneYouLove),YouShookMe(2),Álibi
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
UK_grouped_df.head()

TrackName,2X4,ACorDoSol,Ain'tMyBitch,Aquilo,AreWeTheWaiting,AssaltaramAGramática,AssimCaminhaAHumanidade,BleedingMe,BlindMan,BlueRythmFantasy,...,VouPraAi,WakeUp,Walter'sWalk,Waterhole(ExpressoBongo),WhenIHadYourLove,WhereEaglesDare,WhereTheRiverGoes,WholeLottaLove(Medley),Zither,[JustLike]StartingOver
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
54,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
109,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
#Converting all positive values to 1 and anything less to 0 as 0
def custom_encoding(x):
    if x>=1:
        return 1
    if x<=0:
        return 0


In [22]:
grouped_df = grouped_df.applymap(custom_encoding)

In [23]:
grouped_df

TrackName,"""?""",#9Dream,'RoundMidnight,(Anesthesia)PullingTeeth,(WhiteMan)InHammersmithPalais,(WishICould)Hideaway,...AndFound,...AndJusticeForAll,01-Prowler,04-RunningFree,...,ZéTrindade,[JustLike]StartingOver,[Untitled],ÀsVezes,ÁguaEFogo,ÁguadeBeber,Álibi,ÉFogo,"Étude1,InCMajor-Preludio(Presto)-Liszt",Óculos
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
409,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
410,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
411,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
USA_grouped_df = USA_grouped_df.applymap(custom_encoding)
Canada_grouped_df = Canada_grouped_df.applymap(custom_encoding)
France_grouped_df = France_grouped_df.applymap(custom_encoding)
Brazil_grouped_df = Brazil_grouped_df.applymap(custom_encoding)
Germany_grouped_df = Germany_grouped_df.applymap(custom_encoding)
UK_grouped_df = UK_grouped_df.applymap(custom_encoding)

In [23]:
USA_grouped_df

TrackName,"""?""",05-PhantomoftheOpera,14Years,"ABenihanaChristmas,Pts.1&2",AFestaDoSantoReis,AKindOfMagic,AMelhorForma,"AMidsummerNight'sDream,Op.61IncidentalMusic:No.7Notturno",ANovidade(Live),APaz,...,YouKeepOnMoving,YouKnowI'mNoGood,YouKnowI'mNoGood(feat.GhostfaceKillah),YouOughtaKnow,YouReallyGotMe,YourTimeHasCome,Zambação,ZecaVioleiro,Zooropa,[Untitled]
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
13,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
16,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
397,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
405,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
406,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
407,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
Canada_grouped_df 

TrackName,A.N.D.R.O.T.A.Z.,AMoçaeaChuva,ANoiteDoMeuBem,Ali,Alive,AllIReallyWant,AllMyLove,AmIEvil?,AmericanWoman,AndoMeioDesligado,...,WildSide,WomanIsTheNiggerOfTheWorld(AoVivo),WonderfulTonight,Wrathchild,You'reMyBestFriend,YouAreTheEverything,YouLearn,YourTimeIsGonnaCome,Álibi,Óculos
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
18,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
27,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
36,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
47,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
49,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
50,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
61,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
72,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
France_grouped_df

TrackName,'RoundMidnight,(WishICould)Hideaway,...AndJusticeForAll,ABanda,AManAndAWoman,ASmallVictory,AStatistic,ASua,Acelerou,"ActIV,Symphony",...,WelcometotheJungle,WhatADay,WhiskeyInTheJar,Who'llStopTheRain,WholeLottaLove,WomenInUniform,Xanadu,Yahweh,YesterdayToTomorrow,YourTimeIsGonnaCome
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
31,0,0,0,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
74,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
83,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
84,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
105,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
106,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
107,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [26]:
Brazil_grouped_df

TrackName,1/2Full,2MinutesToMidnight,ACorDoSol,ACura,AMeninaDança,"Abraham,MartinAndJohn",AcesHigh,AdmirávelGadoNovo,AllAlongTheWatchtower,AllIWantIsYou,...,VamoBatêLata,Vavoom:TedTheMechanic,VoceNaoEntendeNada-Cotidiano,WantedDreadAndAlive,WhyGo,X-92001,You'reMyBestFriend,You'veBeenALongTimeComing,YourBlueRoom,ÁguadeBeber
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
25,0,0,0,0,0,0,0,1,0,0,...,0,1,0,0,0,0,0,0,0,0
34,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
35,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
57,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
58,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
68,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,1,0,0,0
80,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
121,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
123,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [27]:
Germany_grouped_df 

TrackName,01-Prowler,ADayIntheLife,ATouchAway,AfraidToShootStrangers,Alberta,AllStar,AmanhãNãoSeSabe,AtrasDaPorta,Azul,Açai,...,VôoSobreoHorizonte,WalkingAfterYou,We'reGonnaGroove,WhenMyLeftEyeJumps,Whiplash,WorldWideSuicide,YeartotheDay,YouCan'tDoitRight(WiththeOneYouLove),YouShookMe(2),Álibi
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
12,0,0,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
29,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
40,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
52,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
67,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
95,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [28]:
UK_grouped_df 

TrackName,2X4,ACorDoSol,Ain'tMyBitch,Aquilo,AreWeTheWaiting,AssaltaramAGramática,AssimCaminhaAHumanidade,BleedingMe,BlindMan,BlueRythmFantasy,...,VouPraAi,WakeUp,Walter'sWalk,Waterhole(ExpressoBongo),WhenIHadYourLove,WhereEaglesDare,WhereTheRiverGoes,WholeLottaLove(Medley),Zither,[JustLike]StartingOver
InvoiceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
11,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
20,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
43,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
54,1,0,0,1,0,1,0,0,0,0,...,0,0,0,1,1,0,0,0,0,0
109,0,0,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,0
140,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
141,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
152,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
163,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
185,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [63]:
#Calling the Apriori method
frequent_itemsets = apriori(grouped_df, min_support = 0.005, use_colnames = True)

In [64]:
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.007282,(2MinutesToMidnight)
1,0.007282,(BloodBrothers)
2,0.007282,(Brasil)
3,0.007282,(CanIPlayWithMadness)
4,0.007282,(DazedandConfused)
5,0.007282,(FlyingHighAgain)
6,0.007282,(GoodGollyMissMolly)
7,0.009709,(HallowedBeThyName)
8,0.007282,(HeavenCanWait)
9,0.007282,(IronMaiden)


In [31]:
#Calling the Apriori method
USA_frequent_itemsets = apriori(USA_grouped_df, min_support = 0.02, use_colnames = True)

In [34]:
Canada_frequent_itemsets = apriori(Canada_grouped_df, min_support = 0.01, use_colnames = True, low_memory=True)


In [35]:
France_frequent_itemsets = apriori(France_grouped_df, min_support = 0.01, use_colnames = True, low_memory=True)


In [None]:
Brazil_frequent_itemsets = apriori(Brazil_grouped_df, min_support = 0.02, use_colnames = True, low_memory=True)


In [None]:
Germany_frequent_itemsets = apriori(Germany_grouped_df, min_support = 0.04, use_colnames = True, low_memory=True)


In [None]:
UK_frequent_itemsets = apriori(UK_grouped_df, min_support = 0.02, use_colnames = True, low_memory=True)

In [None]:
USA_frequent_itemsets

In [110]:
Canada_frequent_itemsets

Unnamed: 0,support,itemsets
0,0.017857,(A.N.D.R.O.T.A.Z.)
1,0.017857,(AMoçaeaChuva)
2,0.017857,(ANoiteDoMeuBem)
3,0.017857,(Ali)
4,0.017857,(Alive)
...,...,...
135567,0.017857,"(Time, InsideJob, BicycleRace, GivenToFly, Bus..."
135568,0.017857,"(StoneFree, TurboLover, PlasterCaster, SalveSi..."
135569,0.017857,"(SeekAndYouShallFind, Brumário, AmericanWoman,..."
135570,0.017857,"(MayThisBeLove, WhateverItIs,IJustCan'tStop, A..."


In [113]:
France_frequent_itemsets

Unnamed: 0,support,itemsets
0,0.028571,('RoundMidnight)
1,0.028571,((WishICould)Hideaway)
2,0.028571,(...AndJusticeForAll)
3,0.028571,(ABanda)
4,0.028571,(AManAndAWoman)
...,...,...
84890,0.028571,"('RoundMidnight, RitaLee, ComportamentoGeral, ..."
84891,0.028571,"(C'MonEverybody, TheUnforgiven, TheBegining......"
84892,0.028571,"(TapaAqui,DescobreAli, Gavioes2001, MariaFumaç..."
84893,0.028571,"(DieWalküre:TheRideoftheValkyries, InstintoCol..."


In [None]:
Brazil_frequent_itemsets

In [None]:
Germany_frequent_itemsets

In [None]:
UK_frequent_itemsets

In [60]:
#Getting the asscoiation rule 
df_rules = association_rules(frequent_itemsets, metric ="confidence", min_threshold = 0.4)

In [37]:
#Getting the asscoiation rule 
USA_rules = association_rules(USA_frequent_itemsets, metric ="confidence", min_threshold = 0.3)

In [None]:
Canada_rules = association_rules(Canada_frequent_itemsets, metric ="confidence", min_threshold = 0.6)

In [None]:
France_rules = association_rules(France_frequent_itemsets, metric ="confidence", min_threshold = 0.6)

In [None]:
Brazil_rules = association_rules(Brazil_frequent_itemsets, metric ="confidence", min_threshold = 0.4)

In [None]:
Germany_rules = association_rules(Germany_frequent_itemsets, metric ="confidence", min_threshold = 0.4)

In [None]:
UK_rules = association_rules(UK_frequent_itemsets, metric ="confidence", min_threshold = 0.4)

In [62]:
df_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [None]:
USA_rules

In [None]:
Canada_rules

In [None]:
France_rules

In [None]:
Brazil_rules

In [None]:
Germany_rules

In [None]:
UK_rules

In [34]:
df_rules.to_excel('Market_Basket_Analysis.xlsx', index=False)
