# Implementing Market Basket Analysis

In [1]:
#Loading neccesary packages
import numpy as np
import pandas as pd
import os
import sys
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
#Load dataset from bookcrossing
users = pd.read_csv("./BX-Book/BX-Users.csv", names=['User-ID', 'Location', 'Age'], encoding='latin-1', skiprows=1)
books = pd.read_csv("./BX-Book/BX-Books.csv", names=['ISBN', 'Book-Title' ,'Book-Author','Year-Of-Publication', 'Publisher', 'Image-Url-S', 'Image-Url-M', 'Image-Url-L'], encoding='latin-1', skiprows=1)
ratings = pd.read_csv("./Bx-Book/BX-Book-Ratings.csv", names=['User-ID', 'ISBN', 'Book-Rating'], encoding='latin-1', skiprows=1)

In [3]:
#Data Cleaning (Replacing NULL values)
users['Age'] = users['Age'].fillna("0")

In [4]:
#Print users header
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",0
1,2,"stockton, california, usa",18
2,3,"moscow, yukon territory, russia",0
3,4,"porto, v.n.gaia, portugal",17
4,5,"farnborough, hants, united kingdom",0


In [5]:
#Print users tail
users.tail()

Unnamed: 0,User-ID,Location,Age
278853,278854,"portland, oregon, usa",0
278854,278855,"tacoma, washington, united kingdom",50
278855,278856,"brampton, ontario, canada",0
278856,278857,"knoxville, tennessee, usa",0
278857,278858,"dublin, n/a, ireland",0


In [6]:
#Print books header
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-Url-S,Image-Url-M,Image-Url-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [7]:
#Print books tail
books.tail()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-Url-S,Image-Url-M,Image-Url-L
271374,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...
271375,0525447644,From One to One Hundred,Teri Sloat,1991,Dutton Books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271376,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271377,0192126040,Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...
271378,0767409752,A Guided Tour of Rene Descartes' Meditations o...,Christopher Biffle,2000,McGraw-Hill Humanities/Social Sciences/Languages,http://images.amazon.com/images/P/0767409752.0...,http://images.amazon.com/images/P/0767409752.0...,http://images.amazon.com/images/P/0767409752.0...


In [8]:
#Print ratings header
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [9]:
#Print ratings tail
ratings.tail()

Unnamed: 0,User-ID,ISBN,Book-Rating
1048570,250764,0451410777,0
1048571,250764,0452264464,8
1048572,250764,048623715X,0
1048573,250764,0486256588,0
1048574,250764,0515069434,0


In [10]:
#Print books info
books.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271379 entries, 0 to 271378
Data columns (total 8 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   ISBN                 271379 non-null  object
 1   Book-Title           271379 non-null  object
 2   Book-Author          271378 non-null  object
 3   Year-Of-Publication  271379 non-null  int64 
 4   Publisher            271377 non-null  object
 5   Image-Url-S          271379 non-null  object
 6   Image-Url-M          271379 non-null  object
 7   Image-Url-L          271379 non-null  object
dtypes: int64(1), object(7)
memory usage: 16.6+ MB


In [11]:
#Print users info
users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 278858 entries, 0 to 278857
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   User-ID   278858 non-null  int64 
 1   Location  278858 non-null  object
 2   Age       278858 non-null  object
dtypes: int64(1), object(2)
memory usage: 6.4+ MB


In [12]:
#Print ratings info
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 3 columns):
 #   Column       Non-Null Count    Dtype 
---  ------       --------------    ----- 
 0   User-ID      1048575 non-null  int64 
 1   ISBN         1048575 non-null  object
 2   Book-Rating  1048575 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 24.0+ MB


In [21]:
books = books.merge(ratings, on='ISBN', how="left")
books['Book-Title']

MemoryError: Unable to allocate 702. MiB for an array with shape (2, 46032391) and data type object

In [15]:
books['Book-Title']

0                                       Classical Mythology
1                                              Clara Callan
2                                              Clara Callan
3                                              Clara Callan
4                                              Clara Callan
                                ...                        
954224                           There's a Bat in Bunk Five
954225                              From One to One Hundred
954226    Lily Dale : The True Story of the Town that Ta...
954227                          Republic (World's Classics)
954228    A Guided Tour of Rene Descartes' Meditations o...
Name: Book-Title, Length: 954229, dtype: object

In [16]:
books['Year-Of-Publication']

0         2002
1         2001
2         2001
3         2001
4         2001
          ... 
954224    1988
954225    1991
954226    2004
954227    1996
954228    2000
Name: Year-Of-Publication, Length: 954229, dtype: int64

In [17]:
ratings = ratings.merge(books[['ISBN','Book-Title']], on='ISBN', how='left')

In [147]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title
0,276725,034545104X,0,Flesh Tones: A Novel
1,276725,034545104X,0,Flesh Tones: A Novel
2,276725,034545104X,0,Flesh Tones: A Novel
3,276725,034545104X,0,Flesh Tones: A Novel
4,276725,034545104X,0,Flesh Tones: A Novel


In [18]:
ratings.tail()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title
46032386,250764,0452264464,8,Beloved (Plume Contemporary Fiction)
46032387,250764,0452264464,8,Beloved (Plume Contemporary Fiction)
46032388,250764,048623715X,0,Glamorous Movie Stars of the Thirties: Paper D...
46032389,250764,0486256588,0,Schiaparelli Fashion Review: Paper Dolls in Fu...
46032390,250764,0515069434,0,Lady Laughing Eyes (To Have and to Hold)
