# Recommender System

### 1.The main objective of this project is to create a recommender system that will give an idea about which items to recommend. 
### 2.The goal is to find popular items - globally, country-wise and month-wise and to recommmend items based on estimated ratings and user ratings.

### Import the necessary libraries.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import pandas as pd
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from surprise import SVD
from surprise import Dataset
from surprise import Reader

from mlxtend.frequent_patterns import apriori, association_rules

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/onlineretail/OnlineRetail.xlsx


### Load the dataset 

In [2]:
df = pd.read_excel('/kaggle/input/onlineretail/OnlineRetail.xlsx')


In [3]:
df = df.loc[df['Quantity'] > 0]

### Displaying the dataset

In [4]:
df.head(10)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365.0,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6.0,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365.0,71053.0,WHITE METAL LANTERN,6.0,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365.0,84406B,CREAM CUPID HEARTS COAT HANGER,8.0,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365.0,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6.0,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365.0,84029E,RED WOOLLY HOTTIE WHITE HEART.,6.0,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
5,536365.0,22752.0,SET 7 BABUSHKA NESTING BOXES,2.0,2010-12-01 08:26:00,7.65,17850.0,United Kingdom
6,536365.0,21730.0,GLASS STAR FROSTED T-LIGHT HOLDER,6.0,2010-12-01 08:26:00,4.25,17850.0,United Kingdom
7,536366.0,22633.0,HAND WARMER UNION JACK,6.0,2010-12-01 08:28:00,1.85,17850.0,United Kingdom
8,536366.0,22632.0,HAND WARMER RED POLKA DOT,6.0,2010-12-01 08:28:00,1.85,17850.0,United Kingdom
9,536367.0,84879.0,ASSORTED COLOUR BIRD ORNAMENT,32.0,2010-12-01 08:34:00,1.69,13047.0,United Kingdom


### List of Countries available in the dataset

In [5]:
df.value_counts(['Country'])

Country             
United Kingdom          486286
Germany                   9042
France                    8408
EIRE                      7894
Spain                     2485
Netherlands               2363
Belgium                   2031
Switzerland               1967
Portugal                  1501
Australia                 1185
Norway                    1072
Italy                      758
Channel Islands            748
Finland                    685
Cyprus                     614
Sweden                     451
Unspecified                446
Austria                    398
Denmark                    380
Poland                     330
Japan                      321
Israel                     295
Hong Kong                  284
Singapore                  222
Iceland                    182
USA                        179
Canada                     151
Greece                     145
Malta                      112
United Arab Emirates        68
European Community          60
RSA               

In [6]:
df.value_counts(['InvoiceDate'])

InvoiceDate        
2011-10-31 14:41:00    1114
2011-12-08 09:28:00     749
2011-12-09 10:03:00     731
2011-12-05 17:24:00     721
2011-06-29 15:58:00     705
                       ... 
2011-02-08 12:41:00       1
2011-11-09 12:25:00       1
2011-02-08 12:15:00       1
2011-05-31 10:38:00       1
2011-07-14 16:39:00       1
Length: 19052, dtype: int64

### Drop the Null values

In [7]:
df.dropna(inplace=True)

### Most popular items globally

In [8]:
df1 = df['Description']
duplicates = df1.duplicated()
print("Number of duplicates:", duplicates.sum())


Number of duplicates: 394058


In [9]:
duplicated_rows = df1[duplicates]
most_common_duplicates = duplicated_rows.value_counts().head(10)
print(f"Most popular items Globally:\n{most_common_duplicates}")

Most popular items Globally:
WHITE HANGING HEART T-LIGHT HOLDER    2027
REGENCY CAKESTAND 3 TIER              1723
JUMBO BAG RED RETROSPOT               1617
ASSORTED COLOUR BIRD ORNAMENT         1407
PARTY BUNTING                         1396
LUNCH BAG RED RETROSPOT               1315
SET OF 3 CAKE TINS PANTRY DESIGN      1158
LUNCH BAG  BLACK SKULL.               1104
POSTAGE                               1098
PACK OF 72 RETROSPOT CAKE CASES       1067
Name: Description, dtype: int64


### The above result generates set of item descriptions that are most popular globally

### The next is to find the most popular item among countries

In [10]:
most_popular_items = df.groupby(['Country'])['Description'].sum().sort_values(ascending=False).reset_index()


### Print the result

In [11]:
print(most_popular_items.head(25))

                 Country                                        Description
0         United Kingdom  WHITE HANGING HEART T-LIGHT HOLDERWHITE METAL ...
1                 Cyprus  WHITE HANGING HEART T-LIGHT HOLDERSPACE CADET ...
2               Portugal  VINTAGE PAISLEY STATIONERY SETLUNCH BAG SUKI  ...
3                  Italy  T-LIGHT GLASS FLUTED ANTIQUESCENTED VELVET LOU...
4                  Japan  SET OF 6 VINTAGE NOTELETS KITFANCY FONT BIRTHD...
5                Germany  SET OF 6 T-LIGHTS SANTAROTATING SILVER ANGELS ...
6                    USA  SET OF 6 SPICE TINS PANTRY DESIGNPANTRY WASHIN...
7                 Sweden  SET OF 3 BABUSHKA STACKING TINSWORLD WAR 2 GLI...
8                Belgium  SET OF 20 KIDS COOKIE CUTTERSRED RETROSPOT ROU...
9            Switzerland  ROUND SNACK BOXES SET OF4 WOODLANDPLASTERS IN ...
10                Greece  ROSES REGENCY TEACUP AND SAUCERGREEN REGENCY T...
11                  EIRE  ROSE COTTAGE KEEPSAKE BOXBLUE CHARLIE+LOLA PER...
12  United A

### The above result generates the most popular items for each country specifically

##  **The below step is to find the most popular items - monthly wise**

In [12]:
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])


df['date_new'] = df.InvoiceDate.dt.strftime('%Y-%m')
top_items_monthly = []
for month in df.date_new.unique():
    trans_month = df.loc[df.date_new == month]
    trans_month = (trans_month.groupby(['InvoiceNo', 'Description'])['Quantity']
                  .sum().unstack().reset_index().fillna(0)
                  .set_index('InvoiceNo'))

In [13]:

trans_month[trans_month >= 1] = True
trans_month[trans_month.isna()] = False 

In [14]:
import warnings
warnings.filterwarnings('ignore')
frequent_itemsets = apriori(trans_month, min_support=0.03,use_colnames=True)


### Using the **association rules** method

In [15]:
associationRules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

top_k = associationRules.sort_values(by=['support'],ascending=False).iloc[:10][['antecedents','support']].reset_index(drop=True)

top_items_monthly.append((month, top_k))

### **Using Pivot**

In [16]:
pivot_dfs = []
for i, montly_pairs in enumerate(top_items_monthly):
    month, data = montly_pairs
    inv_map = {k: v for k, v in enumerate(data.antecedents)}
    rows = []
    for index, row in df.loc[(df.date_new == month)].iterrows():
        keys = [inv_map[k] for tup in str(row['Description']).split(',') for k,v in inv_map.items() if str(row['Description']) in list(v)]
        for key in keys:
            rows.append([month, key])
    pivot_df = pd.DataFrame(rows, columns=['month','Item'])
    pivot_df.head()
    pivot_dfs.append(pivot_df.pivot_table(values=["Item"],index=["month"],aggfunc="count",fill_value=0))


### The below code displays the most popular items- monthly wise

In [17]:

df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
df.set_index('InvoiceDate', inplace=True)

monthly_df = df.groupby(pd.Grouper(freq='M'))['Description'].value_counts()

repeated_products = monthly_df[monthly_df > 1]

for month, counts in repeated_products.groupby(level=0):
    print(f"Month: {month.strftime('%B %Y')}")
    print(counts)
    

Month: December 2010
InvoiceDate  Description                       
2010-12-31   WHITE HANGING HEART T-LIGHT HOLDER    207
             HAND WARMER BABUSHKA DESIGN           142
             PAPER CHAIN KIT 50'S CHRISTMAS        141
             REGENCY CAKESTAND 3 TIER              141
             SCOTTIE DOG HOT WATER BOTTLE          132
                                                  ... 
             WRAP, BILLBOARD FONTS DESIGN            2
             YELLOW BREAKFAST CUP AND SAUCER         2
             YELLOW GIANT GARDEN THERMOMETER         2
             YELLOW METAL CHICKEN HEART              2
             ZINC HEART LATTICE T-LIGHT HOLDER       2
Name: Description, Length: 1953, dtype: int64
Month: January 2011
InvoiceDate  Description                       
2011-01-31   WHITE HANGING HEART T-LIGHT HOLDER    160
             SET OF 3 CAKE TINS PANTRY DESIGN      133
             HEART OF WICKER SMALL                 120
             REGENCY CAKESTAND 3 TIER          

### **The following steps will be implementing the recommendation predictor using user ratings and estimated ratings.**

#### Create a new dataframe with attribute values

In [18]:
df1 = df[['CustomerID', 'Description','StockCode', 'Quantity']]

#### Create a rating matrix with the help of pivot table.

In [19]:
ratings_matrix = df1.pivot_table(index=['CustomerID'], columns=['StockCode'], values='Quantity', fill_value=0)


#### The algorithm we will be using is **SVD()-> Single Value Decomposition**

In [20]:
algo = SVD()


In [21]:

reader = Reader(rating_scale=(1, 5))
surprise_data = Dataset.load_from_df(df1[['CustomerID', 'StockCode', 'Quantity']], reader)

In [22]:

trainset = surprise_data.build_full_trainset()
testset = trainset.build_anti_testset()


#### Fit the algorithm

In [23]:
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fc3c0707690>

#### Make predictions using the test data that we found using the algorithm.

In [24]:

predictions = algo.test(testset)

In [25]:

top_n = {}
for uid, iid, true_r, est, _ in predictions:
    if uid not in top_n.keys():
        top_n[uid] = [(iid, est)]
    else:
        top_n[uid].append((iid, est))

#### Drop the null values in the Columns = StockCode and Description as both holds the items that contains missing values and outliers.

In [26]:


df1.dropna(subset=["StockCode", "Description"], inplace=True)

descriptions = df1.groupby("StockCode").first()["Description"]


desc_dict = descriptions.to_dict()


### **The below code displays the predictions based on the recommended items.**

In [27]:
global_top_n = {}

for uid, user_ratings in top_n.items():
    user_ratings.sort(key=lambda x: x[1], reverse=True)
    global_top_n[uid] = []
    for iid, est_rating in user_ratings[:5]:
        if iid in desc_dict:
            global_top_n[uid].append((iid, desc_dict[iid]))

all_items = [iid for uid in global_top_n for iid, desc in global_top_n[uid]]
item_counts = {iid: all_items.count(iid) for iid in set(all_items)}

print("Most Recommended Items (in number of recommendations):")
for item, count in sorted(item_counts.items(), key=lambda x: x[1], reverse=True):
    if item in desc_dict:
        desc = desc_dict[item]
        print("\t", "Item ID:", item, "(\"" + str(desc) + "\")", f"recommended {count} times") 


Most Recommended Items (in number of recommendations):
	 Item ID: 84406B ("CREAM CUPID HEARTS COAT HANGER") recommended 4206 times
	 Item ID: 71053.0 ("WHITE METAL LANTERN") recommended 4190 times
	 Item ID: 84029G ("KNITTED UNION FLAG HOT WATER BOTTLE") recommended 4128 times
	 Item ID: 84029E ("RED WOOLLY HOTTIE WHITE HEART.") recommended 4111 times
	 Item ID: 85123A ("WHITE HANGING HEART T-LIGHT HOLDER") recommended 3483 times
	 Item ID: 22752.0 ("SET 7 BABUSHKA NESTING BOXES") recommended 1091 times
	 Item ID: 21730.0 ("GLASS STAR FROSTED T-LIGHT HOLDER") recommended 320 times
	 Item ID: 22633.0 ("HAND WARMER UNION JACK") recommended 69 times
	 Item ID: 22632.0 ("HAND WARMER RED POLKA DOT") recommended 29 times
	 Item ID: 22745.0 ("POPPY'S PLAYHOUSE BEDROOM") recommended 22 times
	 Item ID: 84879.0 ("ASSORTED COLOUR BIRD ORNAMENT") recommended 21 times
	 Item ID: 22748.0 ("POPPY'S PLAYHOUSE KITCHEN") recommended 7 times
	 Item ID: 22749.0 ("FELTCRAFT PRINCESS CHARLOTTE DOLL") recom