### Importing the Necessary Libraries

In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

### Data Collection and Pre-Processing

In [None]:
# Load the customer/product spreadsheet into a DataFrame
excel_path = "/content/Product recommendation.xlsx"
product_data = pd.read_excel(excel_path)

In [None]:
# Preview the first five rows of the DataFrame
product_data.head(n=5)

Unnamed: 0,USER_ID,EMAIL,CUS_Month_Income,AGE,RATING,CUS_Marital_Status,Product
0,889,cust889@gmail.com,7116.64,49,3.5,MARRIED,Internationa credit card
1,890,cust890@gmail.com,1500000.0,46,3.0,SINGLE,Insurance
2,891,cust891@gmail.com,5000000.0,53,4.0,SINGLE,Family insurance
3,892,cust892@gmail.com,500.0,15,2.0,SINGLE,Insurance
4,893,cust893@gmail.com,9000000.0,48,1.0,SINGLE,Insurance


In [None]:
# Dimensions of the DataFrame, displayed as (rows, columns)
row_count, column_count = product_data.shape
(row_count, column_count)

(49, 7)

In [None]:
# Column labels of the DataFrame with any duplicates removed
column_labels = product_data.columns.unique()
column_labels

Index(['USER_ID', 'EMAIL', 'CUS_Month_Income', 'AGE', 'RATING',
       'CUS_Marital_Status', 'Product'],
      dtype='object')

In [None]:
# Summarise each column's dtype and non-null count, plus overall memory usage
product_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   USER_ID             49 non-null     int64  
 1   EMAIL               49 non-null     object 
 2   CUS_Month_Income    49 non-null     float64
 3   AGE                 49 non-null     int64  
 4   RATING              49 non-null     float64
 5   CUS_Marital_Status  49 non-null     object 
 6   Product             49 non-null     object 
dtypes: float64(2), int64(2), object(3)
memory usage: 2.8+ KB


In [None]:
# Second look at the first five rows before exploring individual columns
product_data.head(n=5)

Unnamed: 0,USER_ID,EMAIL,CUS_Month_Income,AGE,RATING,CUS_Marital_Status,Product
0,889,cust889@gmail.com,7116.64,49,3.5,MARRIED,Internationa credit card
1,890,cust890@gmail.com,1500000.0,46,3.0,SINGLE,Insurance
2,891,cust891@gmail.com,5000000.0,53,4.0,SINGLE,Family insurance
3,892,cust892@gmail.com,500.0,15,2.0,SINGLE,Insurance
4,893,cust893@gmail.com,9000000.0,48,1.0,SINGLE,Insurance


In [None]:
# Count how many customers share each monthly-income value
income_counts = product_data["CUS_Month_Income"].value_counts()
income_counts

1000000.00     6
2000000.00     5
3000.00        5
1500000.00     3
0.00           2
2500000.00     2
500000.00      2
15000000.00    2
2500.00        2
3500.00        2
5000000.00     2
12000.00       1
5648.12        1
30000000.00    1
1014.00        1
1557470.00     1
7116.64        1
250000.00      1
6000000.00     1
6131000.00     1
2800000.00     1
10000000.00    1
200000.00      1
2556.00        1
9000000.00     1
500.00         1
180.00         1
Name: CUS_Month_Income, dtype: int64

In [None]:
# Count how many customers have each age
age_counts = product_data["AGE"].value_counts()
age_counts

46    5
48    4
49    3
16    3
43    2
41    2
69    2
32    2
51    2
39    2
37    2
60    1
62    1
87    1
82    1
34    1
54    1
36    1
77    1
20    1
18    1
66    1
75    1
30    1
35    1
29    1
64    1
59    1
15    1
53    1
52    1
Name: AGE, dtype: int64

In [None]:
# Count how many customers gave each rating
rating_counts = product_data["RATING"].value_counts()
rating_counts

3.0    12
4.0    11
2.0     7
5.0     6
4.5     5
3.5     3
1.0     3
1.5     2
Name: RATING, dtype: int64

In [None]:
# Count how many customers fall under each marital status
marital_status_counts = product_data["CUS_Marital_Status"].value_counts()
marital_status_counts

SINGLE     36
MARRIED    12
WIDOWED     1
Name: CUS_Marital_Status, dtype: int64

In [None]:
# Count how many customers hold each product
# NOTE(review): the counts list both "Internationa credit card" and
# "International credit card" — this looks like a spelling variant splitting
# one category; confirm against the source spreadsheet.
product_counts = product_data["Product"].value_counts()
product_counts

Family insurance             11
Fixed deposit                11
Insurance                    10
Gold loan                     4
Demat Account                 4
Internationa credit card      3
Long term deposit             2
Savings account               1
International credit card     1
Privilege credit card         1
Business Insurance            1
Name: Product, dtype: int64

In [None]:
# Columns whose text is combined to build the recommendation features
selected_features = ["EMAIL", "Product"]
print(selected_features)

['EMAIL', 'Product']


In [None]:
# Combine the selected feature columns into one space-separated string per row.
# The columns come from `selected_features` rather than being repeated by hand,
# so the two cells cannot drift apart.
# Missing values are replaced with empty strings first: concatenating a NaN
# would make the whole row NaN, which is not a valid text document for the
# vectorizer used later.
combined_features = (
    product_data[selected_features]
    .fillna("")
    .astype(str)
    .agg(" ".join, axis=1)
)

In [None]:
# Inspect the combined "EMAIL Product" strings that will be vectorised
print(combined_features)

0      cust889@gmail.com Internationa credit card
1                     cust890@gmail.com Insurance
2              cust891@gmail.com Family insurance
3                     cust892@gmail.com Insurance
4                     cust893@gmail.com Insurance
5              cust894@gmail.com Family insurance
6                 cust895@gmail.com Fixed deposit
7             cust896@gmail.com Long term deposit
8      cust897@gmail.com Internationa credit card
9                     cust898@gmail.com Insurance
10                    cust899@gmail.com Insurance
11                cust900@gmail.com Fixed deposit
12              cust901@gmail.com Savings account
13                    cust902@gmail.com Insurance
14                    cust903@gmail.com Insurance
15             cust906@gmail.com Family insurance
16             cust907@gmail.com Family insurance
17             cust908@gmail.com Family insurance
18     cust910@gmail.com Internationa credit card
19            cust911@gmail.com Long term deposit


In [None]:
# Converting the text data to feature vectors:
# create a TF-IDF vectorizer with its default settings (no arguments are passed)

vectorizer = TfidfVectorizer()

In [None]:
# Fit the vectorizer on the combined text and transform that same text in one step
feature_vectors = vectorizer.fit_transform(combined_features)

In [None]:
# Print the features; the output lists (row, column) positions with their weights
print(feature_vectors)

  (0, 2)	0.43394890037490697
  (0, 4)	0.43394890037490697
  (0, 61)	0.49033873276009293
  (0, 3)	0.13907443885441073
  (0, 58)	0.13907443885441073
  (0, 5)	0.5867377879399842
  (1, 60)	0.37079503155471294
  (1, 6)	0.880558876306521
  (1, 3)	0.20871884190476972
  (1, 58)	0.20871884190476972
  (2, 56)	0.45190676148199954
  (2, 7)	0.7855159093137271
  (2, 60)	0.3307733352282096
  (2, 3)	0.18619081051959305
  (2, 58)	0.18619081051959305
  (3, 8)	0.880558876306521
  (3, 60)	0.37079503155471294
  (3, 3)	0.20871884190476972
  (3, 58)	0.20871884190476972
  (4, 9)	0.880558876306521
  (4, 60)	0.37079503155471294
  (4, 3)	0.20871884190476972
  (4, 58)	0.20871884190476972
  (5, 10)	0.7855159093137271
  (5, 56)	0.45190676148199954
  :	:
  (44, 56)	0.45190676148199954
  (44, 60)	0.3307733352282096
  (44, 3)	0.18619081051959305
  (44, 58)	0.18619081051959305
  (45, 50)	0.7594963013884217
  (45, 55)	0.4091869719859918
  (45, 57)	0.436937698967621
  (45, 3)	0.1800233836965677
  (45, 58)	0.1800233836965

### Cosine Similarity

In [None]:
# getting the similarity scores using cosine similarity
# (a single argument is passed, so rows of feature_vectors are compared with each other)
# NOTE(review): no later cell visible in this notebook reads `similarity`; the
# suggestion is produced by a direct lookup on the matched email — confirm
# whether this step is still needed.

similarity = cosine_similarity(feature_vectors)

In [None]:
# Show the similarity matrix (the printed form elides the middle rows/columns with "...")
print(similarity)

[[1.         0.05805491 0.05178876 ... 0.39529198 0.0500733  0.04357053]
 [0.05805491 1.         0.20037217 ... 0.05525709 0.07514854 0.16857558]
 [0.05178876 0.20037217 1.         ... 0.04929293 0.0670374  0.15038041]
 ...
 [0.39529198 0.05525709 0.04929293 ... 1.         0.04766014 0.04147075]
 [0.0500733  0.07514854 0.0670374  ... 0.04766014 1.         0.05639939]
 [0.04357053 0.16857558 0.15038041 ... 0.04147075 0.05639939 1.        ]]


### Getting the email from the user

In [None]:
# Ask the user for the email address to look up
email_prompt = ' Enter your Email : '
email = input(email_prompt)

 Enter your Email : cust899@gmail.com


In [None]:
# Collect every email address in the dataset as a plain Python list
email_column = product_data['EMAIL']
list_of_all_email = email_column.tolist()
print(list_of_all_email)

['cust889@gmail.com', 'cust890@gmail.com', 'cust891@gmail.com', 'cust892@gmail.com', 'cust893@gmail.com', 'cust894@gmail.com', 'cust895@gmail.com', 'cust896@gmail.com', 'cust897@gmail.com', 'cust898@gmail.com', 'cust899@gmail.com', 'cust900@gmail.com', 'cust901@gmail.com', 'cust902@gmail.com', 'cust903@gmail.com', 'cust906@gmail.com', 'cust907@gmail.com', 'cust908@gmail.com', 'cust910@gmail.com', 'cust911@gmail.com', 'cust913@gmail.com', 'cust914@gmail.com', 'cust915@gmail.com', 'cust916@gmail.com', 'cust917@gmail.com', 'cust918@gmail.com', 'cust919@gmail.com', 'cust920@gmail.com', 'cust921@gmail.com', 'cust922@gmail.com', 'cust923@gmail.com', 'cust924@gmail.com', 'cust925@gmail.com', 'cust926@gmail.com', 'cust927@gmail.com', 'cust928@gmail.com', 'cust929@gmail.com', 'cust930@gmail.com', 'cust932@gmail.com', 'cust933@gmail.com', 'cust934@gmail.com', 'cust935@gmail.com', 'cust936@gmail.com', 'cust937@gmail.com', 'cust938@gmail.com', 'cust939@gmail.com', 'cust940@gmail.com', 'cust941@gma

In [None]:
# Fuzzy-match the entered email against the known addresses (best match first)
find_close_match = difflib.get_close_matches(
    word=email,
    possibilities=list_of_all_email,
)
print(find_close_match)

['cust899@gmail.com', 'cust939@gmail.com', 'cust929@gmail.com']


In [None]:
# Take the best (first) fuzzy match.
# get_close_matches returns an empty list when no address is similar enough,
# so fail with an explicit message instead of an opaque IndexError on [0].
if not find_close_match:
    raise ValueError(
        "No customer email resembling {!r} was found in the dataset".format(email)
    )
close_match = find_close_match[0]
print(close_match)

cust899@gmail.com


In [None]:
# Look up the USER_ID stored for the matched email
is_matched_customer = product_data['EMAIL'] == close_match
userid_of_the_cust = product_data.loc[is_matched_customer, 'USER_ID'].values
print(userid_of_the_cust)

[899]


In [None]:
# Look up the RATING stored for the matched email
is_matched_customer = product_data['EMAIL'] == close_match
rating_of_the_cust = product_data.loc[is_matched_customer, 'RATING'].values
print(rating_of_the_cust)

[2.]


In [None]:
# Look up the monthly income stored for the matched email
is_matched_customer = product_data['EMAIL'] == close_match
monthly_income_of_the_cust = product_data.loc[is_matched_customer, 'CUS_Month_Income'].values
print(monthly_income_of_the_cust)

[2800000.]


In [None]:
# Look up the AGE stored for the matched email
is_matched_customer = product_data['EMAIL'] == close_match
Age_of_the_cust = product_data.loc[is_matched_customer, 'AGE'].values
print(Age_of_the_cust)

[64]


In [None]:
# Look up the marital status stored for the matched email
is_matched_customer = product_data['EMAIL'] == close_match
marital_status_of_the_cust = product_data.loc[is_matched_customer, 'CUS_Marital_Status'].values
print(marital_status_of_the_cust)

['MARRIED']


In [None]:
# Look up the Product stored for the matched email
is_matched_customer = product_data['EMAIL'] == close_match
product_of_the_cust = product_data.loc[is_matched_customer, 'Product'].values
print(product_of_the_cust)

['Insurance']


In [None]:
# Report the product found for the matched email
suggestion_label = 'product suggested for you : '
print(suggestion_label, product_of_the_cust)

product suggested for you :  ['Insurance']


### Product Recommendation System

In [None]:
# End-to-end lookup: read an email, fuzzy-match it against the dataset and
# report the product recorded for the best-matching customer.

# Surrounding whitespace is a common copy/paste artefact and only lowers the match score
email = input(' Enter your Email : ').strip()

list_of_all_email = product_data['EMAIL'].tolist()

# Candidates come back best-first; the list is empty when no address is similar enough
find_close_match = difflib.get_close_matches(email, list_of_all_email)

# Fail with an explicit message rather than an IndexError on find_close_match[0]
if not find_close_match:
    raise ValueError(
        "No customer email resembling {!r} was found in the dataset".format(email)
    )

close_match = find_close_match[0]

# One .loc selection (row mask + column label) instead of chained indexing
product_of_the_cust = product_data.loc[product_data['EMAIL'] == close_match, 'Product'].values

print("Product suggestion for the customer - ",  product_of_the_cust)

 Enter your Email : cust921@gmail.com
Product suggestion for the customer -  ['Family insurance']


### Pickle File

In [None]:
# Persist the most recent suggestion to disk.
# Note: this stores only `product_of_the_cust` (the array looked up for the
# last email entered), not the vectorizer or the similarity matrix.

import pickle

# The context manager guarantees the file is flushed and closed, even if
# pickling raises part-way through.
with open("product_of_the_cust.pkl", "wb") as pickle_file:
    pickle.dump(product_of_the_cust, pickle_file)