In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD

In [2]:
# Load the raw ratings table; later cells use its `user_id`, `ad_id`
# and `review_rating` columns.
df = pd.read_csv(filepath_or_buffer='ratings.csv')

In [3]:
# Peek at the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,user_id,ad_id,review_rating
0,5c5d894ce2e44477b07af2416567180b,e0933f206ef74071bef2a07146bf3452,5
1,09a015bc4845494ea9b037248e4d74d2,e0933f206ef74071bef2a07146bf3452,5
2,93cc944a7100400c943d8c1b71edb793,e0933f206ef74071bef2a07146bf3452,2
3,0dddf3d710f44a75a3a2f14dddbf3453,e0933f206ef74071bef2a07146bf3452,4
4,4f80ff8a9e19497ab2b7a8bdf230d6b6,e0933f206ef74071bef2a07146bf3452,3


In [4]:
# Number of ratings each ad received, most-rated ads first.
# Note: the resulting `review_rating` column holds a *count*, not a rating.
popular_ads = (
    df.groupby('ad_id')['review_rating']
    .count()
    .to_frame()
    .sort_values(by='review_rating', ascending=False)
)
popular_ads

Unnamed: 0_level_0,review_rating
ad_id,Unnamed: 1_level_1
e0933f206ef74071bef2a07146bf3452,15
02a9c4e1181d4212b1e48adf25d25376,11
68c5999c1c1949ff9d7951b0bd6f9d57,11
6916d62825c04f2d86ad30358fba40c7,10
b8362773943a46c28dfba6595e49d372,9
931f57551a4c463e8262a7a92c4eb7a2,8
ab591370e4db49e59832628958b57d17,8
72fde2f9e6eb4468a976fdd70e5d7c7d,7
a0db9d868d664e2a87611c99b78f6ac0,7
eed3a93d8916465ab530dca1b576f831,7


In [5]:
# User x ad utility matrix: one row per user, one column per ad, cell = rating.
# (user, ad) pairs with no rating are filled with 0; repeated ratings for the
# same pair are combined by pivot_table's default aggregation.
rating_utility_matrix = pd.pivot_table(
    df,
    index='user_id',
    columns='ad_id',
    values='review_rating',
    fill_value=0,
)
rating_utility_matrix

ad_id,02a9c4e1181d4212b1e48adf25d25376,4cc44aa0416c49f4992fd0e3cabbd682,514d157c05664c18bdda78838a01c7fc,68c5999c1c1949ff9d7951b0bd6f9d57,6916d62825c04f2d86ad30358fba40c7,72fde2f9e6eb4468a976fdd70e5d7c7d,931f57551a4c463e8262a7a92c4eb7a2,9ad79fabb8264b0287a543695f055b77,a0db9d868d664e2a87611c99b78f6ac0,ab591370e4db49e59832628958b57d17,b8362773943a46c28dfba6595e49d372,bab50c86be51453bb9379ada81e4b752,e0933f206ef74071bef2a07146bf3452,eed3a93d8916465ab530dca1b576f831
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0524ac7749bc45af8940fce9fe55ef9b,0,0,0,0,0,0,0,0,0,0,0,0,0,3
09693819b1ee4edc930d8aa8282c4f4f,0,0,0,0,5,0,0,0,2,0,0,0,0,0
09a015bc4845494ea9b037248e4d74d2,1,0,0,0,0,4,0,0,0,0,0,0,5,5
0dddf3d710f44a75a3a2f14dddbf3453,0,0,0,0,0,0,0,0,0,0,0,0,4,3
0ed9504c00b7436488c319c4e57480b9,0,0,0,0,4,0,0,0,0,4,0,0,3,0
1448f57b5f4f4558b7f221400c43f037,0,4,0,0,0,0,5,0,0,0,0,0,5,0
1745178b7a5e4504b1fde3801f81e6a7,5,3,0,0,0,0,0,0,0,0,0,0,5,0
17e2c8dd7e024881970f85a8d1132ce8,0,0,0,0,0,3,0,0,0,5,0,0,0,0
28edaebb7c104865a7d339f373b86bf3,0,0,0,0,5,0,4,0,0,0,0,0,0,0
2ee63817d1b74243a19622279655ab81,0,0,0,3,0,0,0,0,0,0,0,0,0,0


In [6]:
# Flip to ad x user orientation so that each row describes one ad.
rating_utility_matrix_transpose = rating_utility_matrix.transpose()
rating_utility_matrix_transpose

user_id,0524ac7749bc45af8940fce9fe55ef9b,09693819b1ee4edc930d8aa8282c4f4f,09a015bc4845494ea9b037248e4d74d2,0dddf3d710f44a75a3a2f14dddbf3453,0ed9504c00b7436488c319c4e57480b9,1448f57b5f4f4558b7f221400c43f037,1745178b7a5e4504b1fde3801f81e6a7,17e2c8dd7e024881970f85a8d1132ce8,28edaebb7c104865a7d339f373b86bf3,2ee63817d1b74243a19622279655ab81,...,b31ebf0a8e474c02a2208f52b7d0309e,b4841e08aea74fc9937d6e9bb9dfe49a,bbf2079473574b6fa04875e299cc542b,c48c8c4902b345239fe9ea2b40dedc9c,d00e783aa92344a9909c72d85f383ab7,dfb4867e50d44e93b635b072149cc885,e18094af7f4d4fd8b1281cb98d3605f2,e29275e68cf24c5489f2a26ee0b6e74f,e799d1274ccd4e77a7cd8b734992eda7,f233050a18134718a33ce9f3a7cf8327
ad_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
02a9c4e1181d4212b1e48adf25d25376,0,0,1,0,0,0,5,0,0,0,...,0,0,0,0,0,0,4,0,0,0
4cc44aa0416c49f4992fd0e3cabbd682,0,0,0,0,0,4,3,0,0,0,...,0,0,0,0,0,0,0,0,0,0
514d157c05664c18bdda78838a01c7fc,0,0,0,0,0,0,0,0,0,0,...,0,0,5,0,0,0,0,0,4,0
68c5999c1c1949ff9d7951b0bd6f9d57,0,0,0,0,0,0,0,0,0,3,...,0,0,0,4,0,0,0,0,0,0
6916d62825c04f2d86ad30358fba40c7,0,5,0,0,4,0,0,0,5,0,...,2,0,0,0,0,0,0,0,0,0
72fde2f9e6eb4468a976fdd70e5d7c7d,0,0,4,0,0,0,0,3,0,0,...,0,5,4,0,0,0,0,0,0,0
931f57551a4c463e8262a7a92c4eb7a2,0,0,0,0,0,5,0,0,4,0,...,4,0,5,0,0,0,0,0,0,0
9ad79fabb8264b0287a543695f055b77,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
a0db9d868d664e2a87611c99b78f6ac0,0,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
ab591370e4db49e59832628958b57d17,0,0,0,0,4,0,0,5,0,0,...,0,0,0,0,0,4,0,5,0,0


In [7]:
# Ad x user matrix that is fed to the decomposition below.
# NOTE: this is a plain alias of `rating_utility_matrix_transpose` (same
# object, no copy); no de-duplication or subsetting happens here.
unique_products = rating_utility_matrix_transpose
unique_products

user_id,0524ac7749bc45af8940fce9fe55ef9b,09693819b1ee4edc930d8aa8282c4f4f,09a015bc4845494ea9b037248e4d74d2,0dddf3d710f44a75a3a2f14dddbf3453,0ed9504c00b7436488c319c4e57480b9,1448f57b5f4f4558b7f221400c43f037,1745178b7a5e4504b1fde3801f81e6a7,17e2c8dd7e024881970f85a8d1132ce8,28edaebb7c104865a7d339f373b86bf3,2ee63817d1b74243a19622279655ab81,...,b31ebf0a8e474c02a2208f52b7d0309e,b4841e08aea74fc9937d6e9bb9dfe49a,bbf2079473574b6fa04875e299cc542b,c48c8c4902b345239fe9ea2b40dedc9c,d00e783aa92344a9909c72d85f383ab7,dfb4867e50d44e93b635b072149cc885,e18094af7f4d4fd8b1281cb98d3605f2,e29275e68cf24c5489f2a26ee0b6e74f,e799d1274ccd4e77a7cd8b734992eda7,f233050a18134718a33ce9f3a7cf8327
ad_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
02a9c4e1181d4212b1e48adf25d25376,0,0,1,0,0,0,5,0,0,0,...,0,0,0,0,0,0,4,0,0,0
4cc44aa0416c49f4992fd0e3cabbd682,0,0,0,0,0,4,3,0,0,0,...,0,0,0,0,0,0,0,0,0,0
514d157c05664c18bdda78838a01c7fc,0,0,0,0,0,0,0,0,0,0,...,0,0,5,0,0,0,0,0,4,0
68c5999c1c1949ff9d7951b0bd6f9d57,0,0,0,0,0,0,0,0,0,3,...,0,0,0,4,0,0,0,0,0,0
6916d62825c04f2d86ad30358fba40c7,0,5,0,0,4,0,0,0,5,0,...,2,0,0,0,0,0,0,0,0,0
72fde2f9e6eb4468a976fdd70e5d7c7d,0,0,4,0,0,0,0,3,0,0,...,0,5,4,0,0,0,0,0,0,0
931f57551a4c463e8262a7a92c4eb7a2,0,0,0,0,0,5,0,0,4,0,...,4,0,5,0,0,0,0,0,0,0
9ad79fabb8264b0287a543695f055b77,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
a0db9d868d664e2a87611c99b78f6ac0,0,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
ab591370e4db49e59832628958b57d17,0,0,0,0,4,0,0,5,0,0,...,0,0,0,0,0,4,0,5,0,0


In [17]:
# Decomposing the matrix
# TruncatedSVD defaults to a randomized solver with no fixed seed, so pin
# `random_state` to make Restart & Run All reproduce the same factors.
# The requested rank (20) is also capped at the matrix dimensions: the matrix
# has one row per ad, and the decomposition cannot yield more components than
# it has rows or columns.
n_components = min(20, *unique_products.shape)
SVD = TruncatedSVD(n_components=n_components, random_state=42)
# Rows of `decomposed_matrix` stay aligned with the rows (ads) of `unique_products`.
decomposed_matrix = SVD.fit_transform(unique_products)
decomposed_matrix

array([[ 6.52931259e+00,  9.26283289e+00, -2.60047718e+00,
        -6.22284234e+00, -5.22400452e-01, -3.27742431e+00,
        -1.38548497e+00,  8.05332628e-01, -2.24654450e+00,
         1.31546950e+00,  2.25006976e-02,  5.40233317e-01,
        -1.08013452e+00, -5.12584598e-01],
       [ 3.25080872e+00,  6.54846648e-01,  3.12102360e-01,
        -1.14531374e-01,  1.44472004e+00, -1.85605947e+00,
        -3.82200910e+00,  1.80018856e-01, -1.65023690e+00,
         2.53627312e-01,  6.26136414e-01,  9.92176069e-01,
         3.88115325e+00,  7.02239825e-01],
       [ 1.46467110e+00,  4.50117504e-01,  7.56656562e+00,
        -7.12757583e-01, -3.73514548e+00,  9.05648017e-01,
         2.15929165e+00,  1.46513787e+00, -2.34393976e+00,
        -2.62873341e+00,  1.66183153e+00,  4.34422429e+00,
        -3.27347155e-01,  4.06321215e-01],
       [ 8.08546878e+00, -3.78463709e+00, -4.38112855e+00,
        -9.86056759e-04, -3.89157854e+00,  4.47657264e+00,
         2.10052097e+00, -3.47854392e+00, -4.

In [18]:
# Ad-to-ad correlation matrix: every row of `decomposed_matrix` is treated as
# one variable (rowvar=True is numpy's default, spelled out here), so entry
# [i, j] is the Pearson correlation between the latent vectors of row i and row j.
correlation_matrix = np.corrcoef(decomposed_matrix, rowvar=True)
correlation_matrix

array([[ 1.00000000e+00,  3.84322621e-01, -3.29279307e-03,
         1.21181361e-01,  4.98765608e-02,  2.23007403e-01,
         1.36005154e-03,  2.99243359e-01,  1.39636793e-01,
         1.17319480e-01,  3.82455171e-01,  1.57098569e-01,
         1.74854239e-01,  3.29947332e-02],
       [ 3.84322621e-01,  1.00000000e+00, -5.07622445e-02,
         1.07254267e-01,  2.47098327e-01, -5.87446557e-02,
         2.58995182e-01, -6.62364659e-02,  2.45470971e-02,
         6.16757855e-02, -1.78961931e-02, -7.35371182e-02,
         4.31027070e-01, -8.17028463e-02],
       [-3.29279307e-03, -5.07622445e-02,  1.00000000e+00,
         3.02876425e-02, -3.21987398e-03,  2.69583387e-01,
         4.33555383e-01, -9.84872182e-02,  3.64991591e-02,
        -1.53339128e-01,  7.68079367e-02, -1.09342582e-01,
        -7.83184679e-02, -1.82471561e-02],
       [ 1.21181361e-01,  1.07254267e-01,  3.02876425e-02,
         1.00000000e+00,  3.32388893e-01,  6.33366525e-02,
         1.14979079e-01,  3.95204432e-02,  1.

In [13]:
# Show which ad id sits at row position 3 of the ad x user matrix.
rating_utility_matrix_transpose.index.tolist()[3]


'68c5999c1c1949ff9d7951b0bd6f9d57'

In [20]:
# Locate the row position of the ad we want correlations for.
# NOTE: the names are misleading but kept because later cells rely on them:
#   - `user_id` actually holds an *ad* identifier (it is looked up in the ad index);
#   - `ad_id` is the integer row position of that ad, not an identifier.
user_id = '68c5999c1c1949ff9d7951b0bd6f9d57'
ad_names = rating_utility_matrix_transpose.index.tolist()
ad_id = ad_names.index(user_id)
ad_id

3

In [15]:
# Row of the correlation matrix for the selected ad: its correlation with
# every ad (including itself, which is 1.0 at position `ad_id`).
correlation_ad_id = correlation_matrix[ad_id, :]
correlation_ad_id

array([0.12796292, 0.09440203, 0.04328043, 1.        , 0.34530686,
       0.10460741, 0.12014307, 0.07338816, 0.1304745 , 0.22140695,
       0.28614244, 0.36639236, 0.39516204, 0.05994803])