# PIP (Proximity–Impact–Popularity)

In [16]:
from math import sqrt
import pandas as pd
import numpy as np
import csv
import collections

### Rating (1~5) 일 때, Rmin, Rmax, Rmed 

In [2]:
Rmin=1
Rmax=5
Rmed = (Rmin+Rmax)/2
print("Rmin: {}, Rmax: {}, Rmed: {}".format(Rmin,Rmax,Rmed))

Rmin: 1, Rmax: 5, Rmed: 3.0


### 가게 store id - store name mapping

In [4]:
business = pd.read_csv("business_clean.csv")

In [12]:
stores = {}

In [24]:
id = 0
with open('business_clean.csv','r',encoding="UTF-8") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        #print(row['name'])
        stores[id] = row['name']
        id+=1

In [25]:
type(stores)

dict

### ui matrix

In [27]:
ui_csv = pd.read_csv("review_top.csv")

In [29]:
ui_csv.head()

Unnamed: 0,user,store,stars
0,Chad,China King Super Buffet,3.0
1,Chad,Panda Express,3.0
2,Margret,Gap,3.0
3,Shannon,Encore Beach Club,2.0
4,Jennifer,Wright Bar,4.0


In [36]:
ui = {}
with open("review_top.csv","r",encoding="UTF-8") as csvfile2:
    reader2 = csv.DictReader(csvfile2)
    for row in reader2:
        user = row['user']
        store = row["store"]
        rating = row['stars']
        ui.setdefault(user,{})
        ui[user][store]=float(rating)

In [38]:
type(ui)

dict

### user_list 를 뽑아 후에 indexing을 쉽게할 수 있음
- ex) user_list[0] = 'Chad'

In [39]:
user_list = [i for i in ui]

In [52]:
user_list[:5]

['Chad', 'Margret', 'Shannon', 'Jennifer', 'Nicole']

### PIP 구현에 있어 필요한 요소들

### -1. Agreement 

In [40]:
def Agreement(r1,r2):
    if (r1>Rmed and r2<Rmed) or (r1<Rmed and r2>Rmed):
        return False
    else:
        return True

### -2. Distance 

In [41]:
def Distance(r1,r2):
    agreement = Agreement(r1,r2)
    if agreement == True:
        return abs(r1-r2)
    else:
        return 2*abs(r1-r2)

### -3. Proximity

In [42]:
def Proximity(r1,r2):
    distance = Distance(r1,r2)
    return ((2*(Rmax - Rmin)+1) - distance)**2

### -4. Impact

In [43]:
def Impact(r1,r2):
    agreement = Agreement(r1,r2)
    if agreement == True:
        return (abs(r1-Rmed)+1)*(abs(r2-Rmed)+1)
    else:
        return 1/((abs(r1-Rmed)+1)*(abs(r2-Rmed)+1))

### -5. avg

In [44]:
def avg(my_list):
    total = 0.0
    for rating in my_list:
        total += rating
    return total/len(my_list)

### -6. Popularity

In [45]:
def Popularity(r1,r2,item):
    allRating = []
    for i in item_all_rating[item]:
        allRating.append(item_all_rating[item][i])
    mean = avg(allRating)
    if (r1>mean and r2>mean) or (r1<mean and r2<mean):
        return 1+(((r1+r2)/2)-mean)**2
    else:
        return 1

### -7. co-rated 찾아서 PIP Similarity 구현

In [46]:
# item 별 user 평점
result = collections.defaultdict(dict)
for person in ui:
    for item in ui[person]:
        # Flip item and person
        result[item][person] =ui[person][item]
item_all_rating = result

In [47]:
def PIP(user1,user2):
    co_rating = {}
    for item in ui[user1]:
        if item in ui[user2]:
            co_rating[item] = 1
    if len(co_rating)==0: return 0
    
    sim = 0
    for item in ui[user1]:
        if item in ui[user2]:
            r1 = ui[user1][item]
            r2 = ui[user2][item]
            sim += Proximity(r1,r2)*Impact(r1,r2)*Popularity(r1,r2,item)
            return(sim)
    #print("finsih")
    #rint("{} and {} sim is {}".format(user1,user2,sim))

### -8. PIP 단일 계산

In [49]:
PIP('Chad','Yi')

171.55555555555551

### -9. Similarity matrix 구현

In [50]:
NumUsers= len(ui)
Sim_mat = np.full((NumUsers,NumUsers),0.0)
Sim_mat

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [55]:
for u in range(0,NumUsers):
    for v in range(u,NumUsers):
        a = user_list[u]
        b = user_list[v]
        cal = PIP(a,b)
        Sim_mat[u,v] = cal
        Sim_mat[v,u] = Sim_mat[u,v]
print(Sim_mat)

[[ 101.25        128.          128.         ...    1.5
     0.            0.        ]
 [ 128.          562.04081633  180.89795918 ...    0.
     0.            0.        ]
 [ 128.          180.89795918 1165.         ...    0.
     0.            0.        ]
 ...
 [   1.5           0.            0.         ...   81.
     0.            0.        ]
 [   0.            0.            0.         ...    0.
  1327.71972656    0.        ]
 [   0.            0.            0.         ...    0.
     0.           81.        ]]
