# Exploiting Collaborative Preferences

Publication information: Martins, D. M. L., Vossen, G., & Maleszka, M. (2018, October). Supporting Online Data Purchase by Preference Recommendation. In 2018 IEEE International Conference on Systems, Man, and Cybernetics (SMC) (pp. 3703-3708). IEEE.

URL: https://ieeexplore.ieee.org/document/8616624

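Publication's BibTeX:

```bibtex
@inproceedings{martins2018supporting,
  author       = {Martins, D. M. L. and Vossen, G. and Maleszka, M.},
  title        = {Supporting Online Data Purchase by Preference Recommendation},
  booktitle    = {2018 IEEE International Conference on Systems, Man, and Cybernetics (SMC)},
  pages        = {3703--3708},
  month        = oct,
  year         = {2018},
  organization = {IEEE}
}
```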

### Configuring notebook

In [1]:
cd ..

C:\Users\d_mart04\Documents\CodeRepositoryGit\enablingnontechsdb\experiments


In [2]:
cd ..

C:\Users\d_mart04\Documents\CodeRepositoryGit\enablingnontechsdb


In [3]:
import os
import sys

# Add the `sada` and `decision` directories (one level above the working
# directory) to the import search path. The entries are built with
# os.path.join so the separator matches the host OS instead of being
# hard-coded to the Windows backslash; on Windows the resulting strings are
# identical to the previous literals.
for _package_dir in ('sada', 'decision'):
    _package_path = os.path.join('..', _package_dir)
    if _package_path not in sys.path:  # re-running the cell must not pile up duplicates
        sys.path.append(_package_path)

In [4]:
import pandas as pd
from datamanagement.dataaccessobject import DataAccessObject, Dataset
from decision.clause import *
from decision.criterion import Criterion
from decision.somselector import SomSelector
from sada.decisionsada import DecisionSADA

### Loading car dataset

In [5]:
# Instantiate the project's data access object; it is used in the next cell to load the car dataset.
DAO = DataAccessObject()

In [6]:
# Load the car dataset. Its `preprocessed_data` columns are listed in the next cell,
# and the object is later handed to DecisionSADA.
dataset = DAO.get_car_dataset()

  self.data_matrix = preprocessed_data.as_matrix()


In [7]:
# List the columns of the preprocessed data; these are the attribute names that the
# clauses and criteria in the following sections refer to.
dataset.preprocessed_data.columns

Index(['price', 'mpg', 'num_of_cylinders', 'horsepower', 'fuel_tank_capacity',
       'RPM', 'Wheelbase', 'Weight', 'automatic_gearbox', 'passenger_capacity',
       'length', 'width', 'luggage_capacity', 'Origin', 'Compact', 'Large',
       'Midsize', 'Small', 'Sporty', 'Van', 'Acura', 'Audi', 'BMW', 'Buick',
       'Cadillac', 'Chevrolet', 'Chrysler', 'Dodge', 'Eagle', 'Ford', 'Geo',
       'Honda', 'Hyundai', 'Infiniti', 'Lexus', 'Lincoln', 'Mazda', 'Mercury',
       'Mitsubishi', 'Nissan', 'Oldsmobile', 'Plymouth', 'Pontiac', 'Saab',
       'Saturn', 'Subaru', 'Suzuki', 'Toyota', 'Volkswagen', 'Volvo', '4WD',
       'Front', 'Rear', 'None', 'rear_seat_room', 'driver_passenger',
       'mercedes_benz', 'driver_only'],
      dtype='object')

### Creating SADA

In [8]:
# Build the DecisionSADA over the car dataset; every get_recommendations call below goes through it.
sada = DecisionSADA(dataset)

### Creating the decision database

In [9]:
def print_preferences(preferences):
    """Print each preference on its own line.

    Args:
        preferences: iterable of objects exposing a ``to_string()`` method.
    """
    rendered = (preference.to_string() for preference in preferences)
    for line in rendered:
        print(line)

class DecisionEntry:
    """Record of one purchase decision: the query that was asked and its outcome.

    Attributes:
        preferences: list of clauses that make up the buyer's query.
        criteria: list of criteria used to evaluate the selected candidates.
        optimal_candidates: result of evaluating the query; may be set after
            construction (the notebook assigns it once SADA has been run).
        success: truthy when the decision is considered successful.
    """

    def __init__(self, preferences, criteria, optimal_candidates, success):
        # The query itself.
        self.preferences, self.criteria = preferences, criteria
        # The outcome of the query.
        self.optimal_candidates, self.success = optimal_candidates, success
    
def build_historical_data(sada):
    """Build the decision database of eight hand-written past decisions.

    Every entry is created with its preferences (clauses), its criteria and a
    boolean ``success`` flag. Each query is then run through
    ``sada.get_recommendations`` and the second element of the returned pair is
    stored as the entry's ``optimal_candidates``.

    Args:
        sada: object exposing ``get_recommendations(preferences, criteria)``
            and returning a ``(selected, optimal)`` pair.

    Returns:
        list of DecisionEntry, always in the same fixed order.
    """
    previous_decisions = [
        # 1: Volkswagen with a minimum mpg; cheap and economical.
        DecisionEntry(
            preferences=[
                DiadicClause('Volkswagen', Operation.EQUALS, 1),
                DiadicClause('mpg', Operation.GREATER_THAN_EQUALS, 0.098),
            ],
            criteria=[
                Criterion('price', maximize=False, weight=0.6),
                Criterion('mpg', maximize=True, weight=0.4),
            ],
            optimal_candidates=None, success=True),
        # 2: price-capped sporty car with Origin == 0; cheap and powerful.
        DecisionEntry(
            preferences=[
                DiadicClause('price', Operation.LESS_THAN_EQUALS, 0.231193),
                DiadicClause('Sporty', Operation.EQUALS, 1),
                DiadicClause('Origin', Operation.EQUALS, 0),
            ],
            criteria=[
                Criterion('price', maximize=False, weight=0.6),
                Criterion('horsepower', maximize=True, weight=0.4),
            ],
            optimal_candidates=None, success=True),
        # 3: driver_passenger and Front both set; cheap with high passenger capacity.
        DecisionEntry(
            preferences=[
                DiadicClause('driver_passenger', Operation.EQUALS, 1),
                DiadicClause('Front', Operation.EQUALS, 1),
            ],
            criteria=[
                Criterion('price', maximize=False, weight=0.6),
                Criterion('passenger_capacity', maximize=True, weight=0.4),
            ],
            optimal_candidates=None, success=False),
        # 4: small car under a very tight price cap.
        DecisionEntry(
            preferences=[
                DiadicClause('price', Operation.LESS_THAN_EQUALS, 0.01),
                DiadicClause('Small', Operation.EQUALS, 1),
            ],
            criteria=[
                Criterion('price', maximize=False, weight=0.6),
                Criterion('mpg', maximize=True, weight=0.4),
            ],
            optimal_candidates=None, success=False),
        # 5: small car under a slightly looser price cap; short and economical.
        DecisionEntry(
            preferences=[
                DiadicClause('price', Operation.LESS_THAN_EQUALS, 0.03),
                DiadicClause('Small', Operation.EQUALS, 1),
            ],
            criteria=[
                Criterion('length', maximize=False, weight=0.6),
                Criterion('mpg', maximize=True, weight=0.4),
            ],
            optimal_candidates=None, success=True),
        # 6: sporty car with Rear set and passenger_capacity == 0; high RPM and horsepower.
        DecisionEntry(
            preferences=[
                DiadicClause('Rear', Operation.EQUALS, 1),
                DiadicClause('Sporty', Operation.EQUALS, 1),
                DiadicClause('passenger_capacity', Operation.EQUALS, 0),
            ],
            criteria=[
                Criterion('RPM', maximize=True, weight=0.6),
                Criterion('horsepower', maximize=True, weight=0.4),
            ],
            optimal_candidates=None, success=True),
        # 7: compact car with Rear set and a minimum passenger capacity.
        DecisionEntry(
            preferences=[
                DiadicClause('Compact', Operation.EQUALS, 1),
                DiadicClause('passenger_capacity', Operation.GREATER_THAN_EQUALS, 0.5),
                DiadicClause('Rear', Operation.EQUALS, 1),
            ],
            criteria=[
                Criterion('mpg', maximize=True, weight=0.6),
                Criterion('price', maximize=False, weight=0.4),
            ],
            optimal_candidates=None, success=False),
        # 8: large luggage capacity; economical with a big fuel tank.
        DecisionEntry(
            preferences=[
                DiadicClause('luggage_capacity', Operation.GREATER_THAN_EQUALS, 0.875),
            ],
            criteria=[
                Criterion('mpg', maximize=True, weight=0.6),
                Criterion('fuel_tank_capacity', maximize=True, weight=0.4),
            ],
            optimal_candidates=None, success=True),
    ]

    for decision in previous_decisions:
        # Only the optimal candidates are stored; the first element of the pair is not needed here.
        _, optimal = sada.get_recommendations(decision.preferences, decision.criteria)
        decision.optimal_candidates = optimal

    return previous_decisions

In [10]:
# Build the decision database: past decisions whose optimal candidates were computed by SADA.
previous_decisions = build_historical_data(sada)

### Current buyer

In [11]:
# Query of the current buyer.
# NOTE(review): the thresholds look like values on the scale of
# dataset.preprocessed_data rather than raw prices/mpg - confirm.
preferences = [
    DiadicClause('Compact', Operation.EQUALS, 0),
    DiadicClause('Origin', Operation.EQUALS, 0),
    DiadicClause('price', Operation.LESS_THAN_EQUALS, 0.08),
    DiadicClause('mpg', Operation.LESS_THAN_EQUALS, 0.41),
]

# Candidates are evaluated on mpg (maximized) and price (minimized), equally weighted.
criteria = [
    Criterion('mpg', maximize=True, weight=0.5),
    Criterion('price', maximize=False, weight=0.5),
]

# optimal_candidates starts as an empty list and is overwritten once SADA has been run.
current_buyer = DecisionEntry(
    preferences=preferences,
    criteria=criteria,
    optimal_candidates=[],
    success=False,
)

### Select and evaluate candidates

In [12]:
# Run the current buyer's query through SADA: the first returned frame is kept in `selected`,
# the second is stored on the buyer as its optimal candidates.
selected, current_buyer.optimal_candidates = sada.get_recommendations(current_buyer.preferences, current_buyer.criteria)

In [13]:
# Display the first frame returned by get_recommendations
# (presumably the cars matching the buyer's preferences - confirm against DecisionSADA).
selected

Unnamed: 0,make,manufacturer,type,price,mpg,num_of_cylinders,horsepower,fuel_tank_capacity,RPM,Wheelbase,...,Weight,automatic_gearbox,passenger_capacity,length,width,luggage_capacity,AirBags,DriveTrain,Origin,imagepath
24,Dodge Shadow,Dodge,Small,5650.0,26.0,4,93,14.0,4800,97,...,2670,0,5,172,67,13.0,DriverOnly,Front,0,24Dodge_Shadow.jpg
32,Ford Escort,Ford,Small,5050.0,26.5,4,127,13.2,6500,98,...,2530,0,5,171,67,12.0,,Front,0,32Ford_Escort.jpg
23,Dodge Colt,Dodge,Small,4600.0,31.0,4,92,13.2,6000,98,...,2270,0,5,174,66,11.0,,Front,0,23dodge_colt.jpg
29,Eagle Summit,Eagle,Small,6100.0,31.0,4,92,13.2,6000,98,...,2295,0,5,174,66,11.0,,Front,0,29Eagle_Summit.jpg
35,Ford Probe,Ford,Sporty,7000.0,27.0,4,115,15.5,5500,103,...,2710,0,4,179,70,18.0,DriverOnly,Front,0,35Ford_Probe.jpg


In [14]:
# Show the buyer's optimal candidates, restricted to a few columns for readability.
current_buyer.optimal_candidates[['make', 'type', 'price', 'horsepower', 'mpg']]

Unnamed: 0,make,type,price,horsepower,mpg
23,Dodge Colt,Small,4600.0,92,31.0
29,Eagle Summit,Small,6100.0,92,31.0


### Finding the most similar buyers from the decision database

In [15]:
def create_som_vector(buyer, vector_size):
    """Encode a buyer's optimal candidates as a fixed-length list of integers.

    Position ``i`` holds ``int(buyer.optimal_candidates.index[i])`` when the
    buyer has more than ``i`` optimal candidates, and ``-1`` otherwise.

    Args:
        buyer: object whose ``optimal_candidates`` supports ``len()`` and has
            an ``index`` that can be subscripted by position.
        vector_size: number of positions in the returned list.

    Returns:
        list of ``vector_size`` integers.
    """
    def slot(position):
        ranked = buyer.optimal_candidates
        if len(ranked) > position:
            return int(ranked.index[position])
        return -1  # padding for buyers with fewer candidates than positions

    return [slot(position) for position in range(vector_size)]

def create_som_dataset(previous_decisions, vector_size):
    """Turn the decision database into a Dataset of SOM vectors.

    One vector is produced per decision with ``create_som_vector``; the list of
    vectors and a DataFrame built from that same list are both passed to
    ``Dataset``.

    Args:
        previous_decisions: iterable of decisions accepted by ``create_som_vector``.
        vector_size: length of every vector.

    Returns:
        Dataset wrapping the vectors, in the order of ``previous_decisions``.
    """
    rows = [create_som_vector(decision, vector_size) for decision in previous_decisions]
    return Dataset(rows, pd.DataFrame(rows))

def get_jaccard_most_similar(previous_decisions, current_buyer):
    """Find the past decision whose optimal candidates best match the current buyer's.

    Similarity is the Jaccard index between the sets of ``'make'`` values of the
    optimal candidates. When several decisions share the highest similarity,
    the first one wins.

    Args:
        previous_decisions: non-empty sequence of decisions; each
            ``optimal_candidates['make']`` must be iterable.
        current_buyer: decision of the current buyer, same requirements.

    Returns:
        tuple ``(most_similar, complementary_candidates)``: the position of the
        most similar decision in ``previous_decisions`` and the set of makes
        that decision has but the current buyer does not.

    Raises:
        ValueError: if ``previous_decisions`` is empty.
    """
    if not previous_decisions:
        raise ValueError('previous_decisions must contain at least one decision')

    current_optimal = set(current_buyer.optimal_candidates['make'])
    history = [set(decision.optimal_candidates['make']) for decision in previous_decisions]

    def jaccard(candidates):
        union = current_optimal | candidates
        if not union:
            # Both sets are empty: there is nothing to compare, so report no
            # similarity instead of dividing by zero.
            return 0.0
        return len(current_optimal & candidates) / len(union)

    similarity = [jaccard(candidates) for candidates in history]
    most_similar = similarity.index(max(similarity))
    # Removing the intersection first is redundant: A - (B & A) == A - B.
    complementary_candidates = history[most_similar] - current_optimal
    return most_similar, complementary_candidates

In [16]:
# Configure the SOM-based selector with som_size=(3, 3) and num_iterations=1000.
# NOTE(review): no random seed is set in this notebook - if SomSelector is stochastic,
# the selected decisions may differ between runs; confirm.
som_selector = SomSelector(som_size=(3, 3), num_iterations=1000)

In [17]:
# The buyer's vector and the vectors of the decision database are built with the
# same length, so the size is defined once instead of repeating the literal.
SOM_VECTOR_SIZE = 5

current_buyer_vector = create_som_vector(current_buyer, SOM_VECTOR_SIZE)
som_dataset = create_som_dataset(previous_decisions, SOM_VECTOR_SIZE)

In [18]:
# Ask the selector for 3 items of som_dataset given the current buyer's vector;
# the result is used below as positions into previous_decisions.
similar_indices = som_selector.select(current_buyer_vector, som_dataset, num_selected_items=3)

In [19]:
# Map the selected positions back to the corresponding entries of the decision database.
most_similar_decisions = [previous_decisions[i] for i in similar_indices]

### Creating the rejected-preference short-term memory

In [20]:
# Preferences listed here receive no weight in the weighting loop; the list starts empty.
rejected_preferences = []

### Calculating preference weights

In [21]:
# Amount added to the weight of a preference that qualifies in the weighting loop.
tau_increasing_factor = 1

In [22]:
# Accumulator of (preference, weight) tuples, filled by the weighting loop in the next cell.
all_preferences = []

In [23]:
# Weight every preference found in the most similar decisions. A preference gets
# tau_increasing_factor when the buyer does not already have it, it has not been
# rejected and it comes from a successful decision; otherwise its weight is 0.
for dec in most_similar_decisions:
    for pref in dec.preferences:
        # all_preferences holds (preference, weight) tuples, so the duplicate
        # check must compare against the first element of each tuple. Testing
        # `pref in all_preferences` directly never matches and lets re-runs of
        # this cell append every preference again.
        already_weighted = [known for known, _ in all_preferences]
        if pref in already_weighted:
            continue
        is_new_to_buyer = pref not in current_buyer.preferences
        not_rejected = pref not in rejected_preferences
        weight = tau_increasing_factor if (is_new_to_buyer and not_rejected and dec.success) else 0
        entry = (pref, weight)
        all_preferences.append(entry)

In [24]:
# Order the (preference, weight) tuples from the highest to the lowest weight.
all_preferences = sorted(all_preferences, key=lambda entry: entry[1], reverse=True)

In [25]:
# Keep only the preferences with a positive weight, preserving the sorted order.
relevant_preferences = [clause for clause, weight in all_preferences if weight > 0]
print_preferences(relevant_preferences)

('price', <built-in function le>, 0.231193)
('Sporty', <built-in function eq>, 1)
('Origin', <built-in function eq>, 0)
('price', <built-in function le>, 0.03)
('Small', <built-in function eq>, 1)


In [26]:
# Print the buyer's own preferences for comparison with the weighted ones above.
print_preferences(current_buyer.preferences)

('Compact', <built-in function eq>, 0)
('Origin', <built-in function eq>, 0)
('price', <built-in function le>, 0.08)
('mpg', <built-in function le>, 0.41)


### Recommend the preference with the highest weight that generates a diverse list of optimal offers

In [27]:
# Preferences that change the buyer's optimal candidates; filled by the loop in the next cell.
recommended_preferences = []

In [28]:
# The buyer's current optimal set does not change inside the loop, so it is
# computed once up front instead of on every iteration.
current_optimal_ids = set(current_buyer.optimal_candidates.index.tolist())

for pref in relevant_preferences:
    if pref in recommended_preferences:
        continue  # already recommended, e.g. when this cell is run again

    # Tentatively add the candidate preference to the buyer's own query.
    extended_preferences = [pref]
    extended_preferences.extend(current_buyer.preferences)
    _selected, opt = sada.get_recommendations(query=extended_preferences, criteria=current_buyer.criteria)

    # Recommend the preference only when it changes the optimal candidates retrieved by SADA.
    extended_optimal_ids = set(opt.index.tolist())
    if extended_optimal_ids != current_optimal_ids:
        recommended_preferences.append(pref)

In [29]:
# Print the preferences that passed the check above, in the order they were tested.
print_preferences(recommended_preferences)

('Sporty', <built-in function eq>, 1)
('price', <built-in function le>, 0.03)
('Small', <built-in function eq>, 1)


### Update buyer preferences

In [30]:
# Adopt the top recommendation. The guards keep the cell safe to re-run: nothing
# is appended when there is no recommendation, and the same preference is never
# appended twice.
if not recommended_preferences:
    print('No preference to recommend; the buyer query is left unchanged.')
elif recommended_preferences[0] not in current_buyer.preferences:
    current_buyer.preferences.append(recommended_preferences[0])

In [31]:
# Re-run SADA with the buyer's updated preference list and unchanged criteria.
selected_recommended, optimal_recommended = sada.get_recommendations(query=current_buyer.preferences, criteria=current_buyer.criteria)

In [32]:
# Display the first frame returned for the updated query.
selected_recommended

Unnamed: 0,make,manufacturer,type,price,mpg,num_of_cylinders,horsepower,fuel_tank_capacity,RPM,Wheelbase,...,Weight,automatic_gearbox,passenger_capacity,length,width,luggage_capacity,AirBags,DriveTrain,Origin,imagepath
35,Ford Probe,Ford,Sporty,7000.0,27.0,4,115,15.5,5500,103,...,2710,0,4,179,70,18.0,DriverOnly,Front,0,35Ford_Probe.jpg
60,Mercury Capri,Mercury,Sporty,7050.0,24.5,4,100,11.1,5750,95,...,2450,0,4,166,65,6.0,DriverOnly,Front,0,60mercury_capri.jpg
72,Plymouth Laser,Plymouth,Sporty,7200.0,26.5,4,92,15.9,5000,97,...,2640,0,4,173,67,8.0,,4WD,0,72plymouth_laser.jpg
14,Chevrolet Camaro,Chevrolet,Sporty,7550.0,23.5,6,160,15.5,4600,101,...,3240,0,4,193,74,13.0,DriverAndPassenger,Rear,0,14chevrolet_camaro.jpg
34,Ford Mustang,Ford,Sporty,7950.0,25.5,4,105,15.4,4600,101,...,2850,0,4,180,68,12.0,DriverOnly,Rear,0,34ford_mustang.jpg


In [33]:
# Show the optimal candidates for the updated query, restricted to the same columns as before.
optimal_recommended[['make', 'type', 'price', 'horsepower', 'mpg']]

Unnamed: 0,make,type,price,horsepower,mpg
35,Ford Probe,Sporty,7000.0,115,27.0
