In [8]:
# Copyright 2018 The Cornac Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Example for Nearest Neighborhood-based methods with MovieLens 100K dataset"""

import cornac
from cornac.datasets import movielens
from cornac.eval_methods import RatioSplit, CrossValidation, cross_validation
from cornac.models import MF, PMF, BPR
# from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC

from math import*
from decimal import Decimal

import pandas as pd
import numpy as np


In [9]:
#!/usr/bin/env python


class Similarity():

    """ Five similarity measures function """

    def euclidean_distance(self,x,y):

        """ return euclidean distance between two lists """

        return sqrt(sum(pow(a-b,2) for a, b in zip(x, y)))

    def manhattan_distance(self,x,y):

        """ return manhattan distance between two lists """

        return sum(abs(a-b) for a,b in zip(x,y))

    def minkowski_distance(self,x,y,p_value):

        """ return minkowski distance between two lists """

        return self.nth_root(sum(pow(abs(a-b),p_value) for a,b in zip(x, y)),p_value)

    def nth_root(self,value, n_root):

        """ returns the n_root of an value """

        root_value = 1/float(n_root)
        return round (Decimal(value) ** Decimal(root_value),3)

    def cosine_similarity(self,x,y):

        """ return cosine similarity between two lists """

        numerator = sum(a*b for a,b in zip(x,y))
        denominator = self.square_rooted(x)*self.square_rooted(y)
        return round(numerator/float(denominator),3)

    def square_rooted(self,x):

        """ return 3 rounded square rooted value """

        return round(sqrt(sum([a*a for a in x])),3)

    def jaccard_similarity(self,x,y):

        """ returns the jaccard similarity between two lists """

        intersection_cardinality = len(set.intersection(*[set(x), set(y)]))
        union_cardinality = len(set.union(*[set(x), set(y)]))
        return intersection_cardinality/float(union_cardinality)
    
    def zeroCounter(x):
        i = 0
        for l in x:
            if l == 0:
                i += 1
        return i
    
    def Salton_ochiai(self,x,y):

        """ returns the Salton_ochiai similarity between two lists """

        intersection_cardinality = len(set.intersection(*[set(x), set(y)]))
        lenX = len(x) - zeroCounter(x)
        lenY = len(y) - zeroCounter(y)
        multiply_cardinality = sqrt(lenX * lenY)
        # union_cardinality = len(set.union(*[set(x), set(y)]))
        return intersection_cardinality / float(multiply_cardinality) 

    def cosine_Salton_Ochiai(self, x, y, alpha=2.5):
        return (Salton_ochiai(self,x,y) ** alpha) * cosine_similarity(self,x,y)

In [11]:


K = 60  # number of nearest neighbors


# Load ML-100K dataset
# feedback = movielens.load_feedback(variant="100K")
# cornac.datasets.movielens.load_feedback()
feedback = pd.read_csv('u.data.txt', sep='\t', header=None).values
# print(type(feedback))
# Define an evaluation method to split feedback into train and test sets
#ratio_split = RatioSplit(
#    data=feedback, test_size=0.2, exclude_unknowns=True, verbose=True, seed=123
#)

cv = CrossValidation(
    data=feedback,
    n_folds=5,
  #  rating_threshold=0.5,
    partition=None,
    seed=123,
    exclude_unknowns=True,
    verbose=True,
    #item_image=item_image_modality
)

rs = RatioSplit(data=ml_100k, test_size=0.2, rating_threshold=4.0, seed=123)

# UserKNN methods
user_knn_cosine = cornac.models.UserKNN(k=K, similarity="cosine", name="UserKNN-Cosine")
user_knn_pearson = cornac.models.UserKNN(
    k=K, similarity="pearson", name="UserKNN-Pearson"
)
user_knn_amp = cornac.models.UserKNN(
    k=K, similarity="cosine", amplify=2.0, name="UserKNN-Amplified"
)
user_knn_idf = cornac.models.UserKNN(
    k=K, similarity="cosine", weighting="idf", name="UserKNN-IDF"
)
user_knn_bm25 = cornac.models.UserKNN(
    k=K, similarity="cosine", weighting="bm25", name="UserKNN-BM25"
)
# ItemKNN methods
item_knn_cosine = cornac.models.ItemKNN(k=K, similarity="cosine", name="ItemKNN-Cosine")
item_knn_pearson = cornac.models.ItemKNN(
    k=K, similarity="pearson", name="ItemKNN-Pearson"
)
item_knn_adjusted = cornac.models.ItemKNN(
    k=K, similarity="cosine", mean_centered=True, name="ItemKNN-AdjustedCosine"
)

# Put everything together into an experiment
cornac.Experiment(
    eval_method=rs,
    models=[
#         user_knn_cosine,
       # user_knn_pearson,
       # user_knn_amp,
       # user_knn_idf,
       # user_knn_bm25,
       # item_knn_cosine,
       #item_knn_pearson,
#         item_knn_adjusted,
    MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=123),
    PMF(k=10, max_iter=100, learning_rate=0.001, lambda_reg=0.001, seed=123),
    BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=123),
    ],
    metrics=[cornac.metrics.RMSE(),cornac.metrics.MAE()],
    user_based=True,
).run()

rating_threshold = 1.0
exclude_unknowns = True


NameError: name 'ml_100k' is not defined