In [1]:
from surprise import KNNBasic


def user_base_function(user_id, trainset, testset, k=10):
    knn = KNNBasic(sim_options={"user_based": True})

    knn.fit(trainset)

    pre = knn.test(testset)

    user_pre = [pred for pred in pre if pred.uid == user_id]

    user_pre.sort(key=lambda x: x.est, reverse=True)

    return [pred.iid for pred in user_pre[:k]], pre


def item_base_function(item_id, trainset, testset, k=10):
    knn = KNNBasic(sim_options={"user_based": False})

    knn.fit(trainset)

    pre = knn.test(testset)

    item_pre = [pred for pred in pre if pred.iid == item_id]

    item_pre.sort(key=lambda x: x.est, reverse=True)

    return [pred.uid for pred in item_pre[:k]], pre

In [7]:
import pandas as pd

df = pd.read_csv('data/sample_movielens_ratings.txt', sep='::', header=None,
                 names=['user_id', 'item_id', 'rating', 'timestamp'])

from surprise import Dataset, Reader

reader = Reader(rating_scale=(1, 5))

dt = Dataset.load_from_df(df.drop('timestamp', axis=1), reader)


  df = pd.read_csv('data/sample_movielens_ratings.txt', sep='::', header=None,


In [8]:
from surprise.model_selection import train_test_split

trainset, testset = train_test_split(dt, test_size=0.33, random_state=42)

In [10]:
knn = KNNBasic(sim_options={"user_based": False})

knn.fit(trainset)

pre = knn.test(testset)

df_pre = pd.DataFrame(pre)

rs = df_pre[['uid', 'iid', 'est']]

rs

Computing the msd similarity matrix...
Done computing similarity matrix.


Unnamed: 0,uid,iid,est
0,5,36,1.796682
1,1,60,1.385400
2,18,11,1.948277
3,4,67,1.649522
4,8,79,2.319807
...,...,...,...
491,22,48,1.964995
492,25,67,1.509304
493,16,98,2.056475
494,29,9,1.796343


In [11]:
top10_user, user_pre = user_base_function(0, trainset, testset, k=10)

top10_item, item_pre = item_base_function(0, trainset, testset, k=10)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


In [14]:
user_df = pd.DataFrame(user_pre)

item_df = pd.DataFrame(item_pre)

user_df[user_df['uid'] == 0]

Unnamed: 0,uid,iid,r_ui,est,details
8,0,37,1.0,1.429768,"{'actual_k': 12, 'was_impossible': False}"
23,0,72,1.0,2.17866,"{'actual_k': 8, 'was_impossible': False}"
51,0,26,3.0,1.166786,"{'actual_k': 10, 'was_impossible': False}"
73,0,46,1.0,1.667228,"{'actual_k': 3, 'was_impossible': False}"
117,0,96,1.0,1.802709,"{'actual_k': 10, 'was_impossible': False}"
123,0,11,1.0,1.226984,"{'actual_k': 6, 'was_impossible': False}"
150,0,41,2.0,1.893677,"{'actual_k': 7, 'was_impossible': False}"
152,0,71,1.0,1.17725,"{'actual_k': 10, 'was_impossible': False}"
172,0,34,1.0,1.871492,"{'actual_k': 6, 'was_impossible': False}"
194,0,31,1.0,1.78187,"{'actual_k': 12, 'was_impossible': False}"


In [15]:
item_df[item_df['iid'] == 0]

Unnamed: 0,uid,iid,r_ui,est,details
156,8,0,1.0,2.058788,"{'actual_k': 38, 'was_impossible': False}"
234,10,0,3.0,1.602204,"{'actual_k': 31, 'was_impossible': False}"
366,19,0,1.0,1.278661,"{'actual_k': 32, 'was_impossible': False}"
374,21,0,1.0,1.729842,"{'actual_k': 29, 'was_impossible': False}"
427,22,0,1.0,1.935781,"{'actual_k': 35, 'was_impossible': False}"
477,15,0,1.0,1.802583,"{'actual_k': 31, 'was_impossible': False}"


In [16]:
print(top10_user)

print(top10_item)

[77, 28, 29, 72, 87, 19, 44, 51, 69, 41]
[8, 22, 15, 21, 10, 19]


In [17]:
from surprise import accuracy

accuracy.rmse(pre)

RMSE: 1.1265


1.126546319577955

In [24]:
from sklearn.datasets import load_iris

import numpy as np

X, y = load_iris(return_X_y=True)


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def costFunction(X, y, theta):
    A = sigmoid(X @ theta)

    first = y * np.log(A)
    second = (1 - y) * np.log(1 - A)

    return -np.sum(first + second) / len(X)


def gradientDescent(X, y, theta, iters, alpha):
    m = len(X)
    costs = np.zeros(iters)

    for i in range(iters):
        A = sigmoid(X @ theta)
        theta = theta - (alpha / m) * X.T @ (A - y)
        cost = costFunction(X, y, theta)
        costs[i] = cost

    return costs, theta




In [25]:
theta = np.zeros(X.shape[1])

iters = 100

alpha = 0.01

costs, theta = gradientDescent(X, y, theta, iters, alpha)

In [28]:
pre = sigmoid(X @ theta)

df = pd.DataFrame()

df['测试值'] = pre.astype(int)

df['实际值'] = y

In [29]:
df

Unnamed: 0,测试值,实际值
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
145,0,2
146,0,2
147,0,2
148,0,2


In [None]:
`