In [1]:
import numpy as np
import numpy.ma as ma
from numpy import genfromtxt
from collections import defaultdict
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import tabulate
pd.set_option("display.precision", 1)

In [2]:
# Load and preprocess the data
courses = pd.read_excel('Rated_Courses.xlsx')
users = pd.read_excel('User_Profiles.xlsx')
ratings = pd.read_excel('User_Ratings.xlsx')
y_train = ratings.rating.values

## Cleaning

In [3]:
users = users.drop('Unnamed: 0',axis=1)
ratings = ratings.drop('Unnamed: 0',axis=1)
courses = courses.drop('Unnamed: 0',axis=1)
courses['id'] = courses['id'].astype(int)

## Feature Extraction

### User Data

In [4]:
# Merging courses into ratings and gettings dummies for subcategory.
df_merged = pd.merge(ratings,courses[['id','subcategory']], left_on='courseId', right_on='id').drop_duplicates()
df_subcategory = pd.get_dummies(df_merged['subcategory'])

# Contains each user rating with one hot encoding of course subcategory
df_ratings = pd.concat([ratings, df_subcategory], axis=1)

In [5]:
# Group by userid and category, and calculate the mean rating for each group
df_user_subcategory_rating = df_merged.groupby(["userId", "subcategory"])["rating"].mean().reset_index()

# Pivot the table to have category columns with average rating values
df_user_subcategory_rating = df_user_subcategory_rating.pivot(index="userId", columns="subcategory", values="rating").reset_index()

# Rename the columns for clarity
df_user_subcategory_rating.columns.name = None

# Display the new dataframe
df_user_average_ratings = df_user_subcategory_rating.replace(np.nan, 0)
user_input = df_user_average_ratings.iloc[:,1:]
user_input

Unnamed: 0,3D & Animation,Accounting & Bookkeeping,Affiliate Marketing,Apple,Architectural Design,Arts & Crafts,Beauty & Makeup,Branding,Business Analytics & Intelligence,Business Law,...,Teacher Training,Test Prep,Travel,User Experience Design,Video & Mobile Marketing,Video Design,Vocal,Web Design,Web Development,Yoga
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.3,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0
9996,0.0,0.0,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0
9998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0


### Courses Data

In [6]:
encoded_cols = pd.get_dummies(courses['subcategory'], prefix='')
encoded_cols = encoded_cols.rename(columns=lambda x: x.replace('_', ''))
df_courses = pd.concat([courses, encoded_cols], axis=1)

In [7]:
courses_input = df_courses.iloc[:, list([6]) + list(range(20, df_courses.shape[1]))]
courses_input.head()

Unnamed: 0,avg_rating,3D & Animation,Accounting & Bookkeeping,Affiliate Marketing,Apple,Architectural Design,Arts & Crafts,Beauty & Makeup,Branding,Business Analytics & Intelligence,...,Teacher Training,Test Prep,Travel,User Experience Design,Video & Mobile Marketing,Video Design,Vocal,Web Design,Web Development,Yoga
0,4.2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,4.9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,4.5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,4.3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,3.9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
num_user_features = user_input.shape[1]  # remove userid, rating count and ave rating during training
num_item_features = courses_input.shape[1] -1 # remove movie id at train time
scaledata = True  # applies the standard scalar to data if true

In [9]:
# Group by userid and category, and calculate the mean rating for each group
# df_new2 = df_merged.groupby(["userId", "subcategory"])["rating"].mean().reset_index()

# Pivot the table to have category columns with average rating values


In [10]:
# use pivot to create a dataframe with subcategory as columns, rating as values, and userId as an additional column
df_pivot = df_merged.pivot( columns="subcategory", values="rating")
# df_pivot['userId'] = df_merged['userId']
df_pivot.insert(0, 'userId', df_merged['userId'])
# reset the index and flatten the column names
# df_pivot.columns = [' '.join(col).strip() for col in df_pivot.columns.values]
# df_pivot.drop_duplicates()
# df_pivot

In [11]:
user_input

Unnamed: 0,3D & Animation,Accounting & Bookkeeping,Affiliate Marketing,Apple,Architectural Design,Arts & Crafts,Beauty & Makeup,Branding,Business Analytics & Intelligence,Business Law,...,Teacher Training,Test Prep,Travel,User Experience Design,Video & Mobile Marketing,Video Design,Vocal,Web Design,Web Development,Yoga
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.3,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0
9996,0.0,0.0,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,0.0
9998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0


In [12]:
cols_to_mean = df_pivot.columns[1:]
df_p = df_pivot.copy()
for col in cols_to_mean:
    df_p[f'{col}'] = df_p.groupby('userId')[col].transform('mean')
df_p = df_p.fillna(0)
user_input = df_p.fillna(0)

In [13]:
if scaledata:
    item_train_save = courses_input
    user_train_save = user_input

    scalerItem = StandardScaler()
    scalerItem.fit(courses_input)
    courses_input = scalerItem.transform(courses_input)

    scalerUser = StandardScaler()
    scalerUser.fit(user_input)
    user_input = scalerUser.transform(user_input)
    print(np.allclose(item_train_save, scalerItem.inverse_transform(courses_input)))
    print(np.allclose(user_train_save, scalerUser.inverse_transform(user_input)))

True
True


In [14]:
from sklearn.model_selection import train_test_split
item_train, item_test = train_test_split(courses_input, train_size=0.80, shuffle=True, random_state=1)
user_train, user_test = train_test_split(user_input, train_size=0.80, shuffle=True, random_state=1)
y_train, y_test       = train_test_split(y_train,    train_size=0.80, shuffle=True, random_state=1)
print(f"movie/item training data shape: {item_train.shape}")
print(f"movie/item test  data shape: {item_test.shape}")

movie/item training data shape: (27940, 131)
movie/item test  data shape: (6986, 131)


In [15]:
scaler = MinMaxScaler((-1, 1))
scaler.fit(y_train.reshape(-1, 1))
ynorm_train = scaler.transform(y_train.reshape(-1, 1))
ynorm_test = scaler.transform(y_test.reshape(-1, 1))
print(ynorm_train.shape, ynorm_test.shape)

(27940, 1) (6986, 1)


In [16]:
# GRADED_CELL
# UNQ_C1

num_outputs = 32
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([
    ### START CODE HERE ###   
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs),
    ### END CODE HERE ###  
])

item_NN = tf.keras.models.Sequential([
    ### START CODE HERE ###     
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs),
    ### END CODE HERE ###  
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_features))
vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)

# create the item input and point to the base network
input_item = tf.keras.layers.Input(shape=(num_item_features))
vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = tf.keras.Model([input_user, input_item], output)

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 130)]        0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 130)]        0           []                               
                                                                                                  
 sequential (Sequential)        (None, 32)           70560       ['input_1[0][0]']                
                                                                                                  
 sequential_1 (Sequential)      (None, 32)           70560       ['input_2[0][0]']                
                                                                                              

In [17]:
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt,
              loss=cost_fn)

In [18]:
tf.random.set_seed(1)
model.fit([user_train[:,1:], item_train[:,1:]], ynorm_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 100/100


<keras.callbacks.History at 0x15702f57c70>

In [19]:
model.evaluate([user_test[:, 1:], item_test[:, 1:]], ynorm_test)



0.2895253896713257

In [20]:
def gen_user_vecs(user_vec, num_items):
    """ given a user vector return:
        user predict maxtrix to match the size of item_vecs """
    user_vecs = np.tile(user_vec, (num_items, 1))
    return (user_vecs)

In [21]:
new_user = [0]*131
new_user[0] = 10000
new_user[4] = 4.5
user_vec = np.array([new_user])
user_vecs = gen_user_vecs(user_vec,len(item_train))

In [22]:
item_train

array([[ 1.04394780e-01, -1.37276391e-01, -9.58573258e-02, ...,
        -8.42224768e-02, -1.61519316e-01, -8.85947050e-02],
       [ 2.92158426e-02, -1.37276391e-01, -9.58573258e-02, ...,
        -8.42224768e-02, -1.61519316e-01, -8.85947050e-02],
       [-2.41946886e+00, -1.37276391e-01, -9.58573258e-02, ...,
        -8.42224768e-02, -1.61519316e-01, -8.85947050e-02],
       ...,
       [-3.00369297e-03, -1.37276391e-01, -9.58573258e-02, ...,
        -8.42224768e-02, -1.61519316e-01, -8.85947050e-02],
       [ 5.16088659e-01, -1.37276391e-01, -9.58573258e-02, ...,
        -8.42224768e-02, -1.61519316e-01, -8.85947050e-02],
       [ 2.86972127e-01, -1.37276391e-01, -9.58573258e-02, ...,
         1.18733150e+01, -1.61519316e-01, -8.85947050e-02]])

In [23]:
def predict_uservec(user_vecs, item_vecs, model, scaler, ScalerUser, ScalerItem, scaledata=False):
    """ given a user vector, does the prediction on all movies in item_vecs returns
        an array predictions sorted by predicted rating,
        arrays of user and item, sorted by predicted rating sorting index
    """
    print(item_vecs)
    if scaledata:
        scaled_user_vecs = ScalerUser.transform(user_vecs)
        #scaled_item_vecs = ScalerItem.transform(item_vecs)
        y_p = model.predict(
            [scaled_user_vecs[:, 1:], item_vecs[:, 1:]])
    else:
        y_p = model.predict([user_vecs[:, 1:], item_vecs[:, 1:]])
    y_pu = scaler.inverse_transform(y_p)
    if np.any(y_pu < 0):
        print("Error, expected all positive predictions")
    # negate to get largest rating first
    sorted_index = np.argsort(-y_pu, axis=0).reshape(-1).tolist()
    sorted_ypu = y_pu[sorted_index]
    sorted_items = item_vecs[sorted_index]
    sorted_user = user_vecs[sorted_index]
    print(item_vecs)
    return (sorted_index, sorted_ypu, sorted_items, sorted_user)

In [41]:
item_train_save.iloc[10726,:].to_dict()

{'avg_rating': 0.0,
 '3D & Animation': 0.0,
 'Accounting & Bookkeeping': 0.0,
 'Affiliate Marketing': 0.0,
 'Apple': 0.0,
 'Architectural Design': 0.0,
 'Arts & Crafts': 0.0,
 'Beauty & Makeup': 0.0,
 'Branding': 0.0,
 'Business Analytics & Intelligence': 0.0,
 'Business Law': 0.0,
 'Business Strategy': 0.0,
 'Career Development': 0.0,
 'Commercial Photography': 0.0,
 'Communication': 0.0,
 'Compliance': 0.0,
 'Content Marketing': 0.0,
 'Creativity': 0.0,
 'Cryptocurrency & Blockchain': 0.0,
 'Dance': 0.0,
 'Data Science': 0.0,
 'Database Design & Development': 0.0,
 'Design Tools': 0.0,
 'Digital Marketing': 0.0,
 'Digital Photography': 0.0,
 'E-Commerce': 0.0,
 'Economics': 0.0,
 'Engineering': 0.0,
 'Entrepreneurship': 0.0,
 'Esoteric Practices': 0.0,
 'Fashion Design': 0.0,
 'Finance': 0.0,
 'Finance Cert & Exam Prep': 0.0,
 'Financial Modeling & Analysis': 0.0,
 'Fitness': 0.0,
 'Food & Beverage': 0.0,
 'Game Design': 0.0,
 'Game Development': 0.0,
 'Gaming': 0.0,
 'General Health

In [24]:
sorted_index, sorted_ypu, sorted_items, sorted_user = predict_uservec(user_vecs, item_train, model, scaler, scalerUser, scalerItem, scaledata=scaledata)

[[ 1.04394780e-01 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 [ 2.92158426e-02 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 [-2.41946886e+00 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 ...
 [-3.00369297e-03 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 [ 5.16088659e-01 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 [ 2.86972127e-01 -1.37276391e-01 -9.58573258e-02 ...  1.18733150e+01
  -1.61519316e-01 -8.85947050e-02]]
  1/874 [..............................] - ETA: 1:30



[[ 1.04394780e-01 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 [ 2.92158426e-02 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 [-2.41946886e+00 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 ...
 [-3.00369297e-03 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 [ 5.16088659e-01 -1.37276391e-01 -9.58573258e-02 ... -8.42224768e-02
  -1.61519316e-01 -8.85947050e-02]
 [ 2.86972127e-01 -1.37276391e-01 -9.58573258e-02 ...  1.18733150e+01
  -1.61519316e-01 -8.85947050e-02]]


In [48]:
scalerItem.inverse_transform(sorted_items)[0]

array([ 4.50000000e+00,  0.00000000e+00,  0.00000000e+00, -8.67361738e-19,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -8.67361738e-19,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  8.67361738e-19,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -8.67361738e-19,
        8.67361738e-19,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00, -3.46944695e-18,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  

In [64]:
len(sorted_index)

27940

In [63]:
courses.reindex(sorted_index)

Unnamed: 0,id,title,is_paid,price,headline,num_subscribers,avg_rating,num_reviews,num_comments,num_lectures,content_length_min,published_time,last_update_date,category,subcategory,topic,language,course_url,instructor_name,instructor_url
10726,2748848,دورة التخطيط للعام,True,20.0,كيف تجعل من هذه السنة أفضل سنوات حياتك,0,0.0,0,0,9,126,2020-01-12T17:28:04Z,2020-01-09,Personal Development,Personal Productivity,Self-Discipline,Arabic,/course/plan-your-newyear/,Abdullah Alshehri,/user/abdullahalshehri4/
233,4702710,Aerodinamica alle Alte Velocità,True,60.0,Idoneo alla preparazione per l'esame per il co...,4,5.0,3,0,10,134,2022-07-11T12:34:12Z,2022-07-11,Teaching & Academics,Engineering,Aerospace Engineering,Italian,/course/aerodinamica-alle-alte-velocita/,Adriano Biason,/user/adriano-biason/
15750,3891020,MTA Security Fundamentals(MTA 98-367) Preparat...,True,1.0,Preparation test for Microsoft MTA Security Fu...,6,3.5,1,0,0,0,2021-03-08T23:23:30Z,2021-03-04,IT & Software,IT Certifications,Microsoft Certification,English,/course/mta-security-fundamentalsmta-98-367-pr...,Partho Kumar Saha,/user/partho-kumar-saha/
20656,4536234,Naučite MS Excel: Od početnika do eksperta (en),True,100.0,Naučite kako da izvučete maksimum iz Eksela i ...,13,5.0,4,3,176,1249,2022-02-15T19:52:40Z,2022-02-16,Office Productivity,Microsoft,Excel,Serbian,/course/naucite-ms-excel-od-pocetnika-do-ekspe...,Darko Gavrić,/user/darko-gavric-5/
15731,3431474,Personal SWOT Analysis in 30 minutes,False,0.0,"Find strengths, weaknesses, opportunities & th...",812,4.2,20,7,11,70,2020-08-24T16:22:21Z,2022-02-11,Personal Development,Career Development,SWOT Analysis,English,/course/personal-swot-analysis/,Mayur Pangrekar,/user/mayur-pangrekar/
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1135,4219334,Marketplace Hack Shopee Strategi Optimasi Toko...,True,279.0,strategi yang tepat untuk mendatangkan traffic...,17,4.6,5,1,11,120,2021-08-12T19:04:34Z,2021-08-12,Marketing,Digital Marketing,E-Commerce,Indonesian,/course/marketplace-hack-strategi-optimasi-mar...,Sasana Digital,/user/sasana-solusi-digital/
6458,3978590,Learn Acting Work in TV Drama & Films,True,20.0,Acting for TV Drama,7,5.0,3,1,7,41,2021-10-08T02:12:51Z,2021-10-05,Lifestyle,Arts & Crafts,Acting,Urdu,/course/learn-acting-work-in-tv-drama-films/,Abdul Majid,/user/abdul-majid-1832/
5416,1433692,High End Image Editing with Adobe Photoshop CS6,True,200.0,Anyone looking to develop expertise in high-en...,1397,3.5,42,11,15,89,2017-11-28T00:22:23Z,2017-11-21,Design,Graphic Design & Illustration,Photoshop Retouching,English,/course/high-end-image-editing-with-adobe-phot...,MD Hossain,/user/abul-hossain/
26353,4716634,【上場準備】上場準備におけるリーダーシップ,True,2.0,周りを巻き込み全員で上場を勝ち取るリーダーシップを学ぶ,9,0.0,0,0,24,57,2022-06-03T12:14:14Z,2022-06-03,Business,Management,Leadership,Japanese,/course/ifalotvj/,Hisashi Tanda,/user/dan-tian-jiu-si/


In [34]:
sorted_items

array([[ 0.48028934, -0.13727639, -0.09585733, ..., -0.08422248,
        -0.16151932, -0.08859471],
       [-0.16410137, -0.13727639, -0.09585733, ..., -0.08422248,
        -0.16151932, -0.08859471],
       [-2.41946886, -0.13727639, -0.09585733, ..., -0.08422248,
        -0.16151932, -0.08859471],
       ...,
       [ 0.51250888, -0.13727639, -0.09585733, ..., -0.08422248,
        -0.16151932, -0.08859471],
       [ 0.50415564, -0.13727639, -0.09585733, ..., -0.08422248,
        -0.16151932, -0.08859471],
       [-0.0996623 , -0.13727639, -0.09585733, ..., -0.08422248,
        -0.16151932, -0.08859471]])

In [26]:
movie_dict = defaultdict(dict,courses.to_dict(orient='index')) 

In [27]:
def print_pred_movies(y_p, item, movie_dict, maxcount=10):
    """ print results of prediction of a new user. inputs are expected to be in
        sorted order, unscaled. """
    count = 0
    movies_listed = defaultdict(int)
    disp = [["y_p", "id", "title", "subcategory"]]
    item = scalerItem.inverse_transform(item)
    print(item[0])
    for i in range(0, y_p.shape[0]):
        if count == maxcount:
            break
        count += 1
        movie_id = item[i, 0].astype(int)
        if movie_id in movies_listed:
            continue
        movies_listed[movie_id] = 1
        disp.append([y_p[i, 0], movie_id, item[i, 2].astype(float),
                    movie_dict[movie_id]['title'], movie_dict[movie_id]['subcategory']])
    table = tabulate.tabulate(disp, tablefmt='html', headers="firstrow")
    return (table)

In [42]:
print_pred_movies(sorted_ypu, sorted_items, movie_dict, maxcount = 10)

[ 4.50000000e+00  0.00000000e+00  0.00000000e+00 -8.67361738e-19
  0.00000000e+00  0.00000000e+00  0.00000000e+00 -8.67361738e-19
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  8.67361738e-19  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00 -8.67361738e-19
  8.67361738e-19  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00 -3.46944695e-18
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000

Unnamed: 0,y_p,id,title,subcategory
4.86168,4,0,Fundamentals of Scope and Requirements,Management
4.86168,3,0,Improve your WEST freestyle technique- from 200 m to 1000 m,Sports
4.86168,0,0,DSLR Video For Beginners,Video Design
4.86168,5,0,How To Write A Simple And Successful Business Plan,Entrepreneurship
