In [1]:
# Import necessary libraries
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense, Embedding, Concatenate
from tensorflow.keras.models import Model

In [2]:
# Load and preprocess the data
courses = pd.read_csv('Sample_Udemy.csv')
users = pd.read_excel('User_Profiles.xlsx')
ratings = pd.read_excel('User_Ratings.xlsx')

In [3]:
users = users.drop('Unnamed: 0',axis=1)
ratings = ratings.drop('Unnamed: 0',axis=1)
courses['id'] = courses['id'].astype(int)

In [4]:
courses.head()

Unnamed: 0,id,title,is_paid,price,headline,num_subscribers,avg_rating,num_reviews,num_comments,num_lectures,content_length_min,published_time,last_update_date,category,subcategory,topic,language,course_url,instructor_name,instructor_url
0,1981330,"ZombieLee's Web Hacking (XSS , SQL Injection)",False,0.0,This course is designed for web hacking skills...,32523.0,3.55,330.0,46.0,8.0,41.0,2018-10-23T16:07:57Z,2021-06-13,IT & Software,Network & Security,Ethical Hacking,English,/course/lees-web-hacking-cross-site-scripting-...,KYOUNG SOO LEE,/user/igyeongsu/
1,1666706,LOCATIONZ : landscape lessons on location & po...,True,129.99,Kick-start your landscape photography with kil...,363.0,4.6,63.0,15.0,45.0,288.0,2018-04-29T19:10:12Z,2018-09-06,Photography & Video,Digital Photography,Landscape Photography,English,/course/locationz/,Ray Salisbury,/user/ray-salisbury/
2,2083392,Curso Completo de Edição e Gravação de Vídeos ...,True,529.9,"Aprenda Planejar, Produzir, Gravar e Editar Ví...",5134.0,4.688172,1780.0,554.0,130.0,673.0,2019-04-04T20:20:49Z,2022-04-11,Photography & Video,Video Design,Movavi,Portuguese,/course/curso-completo-de-edicao-e-gravacao-de...,Erick A. I. Souza,/user/erick-augusto-izidoro-souza/
3,4316848,Certified Information System Security Professi...,True,24.99,Latest 100+ Test Questions for Certified Infor...,2689.0,3.7,11.0,0.0,0.0,0.0,2021-09-30T07:35:56Z,2021-09-25,IT & Software,IT Certifications,CISSP - Certified Information Systems Security...,English,/course/certified-information-system-security-...,Abhishek Gupta,/user/abhishek-gupta-1607/
4,1612264,프로들만의 차별화된 엑셀 데이터 분석과 차트 시각화,True,48.0,"Amazing excel skills, differentiated data anal...",548.0,4.8,113.0,33.0,49.0,353.0,2018-03-29T19:44:26Z,2022-01-19,Office Productivity,Microsoft,Excel,Korean,/course/amazing_excel_skills/,kyongja kim,/user/gimgyeongja/


In [5]:
# Merge course features and user info data
data = pd.merge(ratings, users, left_on='userId', right_on='id')
data = pd.merge(data, courses, left_on='courseId', right_on='id')

In [6]:
data.head()

Unnamed: 0,userId,courseId,rating,timestamp,id_x,name,age,interests,courses,id_y,...,content_length_min,published_time,last_update_date,category,subcategory,topic,language,course_url,instructor_name,instructor_url
0,0,2871816,4.5,2021-11-12 03:46:05,0,Ryan Nguyen,50,"['Office Productivity', 'Design']","[(2871816, 4.5, '2021-11-12 03:46:05'), (31810...",2871816,...,115.0,2020-04-04T15:52:46Z,2020-08-15,Office Productivity,Google,Gmail Productivity,English,/course/accomplish-more-using-gmail-become-a-g...,Giuseppe Schiorlin,/user/giuseppe-schiorlin/
1,0,3181020,4.5,2021-11-27 13:49:46,0,Ryan Nguyen,50,"['Office Productivity', 'Design']","[(2871816, 4.5, '2021-11-12 03:46:05'), (31810...",3181020,...,362.0,2020-06-09T17:49:35Z,2022-05-24,Office Productivity,Microsoft,Microsoft Office 365,English,/course/pgdca_certificate/,Surya Narayan Moharana,/user/surya-narayan-moharana-2/
2,0,128786,4.5,2021-08-19 02:35:57,0,Ryan Nguyen,50,"['Office Productivity', 'Design']","[(2871816, 4.5, '2021-11-12 03:46:05'), (31810...",128786,...,1199.0,2013-12-05T18:33:00Z,2022-01-18,Office Productivity,Oracle,PL/SQL,English,/course/oracle-plsql-fundamentals-vol-i-ii/,Intellezy Trainers,/user/intellezy/
3,0,3019936,4.0,2021-06-09 11:26:27,0,Ryan Nguyen,50,"['Office Productivity', 'Design']","[(2871816, 4.5, '2021-11-12 03:46:05'), (31810...",3019936,...,188.0,2020-04-27T11:34:09Z,2020-10-15,Design,Design Tools,Fusion 360,Polish,/course/fusion-360-podstawy-modelowania-3d/,Damian Lewczuk,/user/damian-lewczuk-2/
4,1,1593192,3.0,2022-03-01 04:09:28,1,Richard Wilson,50,"['Photography & Video', 'Health & Fitness']","[(1593192, 3.0, '2022-03-01 04:09:28'), (46178...",1593192,...,156.0,2018-03-13T22:29:22Z,2021-08-14,Photography & Video,Video Design,Video Production,English,/course/professional-video-production/,Skills Gap Trainer,/user/dorincernat/


In [7]:
# Preprocess the data
data = data.drop_duplicates(subset=['userId', 'courseId']) # Remove duplicates
data = data.dropna() # Remove missing values
data['interests'] = data['interests'].str.split(',').apply(lambda x: len(x)) # Convert interests to numerical data
data['published_time'] = pd.to_datetime(data['published_time']) # Convert published_time to datetime format
data['last_update_date'] = pd.to_datetime(data['last_update_date']) # Convert last_update_date to datetime format
# data['course_age'] = (data['last_update_date'] - data['published_time']).dt.days # Compute course age
data = data.drop(columns=['timestamp', 'last_update_date', 'published_time']) # Remove unnecessary columns

In [8]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [9]:
# Feature Extraction
course_tags = courses['topic'].str.get_dummies() # Extract tags from course topic
user_interests = users['interests'].str.get_dummies() # Extract interests from user info

In [10]:
# Build the model
course_input = Input(shape=(course_tags.shape[1],))
user_input = Input(shape=(user_interests.shape[1],))
concatenated = Concatenate()([course_input, user_input])
dense1 = Dense(128, activation='relu')(concatenated)
dense2 = Dense(64, activation='relu')(dense1)
output = Dense(1)(dense2)
model = Model(inputs=[course_input, user_input], outputs=output)
model.compile(optimizer='adam', loss='mse')

In [26]:
data.shape

(36, 26)

In [21]:
course_tags

Unnamed: 0,.NET,2D Animation,2D Game Development,360 video,3D Animation,3D Coat,3D Dynamics,3D Environment Modeling,3D Game Development,3D Lighting,...,ideCAD,isiXhosa Language,jQuery,jQuery Image Gallery,macOS,monday.com,ownCloud,pfSense,pytest,zBrush
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20968,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
20969,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
20970,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
20971,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
# Train the model
history = model.fit([train_data[course_tags.columns], train_data[user_interests.columns]], train_data['rating'], epochs=10, batch_size=32, validation_data=([test_data[course_tags.columns], test_data[user_interests.columns]], test_data['rating']))


KeyError: "None of [Index(['.NET', '2D Animation', '2D Game Development', '360 video',\n       '3D Animation', '3D Coat', '3D Dynamics', '3D Environment Modeling',\n       '3D Game Development', '3D Lighting',\n       ...\n       'ideCAD', 'isiXhosa Language', 'jQuery', 'jQuery Image Gallery',\n       'macOS', 'monday.com', 'ownCloud', 'pfSense', 'pytest', 'zBrush'],\n      dtype='object', length=2766)] are in the [columns]"

In [None]:
# Evaluate the model
train_loss = model.evaluate([train_data[course_tags.columns], train_data[user_interests.columns]], train_data['rating'])
test_loss = model.evaluate([test_data[course_tags.columns], test_data[user_interests.columns]], test_data['rating'])
print('Train loss:', train_loss)
print('Test loss:', test_loss)


In [27]:
f = pd.read_csv('Udemy.csv')

In [28]:
f.shape

(209734, 20)