### Rating Products
 - Average
 - Time-Based Weighted Average
 - User-Based Weighted Average
 - Weighted Rating


In [None]:
# Application: User- and Time-Weighted Course Rating Calculation


In [1]:
import pandas as pd
import math
import scipy.stats as st
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Display options for this notebook: show every column and every row, use a
# 500-character wide console, keep frames on a single line instead of wrapping,
# and print floats with five decimal places.
# NOTE: with display.max_rows set to None, displaying a long Series/DataFrame
# prints all of its rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 500)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.float_format', lambda x: '%.5f' % x)

In [5]:
# Load the course reviews; the path is relative to the notebook's working directory.
df = pd.read_csv("../datasets/course_reviews.csv")

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4323 entries, 0 to 4322
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Rating              4323 non-null   float64
 1   Timestamp           4323 non-null   object 
 2   Enrolled            4323 non-null   object 
 3   Progress            4323 non-null   float64
 4   Questions Asked     4323 non-null   float64
 5   Questions Answered  4323 non-null   float64
dtypes: float64(4), object(2)
memory usage: 202.8+ KB


In [7]:
df.head()

Unnamed: 0,Rating,Timestamp,Enrolled,Progress,Questions Asked,Questions Answered
0,5.0,2021-02-05 07:45:55,2021-01-25 15:12:08,5.0,0.0,0.0
1,5.0,2021-02-04 21:05:32,2021-02-04 20:43:40,1.0,0.0,0.0
2,4.5,2021-02-04 20:34:03,2019-07-04 23:23:27,1.0,0.0,0.0
3,5.0,2021-02-04 16:56:28,2021-02-04 14:41:29,10.0,0.0,0.0
4,4.0,2021-02-04 15:00:24,2020-10-13 03:10:07,10.0,0.0,0.0


In [8]:
df.shape

(4323, 6)

In [None]:
# Rating distribution

In [10]:
df["Rating"].value_counts() # Distribution of rating values (number of reviews per rating)

Rating
5.00000    3267
4.50000     475
4.00000     383
3.50000      96
3.00000      62
1.00000      15
2.00000      12
2.50000      11
1.50000       2
Name: count, dtype: int64

In [11]:
df["Questions Asked"].value_counts()

Questions Asked
0.00000     3867
1.00000      276
2.00000       80
3.00000       43
4.00000       15
5.00000       13
6.00000        9
8.00000        5
9.00000        3
14.00000       2
11.00000       2
7.00000        2
10.00000       2
15.00000       2
22.00000       1
12.00000       1
Name: count, dtype: int64

In [12]:
# For each distinct "Questions Asked" value: how many reviews have it and
# the mean rating of those reviews.
df.groupby("Questions Asked").agg({"Questions Asked": "count",
                                   "Rating": "mean"})

Unnamed: 0_level_0,Questions Asked,Rating
Questions Asked,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,3867,4.76519
1.0,276,4.74094
2.0,80,4.80625
3.0,43,4.74419
4.0,15,4.83333
5.0,13,4.65385
6.0,9,5.0
7.0,2,4.75
8.0,5,4.9
9.0,3,5.0


In [13]:
# Plain (unweighted) average rating over all reviews
df["Rating"].mean()

4.764284061993986

In [None]:
# Time-Based Weighted Average (weighted by how recent each rating is)

In [14]:
# Convert the Timestamp column to datetime so it can be subtracted from a date.
df["Timestamp"] = pd.to_datetime(df["Timestamp"])

In [15]:
# Fixed reference date that review ages are measured against.
current_date = pd.to_datetime('2021-02-10 0:0:0')

In [22]:
# Age of each review in whole days, measured back from current_date.
df["days"] = (current_date - df["Timestamp"]).dt.days
df["days"] 

0         4
1         5
2         5
3         5
4         5
5         5
6         5
7         5
8         5
9         6
10        6
11        6
12        6
13        6
14        6
15        6
16        6
17        6
18        6
19        6
20        7
21        7
22        7
23        7
24        7
25        7
26        7
27        7
28        7
29        7
30        7
31        8
32        8
33        8
34        8
35        8
36        8
37        8
38        9
39        9
40        9
41        9
42        9
43        9
44        9
45        9
46        9
47        9
48        9
49        9
50        9
51       10
52       10
53       10
54       10
55       10
56       10
57       10
58       10
59       10
60       10
61       10
62       10
63       11
64       11
65       11
66       11
67       11
68       11
69       11
70       11
71       11
72       11
73       11
74       11
75       12
76       12
77       12
78       12
79       13
80       13
81       13
82       13
83  

In [17]:
# Mean rating of reviews that are at most 30 days old.
df.loc[df["days"] <= 30, "Rating"].mean()

4.775773195876289

In [18]:
# Mean rating of reviews older than 30 days and at most 90 days old.
df.loc[(df["days"] > 30) & (df["days"] <= 90), "Rating"].mean()

4.763833992094861

In [19]:
# Mean rating of reviews older than 90 days and at most 180 days old.
df.loc[(df["days"] > 90) & (df["days"] <= 180), "Rating"].mean()

4.752503576537912

In [20]:
# Mean rating of reviews older than 180 days.
df.loc[(df["days"] > 180), "Rating"].mean()

4.76641586867305

In [21]:
# Time-based weighted average written out inline: the mean rating of each age
# bucket is multiplied by its percentage weight (28/26/24/22, newest bucket
# weighted highest) and the four terms are summed.
df.loc[df["days"] <= 30, "Rating"].mean() * 28/100 + \
    df.loc[(df["days"] > 30) & (df["days"] <= 90), "Rating"].mean() * 26/100 + \
    df.loc[(df["days"] > 90) & (df["days"] <= 180), "Rating"].mean() * 24/100 + \
    df.loc[(df["days"] > 180), "Rating"].mean() * 22/100

4.765025682267194

In [31]:
def time_based_weighted_average(dataframe, w1=28, w2=26, w3=24, w4=22):
    """Return the average rating weighted by how recent each review is.

    Reviews are split into four age buckets using the ``days`` column
    (``<= 30``, ``31-90``, ``91-180`` and ``> 180``). The mean ``Rating`` of
    each bucket is multiplied by that bucket's percentage weight and the four
    terms are added together.

    Args:
        dataframe: DataFrame with a numeric ``days`` column and a ``Rating`` column.
        w1: Percentage weight for reviews with ``days <= 30``.
        w2: Percentage weight for reviews with ``30 < days <= 90``.
        w3: Percentage weight for reviews with ``90 < days <= 180``.
        w4: Percentage weight for reviews with ``days > 180``.

    Returns:
        The weighted sum of the four bucket means. The weights are divided by
        100 but are neither validated nor normalized. If a bucket contains no
        rows its mean is NaN, which makes the whole result NaN.
    """
    # Build every mask from the frame that was passed in, never from a
    # notebook-level variable, so the function works for any DataFrame.
    days = dataframe["days"]
    return dataframe.loc[days <= 30, "Rating"].mean() * w1 / 100 + \
           dataframe.loc[(days > 30) & (days <= 90), "Rating"].mean() * w2 / 100 + \
           dataframe.loc[(days > 90) & (days <= 180), "Rating"].mean() * w3 / 100 + \
           dataframe.loc[days > 180, "Rating"].mean() * w4 / 100

In [32]:
time_based_weighted_average(df)

4.765025682267194

In [33]:
time_based_weighted_average(df, 30, 26, 22, 22)

4.765491074653962

In [34]:
def user_based_weighted_average(dataframe, w1=22, w2=24, w3=26, w4=28):
    """Return the average rating weighted by each reviewer's course progress.

    Rows are grouped into four buckets by the ``Progress`` column
    (``<= 10``, ``10-45``, ``45-75`` and ``> 75``, upper bounds inclusive).
    Each bucket's mean ``Rating`` is scaled by its percentage weight
    (``w1`` .. ``w4``, in bucket order) and the scaled means are summed.

    The weights are divided by 100 but not validated or normalized. An empty
    bucket has a NaN mean, so the result is NaN in that case.
    """
    progress = dataframe["Progress"]
    # (row mask, percentage weight) per progress bucket, lowest progress first.
    buckets = (
        (progress <= 10, w1),
        ((progress > 10) & (progress <= 45), w2),
        ((progress > 45) & (progress <= 75), w3),
        (progress > 75, w4),
    )
    total = 0
    for mask, weight in buckets:
        bucket_mean = dataframe.loc[mask, "Rating"].mean()
        total = total + bucket_mean * weight / 100
    return total

In [35]:
user_based_weighted_average(df, 20, 24, 26, 30)

4.803286469062915

In [None]:
# Weighted Rating

In [36]:
def course_weighted_rating(dataframe, time_w=50, user_w=50):
    """Blend the time-based and user-based weighted averages into one score.

    ``time_w`` and ``user_w`` are percentage weights applied to the results of
    ``time_based_weighted_average`` and ``user_based_weighted_average``; both
    helpers are called with their default bucket weights. The two percentages
    are divided by 100 but not validated or normalized.
    """
    time_score = time_based_weighted_average(dataframe)
    user_score = user_based_weighted_average(dataframe)
    time_part = time_score * time_w / 100
    user_part = user_score * user_w / 100
    return time_part + user_part

In [37]:
course_weighted_rating(df, time_w=40, user_w=60)

4.786164895710403