# General popularity

### Libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os

### Load datasets

In [2]:
# Read the source CSV into the DataFrame that the following cells transform
csv_path = "proceded.csv"
df = pd.read_csv(csv_path)

#### Convert comma-separated strings to lists

In [None]:
# Columns whose cells are comma-separated strings; each one is replaced
# by the list of its comma-delimited parts.
list_columns = [
    'history',
    'timestampHistory',
    'numberOfClicksHistory',
    'timeOnPageHistory',
    'scrollPercentageHistory',
    'pageVisitsCountHistory',
    'timestampHistory_new',
]
for column_name in list_columns:
    df[column_name] = df[column_name].str.split(',')

# Preview the first rows (notebook cell output)
df.head()


#### Explode the dataset using list columns

In [None]:
# Explode the list-valued columns together so that every history entry
# lands on its own row, next to the metrics at the same list position.
exploded_columns = [
    'history',
    'timestampHistory',
    'numberOfClicksHistory',
    'timeOnPageHistory',
    'scrollPercentageHistory',
    'pageVisitsCountHistory',
    'timestampHistory_new',
]
df_exploded = df.explode(exploded_columns)

# Preview the first rows (notebook cell output)
df_exploded.head()

##### Convert the relevant columns to numeric values (in case they're still strings)

In [None]:

# Metric columns that feed the aggregation; after the split/explode steps
# their values may still be strings.
metric_columns = [
    'numberOfClicksHistory',
    'timeOnPageHistory',
    'scrollPercentageHistory',
    'pageVisitsCountHistory',
]
for metric in metric_columns:
    # errors='coerce' turns values that cannot be parsed into NaN
    # instead of raising.
    df_exploded[metric] = pd.to_numeric(df_exploded[metric], errors='coerce')

# Preview the first rows (notebook cell output)
df_exploded.head()


#### Group dataset by "history" and aggregate metrics for each history

In [None]:
# How each metric is reduced to a single value per history item
aggregations = {
    'numberOfClicksHistory': 'sum',     # total clicks
    'timeOnPageHistory': 'mean',        # mean time on page
    'scrollPercentageHistory': 'mean',  # mean scroll percentage
    'pageVisitsCountHistory': 'sum',    # total page visits
}

# One row per distinct 'history' value; reset_index() turns the group key
# back into a regular column.
per_history = df_exploded.groupby('history')
grouped = per_history.agg(aggregations).reset_index()

grouped

In [8]:
# Persist the per-history aggregates to a parquet file.
# NOTE(review): in file order this runs before 'popularity_score' is added
# to `grouped`, so the written file would not contain the score - confirm
# that this is intended.
output_path = "general_recommendation.parquet"
grouped.to_parquet(output_path)

##### Calculate popularity

Popularity = (alpha * num_clicks) + (beta * time_on_page) + (gamma * scroll_percent) + (delta * page_visits)

In [None]:


# Weights of the popularity score, one per aggregated metric
alpha = 0.1  # clicks
beta = 0.3   # time on page
gamma = 0.4  # scroll percentage
delta = 0.2  # page visits

# Weighted contribution of each metric.
# NOTE(review): the metrics are combined on their raw scales (sums and
# means); no normalisation is applied in this cell - confirm that the
# weights are meant to act on unscaled values.
clicks_term = alpha * grouped['numberOfClicksHistory']
time_term = beta * grouped['timeOnPageHistory']
scroll_term = gamma * grouped['scrollPercentageHistory']
visits_term = delta * grouped['pageVisitsCountHistory']

# Popularity score = sum of the four weighted metrics
grouped['popularity_score'] = clicks_term + time_term + scroll_term + visits_term

# Most popular histories first
grouped_sorted = grouped.sort_values('popularity_score', ascending=False)

# Keep the top N histories as the recommendation list
top_n = 10
recommendations = grouped_sorted.head(n=top_n)

# Preview the head of the ranking (notebook cell output)
grouped_sorted.head()
