In [1]:
# Install pandas into the running kernel's environment if it is not already installed.
# NOTE(review): the version is unpinned, so a later re-run may pick up a different
# pandas release. numpy (imported below) is not installed here explicitly —
# presumably it arrives as a pandas dependency; confirm.
%pip install pandas

import warnings

import numpy as np
import pandas as pd

# Load preprocessed dataset
df = pd.read_csv("data/eda_summary.csv")

# The file contains a leftover saved-index column ("Unnamed: 0"). It carries no
# information and would otherwise be written into the engineered output, so
# drop any such auto-named column right after loading.
df = df.loc[:, ~df.columns.str.startswith("Unnamed:")]
df.head()


Note: you may need to restart the kernel to use updated packages.


Unnamed: 0,user_id,event,content_id,timestamp
0,57,click,content_34,0.100614
1,10,share,content_5,0.518479
2,78,share,content_3,0.340944
3,78,click,content_31,0.185963
4,8,click,content_2,0.913657


In [2]:
# Turn 'event' into a categorical once and reuse it for both the integer codes
# and the lookup table below.
event_categorical = df['event'].astype('category')
df['event_encoded'] = event_categorical.cat.codes

# Keep the code -> label mapping so the encoding can be reversed later
event_mapping = {
    code: label for code, label in enumerate(event_categorical.cat.categories)
}
print("Event Mapping:", event_mapping)


Event Mapping: {0: 'click', 1: 'like', 2: 'share', 3: 'watch'}


In [3]:
# Ensure timestamp is in datetime format.
# errors='coerce' replaces unparseable values with NaT instead of raising, so
# count how many values were lost and report them rather than hiding them.
missing_before = int(df['timestamp'].isna().sum())
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')
newly_missing = int(df['timestamp'].isna().sum()) - missing_before
if newly_missing > 0:
    warnings.warn(
        f"{newly_missing} timestamp value(s) could not be parsed and were set to NaT."
    )

# Extract hour of day as new feature
df['hour'] = df['timestamp'].dt.hour

# unit='s' reads the numbers as seconds since 1970-01-01. If the column is not
# really epoch seconds (for example values scaled into [0, 1)), every row lands
# in the same hour and the feature carries no information.
if df['hour'].notna().sum() > 1 and df['hour'].nunique() <= 1:
    warnings.warn(
        "All parsed timestamps fall in the same hour; check that 'timestamp' "
        "holds epoch seconds before relying on 'hour'."
    )


In [4]:
# Create simple time-of-day buckets
def time_bucket(hour):
    """Map an hour of the day to a coarse time-of-day label.

    Parameters
    ----------
    hour : number or missing value
        Hour of the day. 5-11 maps to 'morning', 12-16 to 'afternoon',
        17-20 to 'evening'; every other number maps to 'night'.

    Returns
    -------
    str or None
        The bucket label, or None when ``hour`` is missing (None/NaN/NA).
    """
    # A missing hour fails every range comparison below, so without this guard
    # it would be labelled 'night' and become indistinguishable from real data.
    if pd.isna(hour):
        return None
    if 5 <= hour < 12:
        return 'morning'
    if 12 <= hour < 17:
        return 'afternoon'
    if 17 <= hour < 21:
        return 'evening'
    return 'night'

df['time_bucket'] = df['hour'].apply(time_bucket)

# Encode against a fixed label list so each bucket always gets the same code,
# even when some buckets do not occur in this particular dataset. The order is
# alphabetical, matching what astype('category') assigns when all four buckets
# are present. Values not in the list (including missing ones) get code -1.
bucket_labels = ['afternoon', 'evening', 'morning', 'night']
df['time_bucket_encoded'] = pd.Categorical(
    df['time_bucket'], categories=bucket_labels
).codes


In [5]:
# Remove the raw columns that now have encoded counterparts
# ('event' -> 'event_encoded', 'time_bucket' -> 'time_bucket_encoded')
# together with the source 'timestamp' column that 'hour' was derived from.
df = df.drop(columns=['timestamp', 'event', 'time_bucket'])

In [6]:
# Write the engineered features to disk without the row index, then confirm
# where the file was written.
output_path = "data/engineered_events.csv"
df.to_csv(output_path, index=False)
print("✅ Feature engineering complete. File saved to: " + output_path)


✅ Feature engineering complete. File saved to: data/engineered_events.csv
