In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the original dataset (parquet) from the Kaggle input directory.
data = pd.read_parquet('/kaggle/input/original-data/original.parquet')

In [None]:
# Preview the first rows to check that the load worked.
data.head()

**Hover Duration Features: mean hover duration per (session_id, level_group, event_name)**

In [None]:
# Hover duration by session id and event
d1 = data[['session_id','level_group','hover_duration','event_name']]

In [None]:
d1 = d1[d1['hover_duration'].notnull()]

In [None]:
hover_mean = d1.groupby(by=['session_id','level_group','event_name'])['hover_duration'].mean()

In [None]:
map = hover_mean.xs('map_hover', level='event_name').reset_index()
object = hover_mean.xs('object_hover', level='event_name').reset_index()

In [None]:
map.rename(columns={'hover_duration': 'map_hover_duration'}, inplace=True)
object.rename(columns={'hover_duration': 'object_hover_duration'}, inplace=True)

In [None]:
d1 = pd.merge(map,object,on=['session_id','level_group'])

In [None]:
d1.head()

**Bingo Features**

In [None]:
d2 = data[['session_id','level_group','text_fqid','elapsed_time']]
d2 = d2[d2['text_fqid'].notnull()]

In [None]:
d2 = d2[d2['text_fqid'].str.endswith('bingo')]

In [None]:
first_bingo = d2.groupby(['session_id','level_group']).first().reset_index()

In [None]:
first_bingo_elapsed_time = first_bingo[['session_id','level_group','elapsed_time']]

In [None]:
first_bingo_elapsed_time.head()

In [None]:
d2['time_diff']=d2['elapsed_time'].diff()

In [None]:
d2 = d2[d2['time_diff'] >= 0]

In [None]:
bingo_mean = d2.groupby(by=['session_id','level_group'])['time_diff'].mean().to_frame()

In [None]:
bingo_mean.rename(columns={'time_diff': 'bingo_time_mean'}, inplace=True)

In [None]:
d2 = pd.merge(bingo_mean,first_bingo_elapsed_time,on=['session_id','level_group'])

In [None]:
d2.rename(columns={'elapsed_time': 'first_bingo_elapsed_time'}, inplace=True)

In [None]:
d2.head()

**Room Change Feature: ratio of room changes per session_id and level_group**

In [None]:
d3 = data[['session_id','level_group','room_fqid']]

In [None]:
d3['shift'] = d3['room_fqid'].shift()

In [None]:
d3 = d3.groupby(['session_id','level_group']).apply(lambda x: x.iloc[1:]).reset_index(drop=True)

In [None]:
d3 = d3[d3['shift'].notnull()]

In [None]:
d3['room_change']= np.where(d3['room_fqid']==d3['shift'],0,1)

In [None]:
d3_count = d3.groupby(by = ['session_id','level_group'])['room_change'].sum().to_frame()

In [None]:
d3_count.head()

In [None]:
d3_total = d3.groupby(by=['session_id','level_group'])['room_change'].count().to_frame()

In [None]:
d3 = pd.merge(d3_count,d3_total,on=['session_id','level_group'])

In [None]:
d3['room_change_ratio']=d3['room_change_x']/d3['room_change_y']

In [None]:
d3.reset_index(inplace = True)

In [None]:
d3.head()

In [None]:
d3 = d3[['session_id','level_group','room_change_ratio']]

In [None]:
d3.head()

**text_fqid Change Feature: ratio of text_fqid changes per session_id and level_group**

In [None]:
d4 = data[['session_id','level_group','text_fqid']]

In [None]:
d4 = d4[d4['text_fqid'].notnull()]

In [None]:
d4['shift'] = d4['text_fqid'].shift()

In [None]:
d4 = d4.groupby(['session_id','level_group']).apply(lambda x: x.iloc[1:]).reset_index(drop=True)

In [None]:
d4 = d4[d4['shift'].notnull()]

In [None]:
d4['text_change']= np.where(d4['text_fqid']==d4['shift'],0,1)

In [None]:
d4_count = d4.groupby(by = ['session_id','level_group'])['text_change'].sum().to_frame()
d4_total = d4.groupby(by= ['session_id','level_group'])['text_change'].count().to_frame()

In [None]:
d4 = pd.merge(d4_count,d4_total,on=['session_id','level_group'])

In [None]:
d4['text_change_ratio']=d4['text_change_x']/d4['text_change_y']

In [None]:
d4=d4['text_change_ratio'].reset_index()

In [None]:
d4.head()

**Combine All Features**

In [None]:
d2.head()

In [None]:
from functools import reduce
dfs = [d1, d3, d4]
# Merging all DataFrames
features = reduce(lambda left, right: pd.merge(left, right, on=['session_id','level_group'], how='inner'), dfs)

In [None]:
features = pd.merge(features,d2,on=['session_id','level_group'],how='left')

In [None]:
features.head(10)

In [None]:
features.to_csv('features_keyue.csv', index=False)

In [None]:
features.head()