Skip to content
This repository has been archived by the owner on Jan 11, 2024. It is now read-only.

Commit

Permalink
Merge pull request #12 from fullstorydev/patrick/refactor_preproc_events
Browse files Browse the repository at this point in the history
slimming some statements down in preproc_events
  • Loading branch information
robertclewley committed Dec 5, 2019
2 parents 19cef91 + b55df3f commit 3b65849
Showing 1 changed file with 5 additions and 8 deletions.
13 changes: 5 additions & 8 deletions pathutils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ def preproc_events(events_df: pd.DataFrame) -> pd.DataFrame:

# Time
events_df["EventStart"] = pd.to_datetime(events_df["EventStart"])
# events_df.sort_values("EventStart", inplace=True)

# create a parent/child relationship: distinct_session_id (parent) --> index (child)
events_df.set_index(
pd.MultiIndex.from_arrays(
(pd.Index(events_df["distinct_session_id"]), events_df.index),
[ pd.Index(events_df["distinct_session_id"]), events_df.index ],
names=("sid", "i"),
),
),
inplace=True,
)

Expand All @@ -118,11 +118,8 @@ def preproc_events(events_df: pd.DataFrame) -> pd.DataFrame:
# it's of length `len(events_df)` not `len(unique_session_ids)`

# sort event times per session
events_df = (
events_df.reset_index()
.sort_values(["sid", "EventStart"], ascending=[1, 1])
.set_index(["sid", "i"])
)
events_df.sort_values(["sid", "EventStart"], ascending=[1, 1], inplace=True)

# create a proper incrementing integer index for each session, move unique
# `i` to a column
events_df["idx"] = events_df.groupby("sid").cumcount()
Expand Down

0 comments on commit 3b65849

Please sign in to comment.