In [None]:
import pandas as pd

# Load the tab-separated feed log. Column names are supplied explicitly, so
# the first line of the file is read as data rather than as a header. The
# "datetime" strings are converted to pandas timestamps in the same chain.
views = pd.read_csv(
    "../data/feed-views.log",
    sep="\t",
    names=["datetime", "user"],
).assign(datetime=lambda frame: pd.to_datetime(frame["datetime"]))
## Extracting date and time components
# Every calendar/clock field of the timestamp gets its own column, added in
# the order year, month, day, hour, minute, second.
for component in ("year", "month", "day", "hour", "minute", "second"):
    views[component] = getattr(views["datetime"].dt, component)
## Creating a daytime column
# Six named periods of the day. Because right=False is used below, each
# interval includes its left edge and excludes its right edge.
labels = [
    "night",          # hours [0, 4)
    "early morning",  # hours [4, 7)
    "morning",        # hours [7, 11)
    "afternoon",      # hours [11, 17)
    "early evening",  # hours [17, 20)
    "evening",        # hours [20, 24)
]
bins = [0, 4, 7, 11, 17, 20, 24]

# Map every visit's hour onto its period; the result is a categorical column.
views["daytime"] = pd.cut(views["hour"], bins, right=False, labels=labels, include_lowest=True)
## Index
# Key the frame by user: the "user" column moves out of the columns and
# becomes the row index.
views = views.set_index("user")
## Number of elements
# Number of non-null entries in each column.
views.count(axis="index")
## Quantity by category daytime
# How many visits fall into each daytime period, most frequent first.
views.loc[:, "daytime"].value_counts()
## Sort by time
# Order the visits by time of day (hour, then minute, then second), ascending.
# sort_values returns a new frame, so `views` keeps its original row order.
views_sorted = views.sort_values(["hour", "minute", "second"], ascending=True)
## min/max and time mode
# Smallest and largest hour of day present in the log, as a (min, max) pair.
(
    views["hour"].min(),
    views["hour"].max(),
)
# Most frequent full timestamp(s). mode() returns every value tied for the
# highest count.
# NOTE(review): if the timestamps are mostly unique this will list most of
# them - confirm that the full datetime, not the hour, is what is wanted here.
views.loc[:, "datetime"].mode()
## Maximum hour for night, minimum for morning and who was it
# Latest hour seen among "night" visits and earliest hour among "morning"
# visits, selected with a single .loc lookup (row mask + column) each.
night_max = views.loc[views["daytime"] == "night", "hour"].max()
morning_min = views.loc[views["daytime"] == "morning", "hour"].min()

# First row (in current row order) whose hour equals each extreme; the row's
# index label is the user, since the frame is indexed by "user".
visitor_night = views.loc[views["hour"] == night_max].iloc[:1]
visitor_morning = views.loc[views["hour"] == morning_min].iloc[:1]
## Hour and daytime modes
# Most common hour and most common daytime period. mode() returns a Series,
# so if several values tie for the top count all of them are reported.
views.loc[:, "hour"].mode()
views.loc[:, "daytime"].mode()
## 3 earliest visits in the morning and 3 latest hours
# Only the visits labelled "morning" are candidates for the earliest ones.
morning_users = views[views["daytime"] == "morning"]
# Rank on the full time of day. Ordering on "hour" alone leaves every tie
# inside the same hour to be broken by row order, so the rows returned were
# not necessarily the earliest (or latest) visits. The hour values selected
# are the same as before; only the choice among same-hour rows changes.
# NOTE(review): in a notebook only the last expression of a cell is rendered -
# confirm these two expressions live in separate cells.
morning_users.nsmallest(3, ["hour", "minute", "second"])
views.nlargest(3, ["hour", "minute", "second"])
## Statistics
# Summary statistics (count, mean, std, min, quartiles, max) for the three
# time-of-day columns.
views.loc[:, ["hour", "minute", "second"]].describe()
## Calculating IQR for an hour
# Interquartile range of the hour column: third quartile minus first
# quartile, both read from the describe() summary.
desc = views["hour"].describe()
iqr = desc.loc["75%"] - desc.loc["25%"]
iqr
