In [2]:
import pandas as pd

views = pd.read_csv('../data/feed-views.log', 
                 sep = '\t',
                 names = ['datetime', 'user'],
                 parse_dates = ['datetime'],
                 engine = 'python')

views["year"] = views["datetime"].dt.year
views["month"] = views["datetime"].dt.month
views["day"] = views["datetime"].dt.day
views["hour"] = views["datetime"].dt.hour
views["minute"] = views["datetime"].dt.minute
views["second"] = views["datetime"].dt.second

bins = [0, 3.59, 6.59, 10.59, 16.59, 19.59, 23.59]
labels = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
views["daytime"] = pd.cut(views["hour"], bins, labels = labels)

views.set_index("user", inplace = True)

total_elements = views.count()

daytime_counts = views["daytime"].value_counts()

views_sorted = views.sort_values(by=["hour", "minute", "second"])

min_hour = views["hour"].min()
max_hour = views["hour"].max()

mode_daytime = views["daytime"].mode()[0]

max_hour_night = views.loc[views["daytime"] == "night", "hour"].max()
user_max_night = views.loc[views["hour"] == max_hour_night].index[0]

min_hour_morning = views.loc[views["daytime"] == "morning", "hour"].min()
user_min_morning = views.loc[views["hour"] == min_hour_morning].index[0]

mode_hour = views["hour"].mode()[0]

early_morning_users = views.nsmallest(3, "hour")[["hour"]]
late_night_users = views.nlargest(3, "hour")[["hour"]]

describe_stats = views[["hour", "minute", "second"]].describe()

q1 = describe_stats.loc["25%", "hour"]
q3 = describe_stats.loc["75%", "hour"]
iqr = q3 - q1

print("Total elements in DataFrame:\n", total_elements)
print("\nOccurrences in each daytime category:\n", daytime_counts)
print("\nMin hour:", min_hour)
print("Max hour:", max_hour)
print("Mode of daytime:", mode_daytime)
print(f"Max hour at night: {max_hour_night}, user: {user_max_night}")
print(f"Min hour in morning: {min_hour_morning}, user: {user_min_morning}")
print("Mode of hour:", mode_hour)
print("\nEarliest 3 users:\n", early_morning_users)
print("\nLatest 3 users:\n", late_night_users)
print("\nStatistics:\n", describe_stats)
print("\nInterquartile range (IQR) for hour:", iqr)

Total elements in DataFrame:
 datetime    1076
year        1076
month       1076
day         1076
hour        1076
minute      1076
second      1076
daytime     1011
dtype: int64

Occurrences in each daytime category:
 daytime
evening          509
afternoon        252
early evening    145
night             64
morning           36
early morning      5
Name: count, dtype: int64

Min hour: 0
Max hour: 23
Mode of daytime: evening
Max hour at night: 3, user: konstantin
Min hour in morning: 8, user: alexander
Mode of hour: 22

Earliest 3 users:
             hour
user            
artem          0
konstantin     0
konstantin     0

Latest 3 users:
             hour
user            
konstantin    23
artem         23
artem         23

Statistics:
               hour       minute       second
count  1076.000000  1076.000000  1076.000000
mean     16.249071    29.629182    29.500929
std       6.955490    17.689388    17.405506
min       0.000000     0.000000     0.000000
25%      13.000000    14.00