In [179]:
import pandas as pd

## create a dataframe:

In [180]:
data = pd.read_csv(
    "../data/feed-views.log",
    names=['datetime', 'user'],
    sep='\t'
)
data['datetime'] = pd.to_datetime(data['datetime'], infer_datetime_format=True).astype("datetime64[ns]")
data['year'] = data['datetime'].dt.year
data['month'] = data['datetime'].dt.month
data['day'] = data['datetime'].dt.day
data['hour'] = data['datetime'].dt.hour
data['minute'] = data['datetime'].dt.minute
data['second'] = data['datetime'].dt.second

## create the new column daytime

In [181]:
evaluation_bins = [0, 3, 6, 10, 16, 19, 23]
group_names = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
data['daytime'] = pd.cut(data['hour'], bins = evaluation_bins, labels = group_names, include_lowest = True, ordered=False)
data.set_index('user')

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
artem,2020-04-17 12:01:08.463179,2020,4,17,12,1,8,afternoon
artem,2020-04-17 12:01:23.743946,2020,4,17,12,1,23,afternoon
artem,2020-04-17 12:27:30.646665,2020,4,17,12,27,30,afternoon
artem,2020-04-17 12:35:44.884757,2020,4,17,12,35,44,afternoon
artem,2020-04-17 12:35:52.735016,2020,4,17,12,35,52,afternoon
...,...,...,...,...,...,...,...,...
valentina,2020-05-21 18:45:20.441142,2020,5,21,18,45,20,early evening
maxim,2020-05-21 23:03:06.457819,2020,5,21,23,3,6,evening
pavel,2020-05-21 23:23:49.995349,2020,5,21,23,23,49,evening
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening


##  number of elements

In [182]:
number_of_elements = data.count()
value_counts = data.value_counts('daytime')
value_counts

daytime
evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
dtype: int64

## sort values

In [183]:
data.sort_values(['hour', 'minute', 'second'])

Unnamed: 0,datetime,user,year,month,day,hour,minute,second,daytime
944,2020-05-15 00:00:13.222265,valentina,2020,5,15,0,0,13,night
945,2020-05-15 00:01:05.153738,valentina,2020,5,15,0,1,5,night
563,2020-05-12 00:01:27.764025,pavel,2020,5,12,0,1,27,night
564,2020-05-12 00:01:38.444917,pavel,2020,5,12,0,1,38,night
565,2020-05-12 00:01:55.395042,pavel,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...,...
1074,2020-05-21 23:49:22.386789,artem,2020,5,21,23,49,22,evening
246,2020-05-09 23:53:55.599821,anatoliy,2020,5,9,23,53,55,evening
247,2020-05-09 23:54:54.260791,pavel,2020,5,9,23,54,54,evening
942,2020-05-14 23:58:56.754866,valentina,2020,5,14,23,58,56,evening


## min and max

In [184]:
data.groupby('daytime').max('hour')

Unnamed: 0_level_0,year,month,day,hour,minute,second
daytime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
night,2020,5,24,3,57,59
early morning,2020,5,10,4,31,53
morning,2020,5,30,10,58,59
afternoon,2020,5,30,16,59,59
early evening,2020,5,30,19,59,59
evening,2020,5,30,23,59,59


In [185]:
data.groupby('daytime').min('hour')



Unnamed: 0_level_0,year,month,day,hour,minute,second
daytime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
night,2020,4,6,0,0,0
early morning,2020,5,10,4,8,13
morning,2020,4,2,8,2,1
afternoon,2020,4,1,11,0,0
early evening,2020,4,1,17,2,0
evening,2020,4,1,20,0,0


In [186]:
for index, row in data.groupby('daytime').max('hour').iterrows():
    person = data.loc[data['hour'] == row['hour']].head(1)
    print(index, row['hour'], person['user'].item())

night 3 konstantin
early morning 4 konstantin
morning 10 konstantin
afternoon 16 artem
early evening 19 artem
evening 23 konstantin


In [187]:
for index, row in data.groupby('daytime').min('hour').iterrows():
    person = data.loc[data['hour'] == row['hour']].head(1)
    print(index, row['hour'], person['user'].item())

night 0 artem
early morning 4 konstantin
morning 8 alexander
afternoon 11 artem
early evening 17 artem
evening 20 artem


In [188]:
data['hour'].mode()
data['daytime'].mode()

0    evening
Name: daytime, dtype: category
Categories (6, object): ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']

## 3 earliest hours