## Create a dataframe views with two columns: datetime and user by reading feed-views.log

In [1]:
import pandas as pd
data = pd.read_csv('../data/feed-views.log', sep='\t', parse_dates=[0], header=None, names=['datetime', 'user'])
dataframe = pd.DataFrame(data)

date_time_cols = list(zip(*[(d.year, d.month, d.day, d.hour, d.minute, d.second) for d in dataframe['datetime']]))

cols = ('year', 'month', 'day', 'hour', 'minute', 'second')
for i in range(6):
    dataframe[cols[i]] = date_time_cols[i]

dataframe

Unnamed: 0,datetime,user,year,month,day,hour,minute,second
0,2020-04-17 12:01:08.463179,artem,2020,4,17,12,1,8
1,2020-04-17 12:01:23.743946,artem,2020,4,17,12,1,23
2,2020-04-17 12:27:30.646665,artem,2020,4,17,12,27,30
3,2020-04-17 12:35:44.884757,artem,2020,4,17,12,35,44
4,2020-04-17 12:35:52.735016,artem,2020,4,17,12,35,52
...,...,...,...,...,...,...,...,...
1071,2020-05-21 18:45:20.441142,valentina,2020,5,21,18,45,20
1072,2020-05-21 23:03:06.457819,maxim,2020,5,21,23,3,6
1073,2020-05-21 23:23:49.995349,pavel,2020,5,21,23,23,49
1074,2020-05-21 23:49:22.386789,artem,2020,5,21,23,49,22


## Create the new column daytime

you need to assign the particular daytime value if an hour is in the particular
interval, for example, afternoon if the hour is larger than 11 and less or equal
to 17

◦ 0.00 – 03.59 night, 04.00 – 06.59 early morning, 07.00 – 10.59 morning, 11.00
– 16.59 afternoon, 17.00 – 19.59 early evening, 20.00 – 23.59 evening

◦ use method cut to solve this subtask

◦ assign the column user as the index

In [2]:
dataframe['daytime'] = pd.cut(dataframe['hour'], [0, 4, 7, 11, 17, 20, 24], right=False, labels=[
    'night',
    'early morning',
    'morning',
    'afternoon',
    'early evening',
    'evening'
])
dataframe.set_index('user', inplace=True)
dataframe

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
artem,2020-04-17 12:01:08.463179,2020,4,17,12,1,8,afternoon
artem,2020-04-17 12:01:23.743946,2020,4,17,12,1,23,afternoon
artem,2020-04-17 12:27:30.646665,2020,4,17,12,27,30,afternoon
artem,2020-04-17 12:35:44.884757,2020,4,17,12,35,44,afternoon
artem,2020-04-17 12:35:52.735016,2020,4,17,12,35,52,afternoon
...,...,...,...,...,...,...,...,...
valentina,2020-05-21 18:45:20.441142,2020,5,21,18,45,20,early evening
maxim,2020-05-21 23:03:06.457819,2020,5,21,23,3,6,evening
pavel,2020-05-21 23:23:49.995349,2020,5,21,23,23,49,evening
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening


## Calculate the number of elements in your dataframe

use the method count()

In [3]:
dataframe.count()[0]

1076

calculate the number of elements in each daytime category using the method
value_counts()

In [4]:
dataframe.value_counts('daytime')

daytime
evening          509
afternoon        252
early evening    145
night            129
morning           36
early morning      5
dtype: int64

## Sort values in your dataframe by hour, minute and second ascendingly (simultaneously and not one by one)

In [5]:
dataframe.sort_values(['hour', 'minute', 'second'], inplace=True)
dataframe

Unnamed: 0_level_0,datetime,year,month,day,hour,minute,second,daytime
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
valentina,2020-05-15 00:00:13.222265,2020,5,15,0,0,13,night
valentina,2020-05-15 00:01:05.153738,2020,5,15,0,1,5,night
pavel,2020-05-12 00:01:27.764025,2020,5,12,0,1,27,night
pavel,2020-05-12 00:01:38.444917,2020,5,12,0,1,38,night
pavel,2020-05-12 00:01:55.395042,2020,5,12,0,1,55,night
...,...,...,...,...,...,...,...,...
artem,2020-05-21 23:49:22.386789,2020,5,21,23,49,22,evening
anatoliy,2020-05-09 23:53:55.599821,2020,5,9,23,53,55,evening
pavel,2020-05-09 23:54:54.260791,2020,5,9,23,54,54,evening
valentina,2020-05-14 23:58:56.754866,2020,5,14,23,58,56,evening


## Calculate the minimum, maximum for the hours and the mode for the daytime categories

### calculate the maximum of hour for the rows where the daytime is night

In [6]:
max_night_hour = dataframe[dataframe['daytime'] == 'night']['hour'].max()
max_night_hour

3

### calculate the minimum of hour for the rows where the daytime is morning

In [7]:
min_morning_hour = dataframe[dataframe['daytime'] == 'morning']['hour'].min()
min_morning_hour

8

### additionally to this, find out who visited the the page at those hours (one example out of it)

In [8]:
dataframe[dataframe['hour'] == min_morning_hour].iloc[0]

datetime    2020-05-15 08:16:03.918402
year                              2020
month                                5
day                                 15
hour                                 8
minute                              16
second                               3
daytime                        morning
Name: alexander, dtype: object

In [9]:
dataframe[dataframe['hour'] == max_night_hour].iloc[0]

datetime    2020-04-19 03:23:35.471598
year                              2020
month                                4
day                                 19
hour                                 3
minute                              23
second                              35
daytime                          night
Name: konstantin, dtype: object

### calculate the mode for the hour and daytime

In [10]:
mode = dataframe.mode().iloc[0]
f'{mode["hour"]} {mode["daytime"]}'

'22.0 evening'

## show the 3 smallest hours in the morning and the corresponding usernames and the 3 largest hours and the usernames using nsmallest() and nlargest()

In [11]:
morning_ds = dataframe[dataframe['daytime'] == 'morning']
pd.concat([morning_ds['hour'].nsmallest(3), dataframe['hour'].nlargest(3)])

user
alexander     8
alexander     8
alexander     9
ekaterina    23
ekaterina    23
ekaterina    23
Name: hour, dtype: int64

## use the method describe() to get the basic statistics for the columns

### to find out what the most popular interval of visiting the page is, calculate the interquartile range for the hour by extracting values from the result of the describe() method and store it in the variable iqr

In [12]:
iqr = dataframe.describe()['hour']['50%']
iqr

19.0