In [1]:
import pandas as pd

## create a dataframe views with two columns: datetime and user by reading feedviews and convert the datetime to the datetime64[ns] Dtype

In [3]:
df = pd.read_csv('../data/feed-views.log', sep='\t', header=None, names = ['datetime', 'user'], engine='python')
df

Unnamed: 0,datetime,user
0,2020-04-17 12:01:08.463179,artem
1,2020-04-17 12:01:23.743946,artem
2,2020-04-17 12:27:30.646665,artem
3,2020-04-17 12:35:44.884757,artem
4,2020-04-17 12:35:52.735016,artem
...,...,...
1071,2020-05-21 18:45:20.441142,valentina
1072,2020-05-21 23:03:06.457819,maxim
1073,2020-05-21 23:23:49.995349,pavel
1074,2020-05-21 23:49:22.386789,artem


## convert the datetime to the datetime64[ns] Dtype

In [8]:
df['datetime'] = pd.to_datetime(df['datetime'])

## extract the year, month, day, hour, minute, and second from the values of that column to the new columns

In [9]:
df['year'] = df['datetime'].dt.year
df['month'] = df['datetime'].dt.month
df['hour'] = df['datetime'].dt.hour
df['minute'] = df['datetime'].dt.minute
df['second'] = df['datetime'].dt.second

Unnamed: 0,datetime,user,year,month,hour,minute,second
0,2020-04-17 12:01:08.463179,artem,2020,4,12,1,8
1,2020-04-17 12:01:23.743946,artem,2020,4,12,1,23
2,2020-04-17 12:27:30.646665,artem,2020,4,12,27,30
3,2020-04-17 12:35:44.884757,artem,2020,4,12,35,44
4,2020-04-17 12:35:52.735016,artem,2020,4,12,35,52
...,...,...,...,...,...,...,...
1071,2020-05-21 18:45:20.441142,valentina,2020,5,18,45,20
1072,2020-05-21 23:03:06.457819,maxim,2020,5,23,3,6
1073,2020-05-21 23:23:49.995349,pavel,2020,5,23,23,49
1074,2020-05-21 23:49:22.386789,artem,2020,5,23,49,22


## create the new column daytime

In [43]:
daytimes = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
df['daytime'] = pd.cut(df['hour'], bins = [0, 3, 6, 10, 16, 19, 23], labels = daytimes, include_lowest=True)
df.set_index('user', inplace=True)

## calculate the number of elements in your dataframe

In [44]:
df.count()

datetime    1076
year        1076
month       1076
hour        1076
minute      1076
second      1076
daytime     1076
dtype: int64

## calculate the number of elements in each time of day category using the method value_counts()

In [45]:
df.value_counts()

datetime                    year  month  hour  minute  second  daytime      
2020-04-17 12:01:08.463179  2020  4      12    1       8       afternoon        1
2020-05-13 20:00:59.235948  2020  5      20    0       59      evening          1
2020-05-13 17:14:04.566458  2020  5      17    14      4       early evening    1
2020-05-13 17:14:34.067153  2020  5      17    14      34      early evening    1
2020-05-13 17:14:49.837479  2020  5      17    14      49      early evening    1
                                                                               ..
2020-05-11 11:20:32.750952  2020  5      11    20      32      afternoon        1
2020-05-11 11:22:21.614275  2020  5      11    22      21      afternoon        1
2020-05-11 15:39:38.220003  2020  5      15    39      38      afternoon        1
2020-05-11 15:39:57.277993  2020  5      15    39      57      afternoon        1
2020-05-22 10:36:14.662600  2020  5      10    36      14      morning          1
Length: 1076, dtype: 

## sort values in your dataframe by hour, minute and second ascendingly (simultaneously and not one by one)


In [46]:
df.sort_values(['hour', 'minute', 'second'], inplace=True)

## calculate the minimum, maximum for the hours and the mode for the daytime categories

In [47]:
time_max = df['hour'][df.daytime == 'night'].max()
time_min = df['hour'][df.daytime == 'morning'].min()

## calculate the maximum of hour for the rows where the daytime is night

In [48]:
df[df['daytime'] == 'night'].hour.max()

3

## calculate the minimum of hour for the rows where the daytime is morning

In [49]:
df[df['daytime'] == 'morning'].hour.min()

8

## In addition to this, find out who visited the page at those hours (make one example from that)

In [50]:
df.loc[df.daytime == 'night'].hour.idxmax()

'konstantin'

In [51]:
df.loc[df.daytime == 'morning'].hour.idxmin()

'alexander'

## calculate the mode for the hour and daytime

In [52]:
df.hour.mode()

0    22
dtype: int64

In [53]:
df.daytime.mode()

0    evening
Name: daytime, dtype: category
Categories (6, object): ['night' < 'early morning' < 'morning' < 'afternoon' < 'early evening' < 'evening']

## show the 3 earliest hours in the morning and the corresponding usernames and the 3 latest hours and the usernames using nsmallest() and nlargest()

In [54]:
df[df.daytime == 'morning'].nsmallest(3, 'hour')['hour']

user
alexander    8
alexander    8
alexander    9
Name: hour, dtype: int64

In [55]:
df[df.daytime == 'morning'].nlargest(3, 'hour')['hour']

user
konstantin    10
maxim         10
maxim         10
Name: hour, dtype: int64

## use the method describe() to get the basic statistics for the columns. To find out what the most popular interval for visiting the page is, calculate the interquartile range

In [38]:
inter = df.describe()['hour']['75%'] - df.describe()['hour']['25%']
inter

9.0