In [8]:
import pandas as pd

### Create a dataframe views with two columns: datetime and user by reading feed-views.log

In [9]:
views = pd.read_csv(
    '../../datasets/feed-views.log',
    sep='\t',
    names=['datetime','users'],
)
print(views.dtypes)

datetime    object
users       object
dtype: object


### Convert the datetime to the datetime64[ns] Dtype

In [10]:
views['datetime'] = pd.to_datetime(views['datetime'])
print(views.dtypes)

datetime    datetime64[ns]
users               object
dtype: object


### Extract the year, month, day, hour, minute, and second from the values of that column to the new columns

In [11]:
views['year'] = views['datetime'].dt.year
views['month'] = views['datetime'].dt.month
views['day'] = views['datetime'].dt.day
views['hour'] = views['datetime'].dt.hour
views['minute'] = views['datetime'].dt.minute
views['second'] = views['datetime'].dt.second
print(views)

                       datetime      users  year  month  day  hour  minute  \
0    2020-04-17 12:01:08.463179      artem  2020      4   17    12       1   
1    2020-04-17 12:01:23.743946      artem  2020      4   17    12       1   
2    2020-04-17 12:27:30.646665      artem  2020      4   17    12      27   
3    2020-04-17 12:35:44.884757      artem  2020      4   17    12      35   
4    2020-04-17 12:35:52.735016      artem  2020      4   17    12      35   
...                         ...        ...   ...    ...  ...   ...     ...   
1071 2020-05-21 18:45:20.441142  valentina  2020      5   21    18      45   
1072 2020-05-21 23:03:06.457819      maxim  2020      5   21    23       3   
1073 2020-05-21 23:23:49.995349      pavel  2020      5   21    23      23   
1074 2020-05-21 23:49:22.386789      artem  2020      5   21    23      49   
1075 2020-05-22 10:36:14.662600      artem  2020      5   22    10      36   

      second  
0          8  
1         23  
2         30  
3  

### Create the new column daytime ("names like night, morning, etc.")

In [12]:
bins = [-1, 4, 7, 11, 17, 20, 24]  
labels = ['night', 'early morning', 'morning', 'afternoon', 'early evening', 'evening']
views['daytime'] = pd.cut(views.hour,
                      bins=bins,
                      labels=labels,
                    )
try:
    views = views.set_index('users')
except KeyError:
    print('"users" уже установлен как индекс')
  
print(views)

                            datetime  year  month  day  hour  minute  second  \
users                                                                          
artem     2020-04-17 12:01:08.463179  2020      4   17    12       1       8   
artem     2020-04-17 12:01:23.743946  2020      4   17    12       1      23   
artem     2020-04-17 12:27:30.646665  2020      4   17    12      27      30   
artem     2020-04-17 12:35:44.884757  2020      4   17    12      35      44   
artem     2020-04-17 12:35:52.735016  2020      4   17    12      35      52   
...                              ...   ...    ...  ...   ...     ...     ...   
valentina 2020-05-21 18:45:20.441142  2020      5   21    18      45      20   
maxim     2020-05-21 23:03:06.457819  2020      5   21    23       3       6   
pavel     2020-05-21 23:23:49.995349  2020      5   21    23      23      49   
artem     2020-05-21 23:49:22.386789  2020      5   21    23      49      22   
artem     2020-05-22 10:36:14.662600  20

### Calculate the number of elements in your dataframe

In [13]:
count = views.count()
print(count)
print(views['daytime'].value_counts())

datetime    1076
year        1076
month       1076
day         1076
hour        1076
minute      1076
second      1076
daytime     1076
dtype: int64
daytime
evening          445
afternoon        279
early evening    144
night            134
morning           74
early morning      0
Name: count, dtype: int64


### Sort values in your dataframe by hour, minute, and second in ascending order (simultaneously and not one by one)

In [14]:
sorted_views = views.sort_values(by=['hour', 'minute', 'second'])
print(views)

                            datetime  year  month  day  hour  minute  second  \
users                                                                          
artem     2020-04-17 12:01:08.463179  2020      4   17    12       1       8   
artem     2020-04-17 12:01:23.743946  2020      4   17    12       1      23   
artem     2020-04-17 12:27:30.646665  2020      4   17    12      27      30   
artem     2020-04-17 12:35:44.884757  2020      4   17    12      35      44   
artem     2020-04-17 12:35:52.735016  2020      4   17    12      35      52   
...                              ...   ...    ...  ...   ...     ...     ...   
valentina 2020-05-21 18:45:20.441142  2020      5   21    18      45      20   
maxim     2020-05-21 23:03:06.457819  2020      5   21    23       3       6   
pavel     2020-05-21 23:23:49.995349  2020      5   21    23      23      49   
artem     2020-05-21 23:49:22.386789  2020      5   21    23      49      22   
artem     2020-05-22 10:36:14.662600  20

### Calculate the minimum and maximum for the hours and the mode for the daytime categories

In [15]:
max_night_hour = views[views['daytime'] == 'night']['hour'].max()
min_morning_hour = views[views['daytime'] == 'morning']['hour'].min()
hour_mode = views['hour'].mode()[0]
day_mode = views['daytime'].mode()[0]

# Пример пользователя для каждого случая
example_night_user = views[views['hour'] == max_night_hour].iloc[0]
example_morning_user = views[views['hour'] == min_morning_hour].iloc[0]
print(max_night_hour, min_morning_hour, hour_mode, day_mode, example_night_user, example_morning_user)

4 8 22 evening datetime    2020-05-10 04:08:53.496691
year                              2020
month                                5
day                                 10
hour                                 4
minute                               8
second                              53
daytime                          night
Name: konstantin, dtype: object datetime    2020-05-15 08:16:03.918402
year                              2020
month                                5
day                                 15
hour                                 8
minute                              16
second                               3
daytime                        morning
Name: alexander, dtype: object


### Show the 3 earliest hours in the morning and the corresponding usernames and the 3 latest hours and the usernames

In [16]:
morning_views = views[views['daytime'] == 'morning']['hour']
earliest = morning_views.nsmallest(3)
latest = morning_views.nlargest(3)
print(earliest, latest)

users
alexander    8
alexander    8
artem        9
Name: hour, dtype: int32 users
artem    11
artem    11
artem    11
Name: hour, dtype: int32


### Use the method describe() to get the basic statistics for the columns

In [17]:
hour_stats = views['hour'].describe()
iqr = hour_stats['75%'] - hour_stats['25%']  # IQR = Q3 - Q1
print(iqr)
popular_start = hour_stats['25%']
popular_end = hour_stats['75%']
print(f"Most popular visiting hours: {popular_start:.0f}:00 - {popular_end:.0f}:00")

9.0
Most popular visiting hours: 13:00 - 22:00


In [18]:
views.loc[views.daytime == 'night'].hour.idxmax()

'konstantin'