In [1]:
import os
from pathlib import Path
import numpy as np
import pandas as pd

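# Alternative ways to parse comma-grouped counts; kept for reference only,
# since read_data already passes thousands=',' to pd.read_csv.
# Option 1: locale-aware parsing
#   import locale
#   locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
#   # or: locale.setlocale(locale.LC_NUMERIC, '')
#   df['Count'].apply(lambda x: locale.atoi(x))
# Option 2: strip the commas with a regex replace
#   df['Count'] = df['Count'].replace({",":""}, regex=True).map(pd.eval).astype(int)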

In [2]:
def read_data(api_name):
    """Load every CSV found under ``./ds/<api_name>`` into one DataFrame.

    Each file is read with its first line skipped and its columns named
    ``IP``, ``Time``, ``Count`` and ``DateTime``; a comma inside a number is
    treated as a thousands separator. The part of the file name before the
    first ``.`` is prepended to ``Time`` and parsed to fill ``DateTime``
    (presumably the file name is the date of the capture -- confirm against
    the data set).

    Parameters
    ----------
    api_name : str
        Name of the sub-folder of ``./ds`` to read. Sub-folders are searched
        recursively for ``*.csv`` files.

    Returns
    -------
    pandas.DataFrame
        Rows of all files stacked in sorted file-path order, with a fresh
        0..n-1 index and ``Count`` cast to ``int32``.

    Raises
    ------
    AssertionError
        If the folder does not exist or contains no CSV file.
    """
    columns = ['IP', 'Time', 'Count', 'DateTime']

    folder = Path(f'./ds/{api_name}')
    assert folder.is_dir(), f"folder {folder} doesn't exist"
    # glob() yields files in an unspecified, filesystem-dependent order;
    # sorting keeps the row order (and so the index) reproducible.
    files = sorted(folder.glob('**/*.csv'))
    assert len(files) > 0, f"folder {folder} is empty"

    # Collect the per-file frames and concatenate once: concatenating inside
    # the loop copies all accumulated rows on every iteration, and seeding it
    # with an empty object-dtype frame is deprecated in recent pandas.
    frames = []
    for f in files:
        dt = f.name.split('.')[0]
        df = pd.read_csv(f, skiprows=1, header=None, names=columns, thousands=',')
        df['DateTime'] = pd.to_datetime(f'{dt} ' + df['Time'])
        frames.append(df)

    full_df = pd.concat(frames, ignore_index=True)

    # convert data type
    full_df['Count'] = full_df['Count'].astype(np.int32)

    return full_df

def get_max_count(df):
    """Return the row of ``df`` that holds the largest ``Count`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Count`` column.

    Returns
    -------
    pandas.Series
        The matching row; when several rows share the maximum, the first one.
    """
    # Series.argmax gives a position, not an index label, hence .iloc.
    return df.iloc[df['Count'].argmax()]


In [3]:
# Load every CSV under ./ds/get-site-profile and print the row with the
# highest Count. `df` is the notebook-wide frame that the following cells inspect.
df = read_data('get-site-profile')
print(get_max_count(df))

IP                  1.25.121.35
Time                      23:39
Count                        55
DateTime    2022-10-29 23:39:00
Name: 41808, dtype: object


In [4]:
# Check the column dtypes of the loaded frame (read_data casts Count to int32).
df.dtypes

IP                  object
Time                object
Count                int32
DateTime    datetime64[ns]
dtype: object

In [5]:
# Summary statistics over all rows, including those whose Count is 0.
df.describe()

Unnamed: 0,Count
count,60064.0
mean,1.207196
std,2.658512
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,55.0


In [6]:
# Same summary, restricted to rows whose Count is non-zero.
df[df['Count'] != 0].describe()

Unnamed: 0,Count
count,24854.0
mean,2.917398
std,3.477233
min,1.0
25%,1.0
50%,2.0
75%,3.0
max,55.0


In [7]:
# Rebind `df` to the 'get-favourite-events-count' data, print its busiest row,
# then summarise only the rows whose Count is non-zero.
df = read_data('get-favourite-events-count')
print(get_max_count(df))
df[df['Count'] != 0].describe()

IP                14.254.196.53
Time                      20:37
Count                      9470
DateTime    2022-10-23 20:37:00
Name: 293, dtype: object


Unnamed: 0,Count
count,1063.0
mean,144.491063
std,507.323903
min,1.0
25%,1.0
50%,2.0
75%,10.0
max,9470.0


In [8]:
# Rebind `df` to the 'get-all-live-events' data, print its busiest row,
# then summarise only the rows whose Count is non-zero.
df = read_data('get-all-live-events')
print(get_max_count(df))
df[df['Count'] != 0].describe()

IP                 123.60.64.93
Time                      22:51
Count                        67
DateTime    2022-10-23 22:51:00
Name: 2571, dtype: object


Unnamed: 0,Count
count,7894.0
mean,8.577654
std,9.628826
min,1.0
25%,2.0
50%,3.0
75%,16.0
max,67.0


In [9]:
# Rebind `df` to the 'get-all' data, print its busiest row,
# then summarise only the rows whose Count is non-zero.
df = read_data('get-all')
print(get_max_count(df))
df[df['Count'] != 0].describe()

IP               171.250.166.95
Time                      23:56
Count                      7558
DateTime    2022-10-23 23:56:00
Name: 3171, dtype: object


Unnamed: 0,Count
count,11305.0
mean,178.252808
std,229.035957
min,1.0
25%,89.0
50%,125.0
75%,235.0
max,7558.0
