In [1]:
import csv
from datetime import datetime, timedelta
import os
import pandas as pd
from pprint import pprint
from time import mktime
from typing import Dict, List, Tuple

In [2]:
# helper functions for filtering and parsing interaction rows
def extract_page_view_data(row: List[str]) -> Tuple[str, str, str, str, str]:
    """Pick the five fields used to classify a page view out of a CSV row.

    Args:
        row: One parsed CSV record; positions 0, 1, 2, 4 and 5 are read.

    Returns:
        A tuple ``(eid, eaction, ecategory, etype, pid)`` taken from those
        positions, in that order.

    Raises:
        IndexError: If ``row`` has fewer than six entries.
    """
    # Position 3 is deliberately absent: it is not part of the classification key.
    wanted_positions = (0, 1, 2, 4, 5)
    return tuple(row[position] for position in wanted_positions)

def valid_event_page_view(row: List[str]) -> bool:
    """Tell whether a row is a click on the event tab of the home-customer page.

    The row matches when its (element id, action, category, type, page id)
    fields, as returned by ``extract_page_view_data``, equal the expected
    five-value signature below.
    """
    expected_signature = ('tab-event', 'click', 'tab', 'event', 'home-customer')
    return extract_page_view_data(row) == expected_signature

def valid_used_page_view(row: List[str]) -> bool:
    """Tell whether a row records entering the ``used`` page.

    The row matches when its (element id, action, category, type, page id)
    fields, as returned by ``extract_page_view_data``, equal the expected
    five-value signature below.
    """
    # NOTE(review): the category literal here is 'pages', while the new-car
    # filter in this file uses 'page' — confirm against the real data.
    expected_signature = ('none', 'enter', 'pages', 'used', 'used')
    return extract_page_view_data(row) == expected_signature

def valid_new_page_view(row: List[str]) -> bool:
    """Tell whether a row records entering the new-car ``vehicle`` page.

    The row matches when its (element id, action, category, type, page id)
    fields, as returned by ``extract_page_view_data``, equal the expected
    five-value signature below.
    """
    expected_signature = ('none', 'enter', 'page', 'new-car', 'vehicle')
    return extract_page_view_data(row) == expected_signature

def csv_to_dict(row: List[str]) -> Dict:
    """Turn one positional CSV record into a keyed record.

    The first nine entries of ``row`` are mapped, in order, onto the field
    names listed below. ``timestamp`` is converted with ``int`` and
    ``is_domestic`` becomes ``True`` only for the exact text ``'True'``;
    every other field is passed through unchanged.

    Args:
        row: Parsed CSV record with at least nine entries.

    Returns:
        A dict whose keys appear in the same order as the CSV columns.

    Raises:
        IndexError: If ``row`` has fewer than nine entries.
        ValueError: If the timestamp entry is not an integer literal.
    """
    field_names = (
        'element_id',
        'event_action',
        'event_category',
        'event_label',
        'event_type',
        'page_id',
        'timestamp',
        'user_id',
        'is_domestic',
    )
    # Only these two fields need conversion; the rest stay as raw strings.
    converters = {
        'timestamp': int,
        'is_domestic': lambda flag: flag == 'True',
    }

    record = {}
    for position, field in enumerate(field_names):
        raw_value = row[position]
        convert = converters.get(field)
        record[field] = raw_value if convert is None else convert(raw_value)
    return record


In [3]:
# Split the raw interaction export into one CSV per page-view kind.
#
# Each input row is tested against the three filters independently and is
# appended to every output whose filter it satisfies.
#
# - Opening in 'w' mode truncates an existing file, so no explicit delete is
#   needed before writing.
# - All handles live in a single `with` statement, so they are flushed and
#   closed even if a filter raises part-way through the input.
# - newline='' is what the csv module expects for files handed to
#   csv.reader / csv.writer; without it rows can gain stray blank lines on
#   platforms that translate line endings.
with open('event_page_views.csv', 'w', newline='') as event_page_views, \
        open('used_page_views.csv', 'w', newline='') as used_page_views, \
        open('new_page_views.csv', 'w', newline='') as new_page_views, \
        open('interaction_data_1548735930.csv', newline='') as csv_file:
    writer_event_page_views = csv.writer(event_page_views, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer_used_page_views = csv.writer(used_page_views, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer_new_page_views = csv.writer(new_page_views, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        # csv.reader yields [] for a blank line; the filters index into the
        # row, so skip such rows instead of failing with IndexError.
        if not row:
            continue
        if valid_event_page_view(row):
            writer_event_page_views.writerow(row)
        if valid_used_page_view(row):
            writer_used_page_views.writerow(row)
        if valid_new_page_view(row):
            writer_new_page_views.writerow(row)

In [4]:
# Build `timestamps`: epoch-second marks spaced one day (86400 s) apart,
# starting at the reference instant and running up to the shifted "now".
#
# NOTE(review): mktime() interprets the struct_time as local time, so
# `now_timestamp` depends on the kernel's timezone. The +9h shift looks like a
# UTC -> JST adjustment — confirm.
timestamp_map = {}

now = datetime.now() + timedelta(hours=9, minutes=0)
now_timestamp = mktime(now.timetuple())

# 1522249200 + 32400 == 1522281600, which is 2018-03-29 00:00:00 UTC.
reference_timestamp = 1522249200 + 32400

# Every mark <= now_timestamp, one per day. The stop bound is exclusive, hence +1.
timestamps = list(range(reference_timestamp, int(now_timestamp) + 1, 86400))

# Leave reference_timestamp one day past the last generated mark.
if timestamps:
    reference_timestamp = timestamps[-1] + 86400


In [5]:
# Load the three filtered CSVs, then count how many views of each kind fall
# inside every one-day window [ts, ts + 86400) listed in `timestamps`.

# Keys produced by csv_to_dict, in order. Passing them explicitly keeps a
# 'timestamp' column present even when a filtered CSV has no rows; without
# this an empty file yields a column-less frame and the lookup below raises
# KeyError.
VIEW_COLUMNS = [
    'element_id',
    'event_action',
    'event_category',
    'event_label',
    'event_type',
    'page_id',
    'timestamp',
    'user_id',
    'is_domestic',
]


def _load_views(path: str) -> List[Dict]:
    """Read one filtered CSV and convert every non-blank row with csv_to_dict."""
    with open(path, newline='') as csv_file:
        # Blank lines come back from csv.reader as []; csv_to_dict cannot index them.
        return [csv_to_dict(row) for row in csv.reader(csv_file, delimiter=',') if row]


def _views_frame(views: List[Dict]) -> pd.DataFrame:
    """Build a frame with a fixed column set and an integer 'timestamp' column."""
    return pd.DataFrame(views, columns=VIEW_COLUMNS).astype({'timestamp': 'int64'})


def _count_in_window(frame: pd.DataFrame, start: int, end: int) -> int:
    """Count rows whose timestamp lies in the half-open interval [start, end)."""
    stamps = frame['timestamp']
    return int(((stamps >= start) & (stamps < end)).sum())


event_views = _load_views('./event_page_views.csv')
used_views = _load_views('./used_page_views.csv')
new_views = _load_views('./new_page_views.csv')

df_event_views = _views_frame(event_views)
df_used_views = _views_frame(used_views)
df_new_views = _views_frame(new_views)

view_counts = []
for ts in timestamps:
    limit = ts + 86400  # exclusive end of the one-day window
    view_counts.append({
        'event': _count_in_window(df_event_views, ts, limit),
        'used': _count_in_window(df_used_views, ts, limit),
        'new': _count_in_window(df_new_views, ts, limit),
    })

# One row per day in `timestamps`, columns event/used/new, no header or index.
df_view_counts = pd.DataFrame(view_counts)
df_view_counts.to_csv('view_counts.csv', sep=',', encoding='utf-8', index=False, header=False)
