In [None]:
import logging
import os
from datetime import datetime

current_file_name = "2_UXtweak_Mouse_Data_Downloading"

# One log file per run, named after the moment the run started.
dt_string = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = f"logs/{current_file_name}/{dt_string}.log"

# basicConfig opens the log file right away and raises FileNotFoundError when
# its folder is missing, so make sure the folder exists first.
os.makedirs(os.path.dirname(log_file), exist_ok=True)
logging.basicConfig(level=logging.INFO, filename=log_file,filemode="w", format="%(asctime)s %(levelname)s %(message)s")

# https://blog.sentry.io/logging-in-python-a-developers-guide/

In [None]:
import urllib.request, json 
import pandas as pd
import os

In [None]:
from helpers.constants import *
from helpers.utils import *

In [None]:
# Let pandas display up to 500 columns before it starts truncating wide frames.
pd.options.display.max_columns = 500

In [None]:
# Read the replay API token from its file, dropping trailing whitespace/newlines.
with open("tokens/Smrecek_ReplayToken.txt", "r") as token_file:
    token_text = token_file.read()
token = token_text.rstrip()

logging.info("Token loaded")

In [None]:
# Build the path with os.path.join so it resolves on every OS, not only where
# "\" is the path separator (the resulting string is unchanged on Windows).
path_to_sessions = os.path.join("data", "0_Raw_Data", "uxtweak_sessions.csv")
# The sessions export is semicolon-separated.
sessions = pd.read_csv(path_to_sessions, delimiter=";")

logging.info("Sessions loaded")
logging.info(f"Sessions shape: {sessions.shape}")

In [None]:
sessions.head()

In [None]:
# Keep only useable sessions for which neither the baked nor the raw data has
# been downloaded yet, and reduce the frame to the columns used further down.
is_useable = sessions["Useable"] == True
baked_missing = sessions["Baked Downloaded"] == False
raw_missing = sessions["Raw Downloaded"] == False
sessions = sessions.loc[
    is_useable & baked_missing & raw_missing,
    ["Variant", "Respondent", "Session"],
]

logging.info("Sessions filtered")
logging.info(f"Sessions shape: {sessions.shape}")

In [None]:
sessions.head()

In [None]:
# [Respondent, Session] pairs of the "FG" variant; the slice previews the first five.
fg_rows = sessions.loc[sessions["Variant"] == "FG", ["Respondent", "Session"]]
sessions_fg = fg_rows.values.tolist()
sessions_fg[:5]

In [None]:
# [Respondent, Session] pairs of the "H" variant; the slice previews the first five.
h_rows = sessions.loc[sessions["Variant"] == "H", ["Respondent", "Session"]]
sessions_h = h_rows.values.tolist()
sessions_h[:5]

In [None]:
@timer
def baked_data_downloader(sessions, token, path):
    """Download the baked JSON of every session and store it on disk.

    For each session a folder ``respondent_<respondent>`` is ensured under
    ``path`` and the JSON returned by the replay ``data/stream`` endpoint is
    written into it as ``respondent_<respondent>_baked_<session>.json``.

    Args:
        sessions: Sequence of items whose element 0 is the respondent and
            element 1 is the session id.
        token: Replay API token, sent as the ``token`` query parameter.
        path: Directory that receives the respondent folders (created if
            it does not exist yet).

    Raises:
        urllib.error.URLError: If a download fails.
        json.JSONDecodeError: If a response body is not valid JSON.
    """
    total = len(sessions)
    for count, session in enumerate(sessions, start=1):
        respondent, session_id = session[0], session[1]

        print("Task {} of {}".format(count, total))
        logging.info(f"Task {count} of {total}")

        respondent_dir = os.path.join(path, f"respondent_{respondent}")
        # exist_ok: a respondent with several sessions, or a re-run after a
        # partial download, must not crash on the already existing folder.
        os.makedirs(respondent_dir, exist_ok=True)
        logging.info(f"Directory created: {respondent_dir}")

        stream_url = f"https://replay.uxtweak.com/api/v1/data/stream/{session_id}"
        # The token is a credential, so it is kept out of the log file.
        logging.info(f"Downloading from url: {stream_url}?token=<redacted>")

        with urllib.request.urlopen(f"{stream_url}?token={token}") as response:
            data = json.loads(response.read().decode())

        path_to_baked = os.path.join(
            respondent_dir, f"respondent_{respondent}_baked_{session_id}.json"
        )
        with open(path_to_baked, "w") as outfile:
            json.dump(data, outfile)

        print("   Path to file: " + path_to_baked)
        logging.info(f"Path to file: {path_to_baked}")

In [None]:
@timer
def raw_data_downloader(sessions, token, path):
    """Download the raw event JSON of every pageview of every session.

    For each session the baked file
    ``<path>/respondent_<respondent>/respondent_<respondent>_baked_<session>.json``
    is read, and for every entry of its ``pageviews`` list the replay
    ``data/events`` endpoint is queried. Each response is saved next to the
    baked file as
    ``respondent_<respondent>_raw_pageview_<n>_<pageview id>.json``, where
    ``n`` is the 1-based position of the pageview.

    Args:
        sessions: Sequence of items whose element 0 is the respondent and
            element 1 is the session id.
        token: Replay API token, sent as the ``token`` query parameter.
        path: Directory that contains the respondent folders.

    Raises:
        FileNotFoundError: If the baked file of a session does not exist.
        KeyError: If the baked data or a pageview lacks an expected key.
        urllib.error.URLError: If a download fails.
        json.JSONDecodeError: If a file or response is not valid JSON.
    """
    total = len(sessions)
    for count, session in enumerate(sessions, start=1):
        respondent, session_id = session[0], session[1]

        print("Task {} of {}".format(count, total))
        logging.info(f"Task {count} of {total}")

        respondent_dir = os.path.join(path, f"respondent_{respondent}")
        path_to_baked = os.path.join(
            respondent_dir, f"respondent_{respondent}_baked_{session_id}.json"
        )
        logging.info(f"Session: {session}, Path to baked: {path_to_baked}")

        # Load the pageview list up front so the baked file is not kept open
        # while the (potentially slow) downloads run.
        with open(path_to_baked) as loadfile:
            pageviews = json.load(loadfile)['pageviews']

        pageview_total = len(pageviews)
        print("  Number of pageviews: ", pageview_total)
        logging.info(f"Number of pageviews: {pageview_total}")

        for pageview_number, pageview in enumerate(pageviews, start=1):
            logging.info(f"Task {count} of {total}, Pageview {pageview_number} of {pageview_total}")

            project_id = pageview['projectId']
            pageview_session_id = pageview['sessionId']
            pageview_id = pageview['id']

            # f-string formatting also copes with ids that were decoded from
            # JSON as numbers; "+" concatenation would raise TypeError there.
            events_url = (
                "https://replay.uxtweak.com/api/v1/data/events/"
                f"{project_id}/{pageview_session_id}/{pageview_id}"
            )
            # The token is a credential, so it is kept out of the log file.
            logging.info(f"Downloading from url: {events_url}?token=<redacted>")

            with urllib.request.urlopen(f"{events_url}?token={token}") as response:
                raw_data = json.loads(response.read().decode())

            path_to_raw = os.path.join(
                respondent_dir,
                f"respondent_{respondent}_raw_pageview_{pageview_number}_{pageview_id}.json",
            )
            with open(path_to_raw, 'w') as outfile:
                json.dump(raw_data, outfile)

            print("      Path to file: " + path_to_raw)
            logging.info(f"Path to file: {path_to_raw}")

In [None]:
# os.path.join keeps the paths valid on every OS instead of hard-coding "\"
# (the resulting strings are unchanged on Windows).
folder_path_fg = os.path.join("data", "2_UXtweak_Mouse_Data_Downloading", "FG")
folder_path_h = os.path.join("data", "2_UXtweak_Mouse_Data_Downloading", "H")

# makedirs also creates the missing parent folder, which os.mkdir cannot do,
# and exist_ok keeps the cell safe to re-run.
for folder_path in (folder_path_fg, folder_path_h):
    os.makedirs(folder_path, exist_ok=True)

logging.info(f"Directories created: {folder_path_fg}, {folder_path_h}")

In [None]:
# Download the baked JSON of every FG session into folder_path_fg.
baked_data_downloader(sessions_fg, token, folder_path_fg)
logging.warning("Baked data downloader for FG sessions finished")

In [None]:
# Download the baked JSON of every H session into folder_path_h.
baked_data_downloader(sessions_h, token, folder_path_h)
logging.warning("Baked data downloader for H sessions finished")

In [None]:
# Download the raw pageview data of the FG sessions. raw_data_downloader reads
# the baked JSON files from folder_path_fg, so the baked FG download above
# has to have completed first.
raw_data_downloader(sessions_fg, token, folder_path_fg)
logging.warning("Raw data downloader for FG sessions finished")

In [None]:
# Download the raw pageview data of the H sessions. raw_data_downloader reads
# the baked JSON files from folder_path_h, so the baked H download above
# has to have completed first.
raw_data_downloader(sessions_h, token, folder_path_h)
logging.warning("Raw data downloader for H sessions finished")