In [24]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import numpy as np
import time
import re

In [25]:
class User:
    """Simulated viewer profile holding a collection of tags."""

    def __init__(self, tags) -> None:
        # The object is stored as given (no copy), so later mutations made by
        # the caller remain visible through ``self.tags``.
        self.tags = tags

In [27]:
class TikTokScraper:
    """Drive a Chrome session through TikTok's "For You" feed with Selenium.

    The scraper opens the feed, hides the login and captcha overlays, opens
    the first video and then steps through videos, pausing on each one for a
    randomised share of its length.

    NOTE(review): every locator below is a positional XPath taken from the
    live page; they presumably stop matching whenever the page markup changes.
    """

    # Upper bound, in seconds, for every explicit wait. When it elapses,
    # WebDriverWait.until raises selenium's TimeoutException.
    WAIT_TIMEOUT_SECONDS = 60

    def __init__(self) -> None:
        # Created lazily by create_and_run_scraper; None means "no session".
        self.driver = None

    def create_and_run_scraper(self, options):
        """Start Chrome with ``options``, open the feed and watch the first videos.

        Args:
            options: ``webdriver.ChromeOptions`` instance forwarded to
                ``webdriver.Chrome``.
        """
        self.driver = webdriver.Chrome(options=options)
        self.driver.get("https://www.tiktok.com/foryou")
        self.click_first_video()

    def quit_driver(self):
        """Close the browser session, if any, and forget the driver.

        Safe to call before a session was created or more than once.
        """
        if self.driver is None:
            return
        try:
            self.driver.quit()
        finally:
            # Drop the reference even if quit() fails so a stale driver is
            # never reused.
            self.driver = None

    def _wait_clickable(self, xpath):
        """Block until the element at ``xpath`` is clickable and return it."""
        return WebDriverWait(self.driver, timeout=self.WAIT_TIMEOUT_SECONDS).until(
            EC.element_to_be_clickable((By.XPATH, xpath))
        )

    def _wait_present(self, xpath):
        """Block until the element at ``xpath`` exists in the DOM and return it."""
        return WebDriverWait(self.driver, timeout=self.WAIT_TIMEOUT_SECONDS).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )

    def click_first_video(self):
        """Dismiss the overlays, open the first feed video and watch two videos.

        Sequence: wait for the login container, hide it, click the first
        video, wait for the captcha element, hide it, then watch the current
        video, advance once and watch the next one.

        NOTE(review): both overlay waits are mandatory, so this raises on
        timeout if the login prompt or the captcha never shows up.
        """
        self._wait_clickable('//*[@id="loginContainer"]/div/div/div/div[2]')
        # hide login (I don't know the consequences of this)
        self.driver.execute_script("document.evaluate('/html/body/div[5]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.style.display='none';")
        self._wait_clickable(
            '//*[@id="app"]/div[2]/div[2]/div[1]/div[1]/div/div[2]/div[1]'
        ).click()
        self._wait_clickable('//*[@id="tiktok-verify-ele"]/div/div[4]/div/a[1]')
        # hide captcha (I don't know the consequences of this)
        self.driver.execute_script("document.getElementById('tiktok-verify-ele').style.display='none';")
        time.sleep(self.watch_n_seconds())
        self.next_video()
        time.sleep(self.watch_n_seconds())

    # Clicks the next button to go forward a video
    def next_video(self):
        """Click the "next video" button once it is clickable."""
        self._wait_clickable('//*[@id="app"]/div[2]/div[4]/div/div[1]/button[3]').click()

    # Clicks the back button to go back a video
    def previous_video(self):
        """Click the "previous video" button once it is clickable.

        NOTE(review): this locator sits under ``div[3]`` while next_video uses
        ``div[4]``; confirm it still matches the current page.
        """
        # Uses self.driver; the bare name ``driver`` is not defined here.
        self._wait_clickable('//*[@id="app"]/div[2]/div[3]/div[1]/button[2]').click()

    @staticmethod
    def _parse_total_seconds(progress_text):
        """Convert the total part of ``"<elapsed>/<total>"`` into seconds.

        ``<total>`` may be ``ss``, ``m:ss`` or ``h:mm:ss``; surrounding
        whitespace and leading zeros are accepted.

        Raises:
            ValueError: if there is no ``/`` separator or a field is not an
                integer.
        """
        _, separator, total = progress_text.partition("/")
        if not separator:
            raise ValueError(
                f"no '/' separator in video progress text: {progress_text!r}"
            )
        seconds = 0
        # Fold the colon-separated fields base 60 so "0:30" and "1:02:03"
        # are handled by the same code path.
        for field in total.strip().split(":"):
            seconds = seconds * 60 + int(field)
        return seconds

    def get_video_length(self):
        """Return the current video's total length in whole seconds.

        Reads the innerHTML of the progress label, which is expected to look
        like ``"00:05/01:30"``, and parses the part after the slash.

        Raises:
            ValueError: if the label text does not have the expected shape.
        """
        progress_label = self._wait_present(
            '//*[@id="app"]/div[2]/div[4]/div/div[1]/div[3]/div[2]/div[2]'
        )
        return self._parse_total_seconds(str(progress_label.get_attribute("innerHTML")))

    # Most liked videos are watched between 75% and 100% of the way through
    def watch_n_seconds(self, min_fraction=0.75):
        """Pick a random number of seconds to spend on the current video.

        Args:
            min_fraction: smallest share of the video length that may be
                chosen; the largest is always the full length.

        Returns:
            int: a value drawn uniformly from
            ``[floor(min_fraction * length), length]``, both ends included.
            A zero-length video yields 0.
        """
        video_length = self.get_video_length()
        print(video_length)
        shortest = max(0, int(np.floor(min_fraction * video_length)))
        longest = max(shortest, int(video_length))
        # endpoint=True makes the upper bound inclusive and allows
        # shortest == longest, which happens for very short videos.
        return int(
            np.random.default_rng().integers(low=shortest, high=longest, endpoint=True)
        )

    def watch_n_videos(self, n, user, tags, pause_seconds=5):
        """Step through ``n`` videos, pausing ``pause_seconds`` on each.

        Args:
            n: number of videos to step through.
            user: viewer profile forwarded to ``watch_or_skip``.
            tags: tags forwarded to ``watch_or_skip``.
            pause_seconds: seconds to sleep before advancing.

        NOTE(review): ``watch_or_skip`` is not defined in the visible part of
        this notebook; it must exist at module level before this is called.
        """
        for _ in range(n):
            # The result lives in its own local name so it does not shadow the
            # module-level ``watch_or_skip`` function being called.
            should_watch = watch_or_skip(self.driver, user, tags)
            if should_watch:
                # TODO: the decision is computed but not acted upon yet.
                pass
            time.sleep(pause_seconds)
            self.next_video()

    # Gets and returns tags associated with the current video
    def get_current_video_tags(self):
        """Return the hashtag names linked from the current video's description.

        Collects every ``<a>`` below the description container and keeps the
        last path segment of links that contain ``/tag/``. Anchors without an
        ``href`` are ignored.
        """
        # wait until comment section is loaded
        description_container = self._wait_present(
            '//*[@id="app"]/div[2]/div[3]/div[2]/div[2]/div[1]'
        )
        hrefs = (
            anchor.get_attribute("href")
            for anchor in description_container.find_elements(By.TAG_NAME, "a")
        )
        # Matching "/tag/" rather than the bare substring "tag" keeps profile
        # links such as ".../@vintage" out of the result.
        return [href.split("/")[-1] for href in hrefs if href and "/tag/" in href]

In [28]:
# Chrome launch configuration handed to TikTokScraper.create_and_run_scraper.
options = webdriver.ChromeOptions()
options.add_argument("--disable-notifications")
options.add_argument("--disable-infobars")
# Excludes the `enable-automation` switch from Chrome's launch switches
# (presumably to hide the "controlled by automated software" banner - confirm).
options.add_experimental_option("excludeSwitches", ["enable-automation"])
# Uncomment to run without a visible browser window.
# options.add_argument("--headless=new")

In [7]:
# Example viewer profile (presumably the tags this simulated user is
# interested in; no cell shown here consumes it yet).
user = User(tags=["cats", "dogs", "animals"])

In [29]:
# Launch Chrome with the options configured above, open the feed and run the
# initial watch sequence performed by click_first_video.
scraper = TikTokScraper()
scraper.create_and_run_scraper(options=options)

In [30]:
# Close the browser session started by create_and_run_scraper.
scraper.quit_driver()