# 1. Libraries, Configuration, and Importing Queries

## 1.1 Libraries

In [2]:
# selenium specific imports
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

# other imports
import configparser
import time
import pandas as pd
import numpy as np
from datetime import datetime

## 1.2 Configuration

In [3]:
# Configuration parser initialization: load settings from ../config.ini
# (the [login_credentials] section is read when logging in).
config = configparser.ConfigParser()
config.read('../config.ini')
delay = 10 # seconds WebDriverWait will wait for the correct element to appear

## 1.3 Load csv of Brand Names Search Queries

- Brand queries, in conjunction with slight modifications, were systematically created by Catherine C. Pollack at Dartmouth College.

In [4]:
# Load the search queries; the `search_query` column drives the scraping loop.
query_df = pd.read_csv("../data/queries/remaining_queries_from_fbw.csv")

In [5]:
# Quick sanity check of the loaded queries (count / unique / top / freq).
query_df.describe()

Unnamed: 0,search_query
count,23
unique,23
top,Bud Light LIme
freq,1


# 2. Custom Functions 

## 2.1 Profile Search (Advanced)

- None of the search terms are case sensitive


In [6]:
def _current_month_label():
    """Return the text of the page's ``monthly_label`` element for log messages.

    Falls back to a placeholder when the element cannot be read, so that
    reporting one failure can never raise a second exception.
    """
    try:
        return driver.find_element(By.ID, "monthly_label").text
    except Exception:
        return "(unknown month)"


def advanced_profile_search(scraped_data_df, query, num_months):
    """Scrape month-by-month metrics for ``query`` from the advanced profile search.

    Relies on two notebook-level globals: ``driver`` (the Selenium WebDriver;
    the notebook logs in before calling this) and ``delay`` (wait timeout in
    seconds).

    The function opens the profile-search page, switches to advanced search
    with monthly data, enters ``query`` in the "must have" field and then,
    for up to ``num_months`` iterations, runs the search, records the
    displayed metrics and clicks the left arrow to move to the previous month.
    Iteration stops early as soon as a newly scraped row duplicates a row that
    is already in the frame (indicative of reaching the date-range limit).

    Parameters
    ----------
    scraped_data_df : pandas.DataFrame
        Frame the scraped rows are appended to. Rows are added with the keys
        ``query``, ``month``, ``unique_channels``, ``new_views`` and
        ``hours_watched``.
    query : str
        Keyword to search for.
    num_months : int
        Maximum number of months to collect.

    Returns
    -------
    pandas.DataFrame
        ``scraped_data_df`` with the newly scraped rows appended. Scraping is
        best effort: if an error occurs it is reported with ``print`` and the
        rows collected so far are still returned.
    """
    try:
        driver.get("https://app.streamhatchet.com/search/profilesearch")
        time.sleep(5)

        # Click on advanced search and monthly data
        advanced_search_element = WebDriverWait(driver, delay).until(
            EC.element_to_be_clickable((By.XPATH, "/html[1]/body[1]/div[1]/div[2]/div[1]/main[1]/div[2]/div[2]/div[1]/div[1]/div[1]/div[1]/div[2]/form[1]/div[4]/div[1]/a[2]"))
        )
        advanced_search_element.click()

        month_element = WebDriverWait(driver, delay).until(
            EC.element_to_be_clickable((By.XPATH, "/html[1]/body[1]/div[1]/div[2]/div[1]/main[1]/div[2]/div[2]/div[1]/div[1]/div[1]/div[1]/div[2]/form[1]/div[1]/div[1]/div[1]/div[1]/a[2]"))
        )
        month_element.click()

        # Ensures the data is from the most recent available month
        driver.find_element(
            By.XPATH,
            "//button[@class='ui icon button time_monthly']//i[@class='right arrow icon']",
        ).click()

        # Enters the query into the must_have element and confirms it
        must_have_element = driver.find_element(By.ID, "chatKeywordsANDCurrent")
        must_have_element.send_keys(query)
        driver.find_element(
            By.XPATH, "//div[@id='chatkeywordANDAdd']//i[@class='plus icon']"
        ).click()

        # Month by month iteration for obtaining metrics
        for _ in range(num_months):

            search_element = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '//*[@id="discoveryForm"]/div[5]/div/div/div/div/button/i'))
            )
            search_element.click()

            # Wait until the body no longer carries the 'removeScroll' class
            # before reading the metrics.
            body_element = driver.find_element(By.XPATH, "//html//body")
            WebDriverWait(driver, 60).until(
                lambda d: 'removeScroll' not in body_element.get_attribute('class')
            )

            row_dict = {
                'query': query,
                'month': driver.find_element(By.ID, "monthly_label").text,
                'unique_channels': driver.find_element(By.ID, "totalChannelsFound").get_attribute("title"),
                'new_views': driver.find_element(By.ID, "topStatsNewViews").get_attribute("title"),
                'hours_watched': driver.find_element(By.ID, "topStatsTimeWatched").get_attribute("title"),
            }

            # Add the row to the bottom of the dataframe. pd.concat is used
            # because DataFrame.append no longer exists in pandas >= 2.0.
            scraped_data_df = pd.concat(
                [scraped_data_df, pd.DataFrame([row_dict])], ignore_index=True
            )

            # If the data is the same month-to-month (indicative of reaching
            # the date range limitation), drop the repeated row and stop.
            if scraped_data_df.duplicated().any():
                scraped_data_df = scraped_data_df.iloc[:-1]
                break

            try:
                left_arrow_element = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.XPATH, "//button[@class='ui icon button time_monthly left']//i[@class='left arrow icon']"))
                )
                left_arrow_element.click()
            except Exception:
                # Best effort: keep going; if the page did not move, the next
                # iteration yields a duplicate row and ends the loop.
                print("Left arrow was not clickable for {} {}".format(query, _current_month_label()))

    except TimeoutException:
        print("Had a timeout exception for {} {}".format(query, _current_month_label()))
    except Exception as exc:
        # Still best effort, but name the real failure instead of calling
        # everything a timeout; KeyboardInterrupt is no longer swallowed.
        print("Had a {} for {} {}".format(type(exc).__name__, query, _current_month_label()))

    return scraped_data_df

# 3. Login 

In [7]:
# Start a Chrome session and open the app's landing page.
driver = webdriver.Chrome()
driver.get("https://app.streamhatchet.com/")
# Click the element with id "cookiesAccepted" (presumably the cookie-consent button).
driver.find_element(By.ID, "cookiesAccepted").click()

# Credentials come from the [login_credentials] section of the config file,
# so they are never hard-coded in the notebook.
username = driver.find_element(By.NAME, "loginEmail")
username.clear()
username.send_keys(config['login_credentials']['email'])

password = driver.find_element(By.NAME, "loginPassword")
password.clear()
password.send_keys(config['login_credentials']['password'])

# find_element(By..., ...) replaces the find_element_by_* helpers, which were
# removed in Selenium 4.3; this form also works on older Selenium versions.
driver.find_element(By.XPATH, "//button[contains(text(),'Login')]").click()
time.sleep(3) # sleep for 3 seconds to let the page load

# 4. Profile Search Using Final Basic Words (fbw)

In [17]:
# Empty frame with the columns that advanced_profile_search fills in.
remaining_df = pd.DataFrame(columns=['query', 'month', 'unique_channels',
                                     'new_views', 'hours_watched'])

# Maximum number of months to step back through for each query.
num_months_to_scrape = 24

# Only the search_query column is needed, so iterate over it directly
# instead of materialising every row with iterrows().
for query in query_df['search_query']:
    remaining_df = advanced_profile_search(remaining_df, query, num_months_to_scrape)

In [18]:
# Display the metrics scraped for the queries above.
remaining_df

Unnamed: 0,query,month,unique_channels,new_views,hours_watched
0,Kashi,May 2019,41,60622,30299
1,Kashi,April 2019,43,63061,33578
2,Kashi,March 2019,35,54944,32225
3,Kashi,February 2019,29,19926,16096
4,Kashi,January 2019,39,15470,10807
5,Kashi,December 2018,41,19764,10843
6,Kashi,November 2018,44,492597,122217
7,Kashi,October 2018,27,70403,27982
8,Kashi,September 2018,32,42967,20856
9,Kashi,August 2018,34,67011,26530
