# Written by Jason Kim and Catherine C. Pollack

# 1. Libraries, Configuration, and Importing Queries

## 1.1 Libraries

In [1]:
import configparser
import os
import time

import pandas as pd

# selenium specific imports
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

## 1.2 Configuration

In [2]:
# Parse the project-level INI file; the login credentials are read from it later.
config = configparser.ConfigParser()
config.read('../config.ini')

# Upper bound, in seconds, that an explicit wait blocks until the target element appears.
delay = 10

# Empty results table that the scraping loop fills in.
stream_data = pd.DataFrame(
    columns=[
        'Title',
        'Streamer',
        'Date',
        'Hours Watched',
        'Word',
    ]
)

## 1.3 Load csv of Brand Names Search Queries

-  Brand queries, in conjunction with slight modifications, were systematically created by Catherine C. Pollack at Dartmouth College. 

In [3]:
# Read the brand-name search queries (column 'search_query') from the project data folder.
query_csv_path = "../data/queries/Final_Words_List.csv"
query_df = pd.read_csv(query_csv_path)

In [4]:
# Summary statistics of the loaded query list (row count, unique values, most frequent value).
query_df.describe()

Unnamed: 0,search_query
count,670
unique,670
top,NoDoz
freq,1


# 2. Custom Functions 

## 2.1 Login

In [5]:
def login_streamhatchet():
    """Log in to the Stream Hatchet web app using the credentials in ``config``.

    Relies on the notebook-level globals ``driver`` (the Selenium WebDriver),
    ``config`` (section ``login_credentials`` with keys ``email`` and
    ``password``) and ``delay`` (explicit-wait timeout in seconds).

    Returns:
        None. The only effect is on the browser session held by ``driver``.

    Raises:
        selenium.common.exceptions.TimeoutException: if the cookie button or a
            login field does not become clickable within ``delay`` seconds.
        KeyError: if the credentials are missing from ``config``.
    """
    driver.get("https://app.streamhatchet.com/")

    # Wait for each element instead of looking it up immediately: the page may
    # still be rendering right after ``driver.get`` returns.
    wait = WebDriverWait(driver, delay)

    # Presumably the cookie-consent button; it is clicked before the login form is filled.
    wait.until(EC.element_to_be_clickable((By.ID, "cookiesAccepted"))).click()

    credentials = config['login_credentials']
    form_values = (
        ("loginEmail", credentials['email']),
        ("loginPassword", credentials['password']),
    )
    for field_name, value in form_values:
        field = wait.until(EC.element_to_be_clickable((By.NAME, field_name)))
        field.clear()
        field.send_keys(value)

    # ``find_element(By..., ...)`` works on Selenium 3 and 4; the older
    # ``find_element_by_*`` helpers were removed in Selenium 4.
    driver.find_element(By.XPATH, "//button[contains(text(),'Login')]").click()
    time.sleep(3) # sleep for 3 seconds to let the page load

## 2.2 Stream Title Search

In [6]:
def _click_arrow_until_end(arrow_xpath):
    """Click the calendar arrow at ``arrow_xpath`` until the click fails.

    The loop also ends once the ``icon-down-New`` element is no longer
    displayed. Only Selenium errors end the loop, so a ``KeyboardInterrupt``
    still stops the notebook.
    """
    while driver.find_element(By.XPATH, "//i[@id='icon-down-New']").is_displayed():
        try:
            driver.find_element(By.XPATH, arrow_xpath).click()
        except WebDriverException:
            break


def stream_title_search(query, incomplete_queries_list, df, end_day=22):
    """Run a Stream Hatchet stream-title search for ``query`` and scrape the result table.

    Relies on the notebook-level globals ``driver`` (a logged-in Selenium
    WebDriver) and ``delay`` (explicit-wait timeout in seconds).

    Args:
        query: Text typed into the 'Stream title query' input.
        incomplete_queries_list: Unused; kept so existing callers keep working.
        df: Unused; kept so existing callers keep working.
        end_day: Day of the month clicked in the right-hand calendar as the end
            of the date range. Defaults to 22, the day the original run took
            place.

    Returns:
        tuple: ``(num_titles, rows)`` where ``num_titles`` is the text of the
        ``messages-count`` element and ``rows`` is the text of the result
        table body split into a list of lines.

    Raises:
        selenium.common.exceptions.TimeoutException: if an awaited element
            does not show up within ``delay`` seconds.
        selenium.common.exceptions.NoSuchElementException: if a directly
            looked-up element is missing from the page.
    """
    driver.get("https://app.streamhatchet.com/search/toolstatus")
    time.sleep(1)

    wait = WebDriverWait(driver, delay)

    # Enters query into 'Stream title query'
    stream_title_query_input = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//input[@id='status-query']")))
    stream_title_query_input.send_keys(query)

    # Three backspaces in the platform box; per the original note this leaves
    # Twitch as the only platform to search.
    platform_input = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//input[@class='search']")))
    platform_input.click()
    for _ in range(3):
        platform_input.send_keys(Keys.BACKSPACE)

    # Click to expand the date options
    driver.find_element(By.XPATH, "//div[@id='NewRangePicker']").click()

    # Change the hours and minutes to 0:00 for both the "from" and "to" dates
    for time_option_xpath in (
        "//div[@class='calendar left']//select[@class='hourselect']//option[1]",
        "//div[@class='calendar left']//option[contains(text(),'00')]",
        "//div[@class='calendar right']//select[@class='hourselect']//option[1]",
        "//div[@class='calendar right']//option[contains(text(),'00')]",
    ):
        driver.find_element(By.XPATH, time_option_xpath).click()

    driver.find_element(By.XPATH, "//li[@class='active']").click()

    # Keep clicking the left arrow to reach the earliest month available
    _click_arrow_until_end("//i[@class='fa fa-chevron-left glyphicon glyphicon-chevron-left']")

    # Click on the very first day of data available
    day_one_element = driver.find_element(By.XPATH, "//div[@class='calendar left']//tr[1]//td[2]")
    try:
        day_one_element.click()
    except WebDriverException:
        print("First Day element is not clickable")

    # Click on the right arrow to go all the way to the end
    _click_arrow_until_end("//i[@class='fa fa-chevron-right glyphicon glyphicon-chevron-right']")

    # Select the last day of the range. An exact text match is used so that a
    # day such as 2 cannot match 12, 20, 21, ...
    end_day_xpath = "//div[@class='calendar right']//td[normalize-space(text())='{}']".format(end_day)
    driver.find_element(By.XPATH, end_day_xpath).click()

    # Runs the search
    driver.find_element(
        By.XPATH, "//button[@class='applyBtn btn btn-sm btn-success ui google plus button']").click()
    run_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, "//button[@class='medium ui google plus submit button']")))
    run_button.click()

    # Scrape the number of titles
    num_titles = wait.until(
        EC.visibility_of_element_located((By.XPATH, "//p[@id='messages-count']"))).text

    # The result table loads more rows as the page scrolls: keep scrolling to
    # the bottom until the document height stops growing.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)  # give the page time to load the next batch of rows
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Extract the table body as text, one list entry per line
    rows = driver.find_element(By.XPATH, "//table/tbody").text.split("\n")

    return num_titles, rows

  
        

# 3. Run Stream Titles Search

In [7]:
def _table_lines_to_records(word, table_lines):
    """Turn the scraped table lines for one search word into row dictionaries.

    Every result occupies six consecutive lines; the first four are used as
    title, streamer, date and hours watched. A table consisting of a single
    line is treated as "no results" and yields one placeholder record.

    Args:
        word: The search query the lines belong to.
        table_lines: List of text lines scraped from the result table.

    Returns:
        list of dict: one dictionary per result, keyed by the ``stream_data``
        column names.
    """
    if len(table_lines) == 1:  # no results
        return [{'Word': word,
                 'Title': "None",
                 'Streamer': "None",
                 'Date': "None",
                 'Hours Watched': 0}]

    records = []
    for start in range(0, len(table_lines), 6):
        fields = table_lines[start:start + 4]
        if len(fields) < 4:
            # A truncated trailing chunk would otherwise raise IndexError.
            print("Skipping incomplete table row for " + str(word))
            continue
        title, streamer, date, hours_watched = fields
        records.append({'Word': word,
                        'Title': title,
                        'Streamer': streamer,
                        'Date': date,
                        'Hours Watched': hours_watched})
    return records


df = []
incomplete_queries_list = []
scraped_records = []
driver = webdriver.Chrome()
# NOTE(review): the browser session is left open after this cell, as before;
# call driver.quit() once it is no longer needed.

try:
    login_streamhatchet()

    for search_query in query_df['search_query']:  # for each word
        print(f"Starting {search_query}")  # progress message so it is visible the run is alive
        num_titles, table_id = stream_title_search(search_query, incomplete_queries_list, df)
        scraped_records.extend(_table_lines_to_records(search_query, table_id))
finally:
    # Build the frame once from the collected records (DataFrame.append was
    # removed in pandas 2.0 and is quadratic when used row by row). Doing it in
    # ``finally`` keeps whatever was scraped if the run is interrupted.
    if scraped_records:
        new_rows = pd.DataFrame(scraped_records, columns=stream_data.columns)
        if stream_data.empty:
            stream_data = new_rows
        else:
            stream_data = pd.concat([stream_data, new_rows], ignore_index=True)


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/jasonkim/.local/share/virtualenvs/ds_streamhatchet-8y_cdCbo/lib/python3.7/site-packages/urllib3/connectionpool.py", line 379, in _make_request
    httplib_response = conn.getresponse(buffering=True)
TypeError: getresponse() got an unexpected keyword argument 'buffering'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/jasonkim/.local/share/virtualenvs/ds_streamhatchet-8y_cdCbo/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3325, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-16bd369bf78c>", line 5, in <module>
    login_streamhatchet()
  File "<ipython-input-5-e6f93ce00d8c>", line 2, in login_streamhatchet
    driver.get("https://app.streamhatchet.com/")
  File "/Users/jasonkim/.local/share/virtualenvs/ds_streamhatchet-8y_cdCbo/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 33

TypeError: can only concatenate str (not "list") to str

In [None]:
# Display the table of scraped stream titles.
stream_data