## Aim

This notebook tracks down the bug in the code that scrapes the 2014-2018 sessions. I found the bug and have updated the script accordingly.

In [39]:
import pandas as pd
import numpy as np
import time 
import math
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC 
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import Select

In [2]:
# Start a Firefox browser session driven by Selenium.
driver = webdriver.Firefox()
# Explicit-wait helper bound to this driver, with a 10-second timeout.
wait = WebDriverWait(driver=driver, timeout=10)

In [3]:
# Entry page that the navigation helpers below start from.
conference_url = 'https://convention2.allacademic.com/one/ica/ica16'
driver.get(conference_url)

In [4]:
def click_browse_by_session_type():
    '''Click the "browse by session type" link.

    Uses the module-level ``driver``: collects every anchor matching
    ``li.ui-li-has-icon.ui-last-child > a`` and clicks the fourth match
    (index 3). Raises IndexError if fewer than four anchors match.
    '''
    menu_links = driver.find_elements(
        By.CSS_SELECTOR, "li.ui-li-has-icon.ui-last-child > a"
    )
    menu_links[3].click()

def click_paper_session():
    '''Click the "paper session" button.

    Uses the module-level ``driver`` to locate the ``a.ui-btn`` inside the
    third ``li`` whose class is exactly ``ui-li-has-count``, then clicks it.
    '''
    button_xpath = '//li[@class="ui-li-has-count"][3] //a[@class="ui-btn"]'
    driver.find_element(By.XPATH, button_xpath).click()

def get_sessions():
    '''Return the session link elements on the current page.

    Returns:
        list: every element matching ``a.ul-li-has-alt-left.ui-btn`` as found
        by the module-level ``driver`` (empty if nothing matches).
    '''
    return driver.find_elements(By.CSS_SELECTOR, 'a.ul-li-has-alt-left.ui-btn')

In [5]:
# Run the two navigation helpers in order, then collect the session links
# from the page the browser ends up on.
click_browse_by_session_type()
click_paper_session()
sessions = get_sessions()

In [6]:
# Number of session link elements that were collected.
len(sessions)

343

In [15]:
# Find the position(s) of the session whose link text mentions the title
# under investigation.
s_texts = [i.text for i in sessions]
# enumerate() yields each text's own position. list.index() would return the
# FIRST equal text, reporting the wrong index whenever two sessions share the
# same link text, and would rescan the list on every hit.
for idx, s in enumerate(s_texts):
    if 'Sex, Sexting, Sexuality' in s:
        print(idx)

200


In [16]:
# Pick the session link at position 200 (the index printed by the title
# search). NOTE(review): hard-coded; re-check if the session list changes.
s = sessions[200]

In [17]:
# URL that the selected session link points to.
s_link = s.get_attribute('href')

In [18]:
# open a new window
driver.execute_script("window.open('');")
# switch to the new window
driver.switch_to.window(driver.window_handles[1])
# open the session
driver.get(s_link)

In [19]:
# The session title is the text of the first <h3> on the page.
session_title_e = driver.find_element(By.CSS_SELECTOR, 'h3')
session_title = session_title_e.text

# The <h4> headings label the page's sections: sub unit, cosponsor, chair,
# the presentations.
h4s = driver.find_elements(By.CSS_SELECTOR, 'h4')
h4s_texts = [heading.text for heading in h4s]
# Position of the 'Sub Unit' heading; list.index raises ValueError if absent.
sub_unit_e_idx = h4s_texts.index('Sub Unit')

In [20]:
# Show the scraped session title.
session_title

'Sex, Sexting, Sexuality'

In [21]:
# Show the text of every <h4> heading found on the session page.
h4s_texts

['Sub Unit', 'Individual Presentations']

In [22]:
# Show the position of the 'Sub Unit' heading within h4s_texts.
sub_unit_e_idx

0

In [23]:
# Best-effort lookup of the session's sub unit: take the text of the fifth
# gray list box on the page.
# NOTE(review): index 4 is a hard-coded position; confirm it holds for every
# session layout.
try:
    sub_unit_e = driver.find_elements(
        By.CSS_SELECTOR, 'ul.ui-listview.ui-listview-inset.ui-corner-all.ui-shadow'
    )[4]
    sub_unit = sub_unit_e.text
except Exception:
    # Fewer than five boxes (IndexError) or a WebDriver failure: record no
    # sub unit. `Exception` rather than a bare `except:` so KeyboardInterrupt
    # and SystemExit still propagate.
    sub_unit = None

In [24]:
# Show the scraped sub unit (None if the lookup failed).
sub_unit

'Children Adolescents and the Media'

In [27]:
# Extract the session chair(s) as two strings: names and affiliations, each
# joined with '; ' when there are several chairs.
#
# Start from "no chair" so that every path below leaves both names defined.
# This includes the case where the chair box exists but has no <li> entries,
# which previously left them unassigned.
chair_name = None
chair_aff = None
if 'Chair' in h4s_texts:
    try:
        # The chair box is addressed by position among the gray list boxes;
        # it sits one slot later when a 'Cosponsor' section is present.
        # NOTE(review): 5 and 6 are hard-coded offsets; confirm they hold for
        # every session layout.
        chair_e_idx = 6 if 'Cosponsor' in h4s_texts else 5
        chair_graybox = driver.find_elements(
            By.CSS_SELECTOR, 'ul.ui-listview.ui-listview-inset.ui-corner-all.ui-shadow'
        )[chair_e_idx]
        chair_es = chair_graybox.find_elements(
            By.CSS_SELECTOR, 'li'
        )
        if chair_es:
            # One loop covers both the single-chair case and sessions with
            # multiple chairs. For example, year 2018, session
            # 'Research Escalator - Part 1'.
            chair_names = []
            chair_affs = []
            for chair_e in chair_es:
                chair_info = chair_e.text
                # Each entry is split on ', ': first piece is the name,
                # second piece is the affiliation. An entry without ', '
                # raises IndexError and falls through to the handler below.
                info_parts = chair_info.split(', ')
                chair_names.append(info_parts[0])
                chair_affs.append(info_parts[1])
            chair_name = '; '.join(chair_names)
            chair_aff = '; '.join(chair_affs)
    except Exception:
        # Best-effort: any lookup or parsing failure means "no chair".
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate.
        chair_name = None
        chair_aff = None

In [29]:
# Show the extracted chair name(s) and affiliation(s).
chair_name, chair_aff

(None, None)

In [30]:
# Debug check: was exactly one chair <li> found?
# NOTE(review): chair_es is only assigned inside the 'Chair' branch of the
# chair-extraction cell, so for a session without a 'Chair' heading this
# reads a leftover value from an earlier run (or raises NameError on a fresh
# kernel) — confirm before relying on it.
len(chair_es) == 1

False

## String

In [40]:
# Load the interim paper table from its relative path.
paper_csv_path = '../data/interim/paper_df_2014_2018.csv'
df = pd.read_csv(paper_csv_path)

In [41]:
# Pull one abstract (row label 107) to inspect its embedded line breaks.
astr = df.loc[107, 'abstract']
astr

'A key factor in finding television writing jobs in Hollywood is access to the professional\nnetwork of writers. Phalen, Ksiazek and Garber (2016) conducted a network analysis of\nHollywood writers, and found that those who were most central in terms of degree centrality\nwere men. They also found that the writers who scored highest on betweenness centrality\nwere women. Their analysis, however, was limited to 5 years, and included no data on the\nways the writers’ network might change over time. Additionally, their study combined the\ngenres of comedy and drama, making it impossible to view differences between the two.\nHollywood Voices presents a more nuanced analysis of the television writers’ network over a\nlonger period of time, highlighting differences between men and women, and between writers\nin comedy and drama, on measures of betweenness and closeness centrality.'

'A key factor in finding television writing jobs in Hollywood is access to the professional\nnetwork of writers. Phalen, Ksiazek and Garber (2016) conducted a network analysis of\nHollywood writers, and found that those who were most central in terms of degree centrality\nwere men. They also found that the writers who scored highest on betweenness centrality\nwere women. Their analysis, however, was limited to 5 years, and included no data on the\nways the writers’ network might change over time. Additionally, their study combined the\ngenres of comedy and drama, making it impossible to view differences between the two.\nHollywood Voices presents a more nuanced analysis of the television writers’ network over a\nlonger period of time, highlighting differences between men and women, and between writers\nin comedy and drama, on measures of betweenness and closeness centrality.'

In [42]:
# Replace the hard line breaks with single spaces, then trim both ends.
abstract_lines = astr.splitlines()
" ".join(abstract_lines).strip()

'A key factor in finding television writing jobs in Hollywood is access to the professional network of writers. Phalen, Ksiazek and Garber (2016) conducted a network analysis of Hollywood writers, and found that those who were most central in terms of degree centrality were men. They also found that the writers who scored highest on betweenness centrality were women. Their analysis, however, was limited to 5 years, and included no data on the ways the writers’ network might change over time. Additionally, their study combined the genres of comedy and drama, making it impossible to view differences between the two. Hollywood Voices presents a more nuanced analysis of the television writers’ network over a longer period of time, highlighting differences between men and women, and between writers in comedy and drama, on measures of betweenness and closeness centrality.'