# Capital IQ Webscraping | Key Developments

A demonstration of scraping key developments from the Capital IQ website.

In [1]:
import getpass

import lxml  # parser backend requested by BeautifulSoup(..., 'lxml')
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

### Website and url parameters

In [2]:
# Company id placed in the URL's companyId parameter (24937 is Apple, Inc.).
company = 24937
# Value for the selDateRangeOption parameter.
# Other options include: w1, d30, m3, m6, y1, y2, y3, y5, all
date_range = 'y1'
# URL template; the placeholders take the company id and the date range, in that order.
url = (
    'https://www.capitaliq.com/CIQDotNet/KeyDevs/KeyDevelopments.aspx'
    '?companyId={}&selDateRangeOption={}'
)

### Credentials for website authentication

In [3]:
# Ask for the login name interactively so it is not hard-coded in the notebook.
username = input()

 israel.dryer@us.gt.com


In [4]:
# Read the password through getpass so it is not echoed into the cell output.
password = getpass.getpass()

 ·········


### Create the browser bot

In [5]:
# Start the Chrome WebDriver session that the following cells drive.
bot = webdriver.Chrome()

### Navigate to the website and login

In [6]:
# Fill the URL template with the company id and date range, then open it.
key_dev_url = url.format(company, date_range)
bot.get(key_dev_url)

In [7]:
# Type the login name into the field with id "username".
# find_element(By.ID, ...) replaces find_element_by_id, which Selenium 4.3 removed.
bot.find_element(By.ID, 'username').send_keys(username)

In [8]:
# Type the password into the field with id "password", then press Enter.
# find_element(By.ID, ...) replaces find_element_by_id, which Selenium 4.3 removed.
pwd = bot.find_element(By.ID, 'password')
pwd.send_keys(password)
pwd.send_keys(Keys.RETURN)

### Extract data from the webpage

Show all records and expand all rows if available

In [9]:
# show all records
# The login was submitted in the previous cell, so wait (up to 30 s) for the
# "view all" control to become clickable instead of looking it up immediately;
# this keeps Run All from racing the page load. By.ID replaces the
# find_element_by_id helper, which Selenium 4.3 removed.
view_all = WebDriverWait(bot, 30).until(
    EC.element_to_be_clickable(
        (By.ID, "Displaysection3_myKeyDevDataGrid_myDataGrid_viewall")
    )
)
view_all.click()

In [10]:
# expand all rows
# Wait (up to 30 s) for the expand control to become clickable rather than
# looking it up immediately after the previous click. By.ID replaces the
# find_element_by_id helper, which Selenium 4.3 removed.
exp_rows = WebDriverWait(bot, 30).until(
    EC.element_to_be_clickable(
        (By.ID, "Displaysection3_myKeyDevDataGrid_myDataGrid_Icon")
    )
)
exp_rows.click()

In [11]:
# Take the browser's current page source and parse it with the lxml parser.
page_html = bot.page_source
soup = BeautifulSoup(page_html, 'lxml')

### Find table details within html and parse

In [12]:
# Find the first <table> with class "cTblListBody", then collect every <td>
# found under its <tbody>.
list_body = soup.find('table', attrs={'class': 'cTblListBody'})
table = list_body.tbody.find_all('td')

In [13]:
# Inspect one raw <td> element to see the markup that will be reduced to text.
print(table[15])

<td align="left" style="width:200px;" valign="top">
<span>Fixed Income Offering</span>
<span style="float:right;"><a data-ensho="24937,635793917" enableviewstate="false" id="635793917" name="KeyDev" onclick="KenshoService.openKenshoPopup(event)" style="float:right;cursor: pointer;" value="Fixed Income Offering"><img alt="" src="https://w1.ciqimg.com/CIQDotNet/images/enzo.png?urwvid=805769356" style="display:none;float:right;" title="Kensho Analytics"/></a></span>
</td>


### Extract and strip the text from each of the `<td>` elements

In [14]:
# Reduce every <td> element to its whitespace-trimmed text.
table_rows = [cell.text.strip() for cell in table]

In [15]:
# Preview the first 13 cell texts next to their list positions.
for position in range(13):
    print(position, ' ', table_rows[position])

0   
1   Date
2   Type
3   Headline
4   Other Parties
5   
6   Sep-04-2019
7   Fixed Income Offering
8   Apple Inc. has announced a Fixed-Income Offering.
9   -
10   
11   Situation: Apple Inc. has announced a Fixed-Income Offering.

Security Name: Fixed Rate Senior Unsecured Notes due September 2022
Security Type: Corporate Bond/Note (Non Convertible)
Security Features: Callable
Coupon Type: Fixed
12   Situation: Apple Inc. has announced a Fixed-Income Offering.

Security Name: Fixed Rate Senior Unsecured Notes due September 2022
Security Type: Corporate Bond/Note (Non Convertible)
Security Features: Callable
Coupon Type: Fixed


### The records do not begin until index 6, so I can start there

In [16]:
# Positions 0-5 hold the blank and column-header cells, so rebuild the list
# from the <td> elements starting at position 6.
table_rows = [cell.text.strip() for cell in table[6:]]

### The last row contains extra irrelevant data, so I'll pop this from the list

In [17]:
# The trailing cell is the pager line ("Viewing 1-135 of 135 Key Developments ...").
# Remove it only when it is actually there, so re-running this cell cannot
# discard a real record field and shift the 8-cell chunking below.
if table_rows and table_rows[-1].startswith('Viewing'):
    print(table_rows.pop())

Viewing 1-135 of 135 Key Developments [View 1-25  | 26-50  | 51-75  | 76-100  | 101-125  | 126-135] [View All]


### Each record is a chunk of 8 list elements; the last 2 can be ignored

In [18]:
# Print the first 8 cells, i.e. one full record chunk, with their positions.
for position in range(8):
    print(position, table_rows[position])

0 Sep-04-2019
1 Fixed Income Offering
2 Apple Inc. has announced a Fixed-Income Offering.
3 -
4 
5 Situation: Apple Inc. has announced a Fixed-Income Offering.

Security Name: Fixed Rate Senior Unsecured Notes due September 2022
Security Type: Corporate Bond/Note (Non Convertible)
Security Features: Callable
Coupon Type: Fixed
6 Situation: Apple Inc. has announced a Fixed-Income Offering.

Security Name: Fixed Rate Senior Unsecured Notes due September 2022
Security Type: Corporate Bond/Note (Non Convertible)
Security Features: Callable
Coupon Type: Fixed
7 


### Append record chunks to a new list

In [19]:
# Number of cell texts remaining; used as the upper bound when chunking into records.
row_count = len(table_rows)

In [20]:
# Each record spans 8 consecutive cells; only the first 6 are kept.
records = []

for start in range(0, row_count, 8):
    fields = table_rows[start:start + 6]
    # A trailing partial chunk would produce a short record, which breaks the
    # fixed-position cleanup and the fixed-width DataFrame built from
    # `records`, so only complete 6-field records are stored.
    if len(fields) == 6:
        records.append(fields)

In [21]:
# List the fields of the first record with their positions.
for position, field in enumerate(records[0]):
    print(position, field)

0 Sep-04-2019
1 Fixed Income Offering
2 Apple Inc. has announced a Fixed-Income Offering.
3 -
4 
5 Situation: Apple Inc. has announced a Fixed-Income Offering.

Security Name: Fixed Rate Senior Unsecured Notes due September 2022
Security Type: Corporate Bond/Note (Non Convertible)
Security Features: Callable
Coupon Type: Fixed


### Remove situation label

In [22]:
label = 'Situation: '
for row in records:
    # The situation text is the last field both before and after the blank
    # field is removed, so indexing from the end keeps this cell re-runnable.
    # Only a leading label is stripped; the same text further inside the
    # description is left alone.
    if row[-1].startswith(label):
        row[-1] = row[-1][len(label):]

### Remove the empty field between headline and situation

In [23]:
for row in records:
    # Only 6-field rows still carry the blank cell at index 4. The length
    # check stops a re-run from popping the situation text out of rows that
    # were already trimmed to 5 fields.
    if len(row) == 6:
        row.pop(4)

In [24]:
# Show the first record again after the cleanup steps, one field per line.
for position, field in enumerate(records[0]):
    print(position, field)

0 Sep-04-2019
1 Fixed Income Offering
2 Apple Inc. has announced a Fixed-Income Offering.
3 -
4 Apple Inc. has announced a Fixed-Income Offering.

Security Name: Fixed Rate Senior Unsecured Notes due September 2022
Security Type: Corporate Bond/Note (Non Convertible)
Security Features: Callable
Coupon Type: Fixed


### Create and preview dataframe

In [25]:
# Build the frame from the cleaned records; column names follow field order.
df = pd.DataFrame(
    data=records,
    columns=['Date', 'EventType', 'Headline', 'OtherParties', 'Situation'],
)

In [26]:
# Preview the first ten rows of the frame.
df.head(10)

Unnamed: 0,Date,EventType,Headline,OtherParties,Situation
0,Sep-04-2019,Fixed Income Offering,Apple Inc. has announced a Fixed-Income Offering.,-,Apple Inc. has announced a Fixed-Income Offeri...
1,Sep-04-2019,Fixed Income Offering,Apple Inc. has announced a Fixed-Income Offering.,-,Apple Inc. has announced a Fixed-Income Offeri...
2,Sep-04-2019,Fixed Income Offering,Apple Inc. has announced a Fixed-Income Offering.,-,Apple Inc. has announced a Fixed-Income Offeri...
3,Sep-04-2019,Fixed Income Offering,Apple Inc. has announced a Fixed-Income Offering.,-,Apple Inc. has announced a Fixed-Income Offeri...
4,Sep-04-2019,Fixed Income Offering,Apple Inc. has announced a Fixed-Income Offering.,-,Apple Inc. has announced a Fixed-Income Offeri...
5,Aug-21-2019,Product-Related Announcement,Apple to Launch Streaming Service in November ...,-,Apple introduced its TV+ service with exclusiv...
6,Aug-09-2019,Ex-Div Date (Regular),"Apple Inc., $ 0.77, Cash Dividend, Aug-09-2019",-,"Apple Inc., $ 0.77, Cash Dividend, Aug-09-2019"
7,Jul-31-2019,Buyback Tranche Update,Tranche Update on Apple Inc. (NasdaqGS:AAPL)'s...,-,"From March 31, 2019 to June 29, 2019, the comp..."
8,Jul-30-2019,Earnings Call,"Apple Inc., Q3 2019 Earnings Call, Jul 30, 2019",-,"Apple Inc., Q3 2019 Earnings Call, Jul 30, 2019"
9,Jul-30-2019,Dividend Affirmation,"Apple Declares Cash Dividend, Payable on Augus...",-,Apple's board of directors has declared a cash...
