In [264]:
### CBC SCRAPING CODE
### Authors: JONATHAN CHAN and PANDRAMISHI NAGA SIRISHA

###MOST RECENT UPDATE:  
##2020 MAY 15, 11:52AM
#wrote separate functions for each JSON element to be collected
#extract_json_items() will run for all articles, and will return null if not in proper format

#TO DO:
#[done] write code for collecting JSON items from all articles returned in CBC API (code cell 4; see main())
#[done] write code to iterate through multiple pages of API call (see scrape_urls())
#[done] write code to store JSON in a JSON or text document (see main())
#Final run: collect 50-100 articles for each of 6 indicators

In [1]:
import urllib.request
import json 
from bs4 import BeautifulSoup
#from datetime import date
import requests
import json
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
from datetime import datetime, timedelta
import datetime
import pytz
import dateutil.parser


In [2]:
#https://www.cbc.ca/search_api/v1/search?q=mortgage%20rate&sortOrder=relevance&page=100&fields=feed
def get_initial_url(search_term):
    """Return the URL of the first-page CBC search API call for a search string.

    The search term is split on whitespace, re-joined with single spaces and
    percent-encoded, so reserved characters in the term (``&``, ``#``, ``+``,
    ``=``, ...) cannot corrupt the query string. Spaces are encoded as ``%20``.
    Pass the raw, un-encoded search text.

    Input: search_term (str), e.g. "interest rate index"
    Returns: str, the API URL for page 1, sorted by relevance, with fields=feed

    Side effect: prints the generated URL.
    """
    from urllib.parse import quote  # function-scope import: no change to the import cell needed

    url_prefix = "https://www.cbc.ca/search_api/v1/search?"
    # safe="" so that "/" is escaped as well; plain words come out exactly as
    # the previous "%20".join(words) produced them.
    query = "q=" + quote(" ".join(search_term.split()), safe="")
    url_suffix = "&sortOrder=relevance&page=1&fields=feed"
    first_url = url_prefix + query + url_suffix
    print("FIRST URL API CALL: ", first_url)
    return first_url
    
# Quick check: show the first-page API URL built for a sample query.
get_initial_url("interest rate index")

FIRST URL API CALL:  https://www.cbc.ca/search_api/v1/search?q=interest%20rate%20index&sortOrder=relevance&page=1&fields=feed


'https://www.cbc.ca/search_api/v1/search?q=interest%20rate%20index&sortOrder=relevance&page=1&fields=feed'

In [3]:
# yourdate = dateutil.parser.parse('2020-05-13T14:48:05.000Z')
# print(yourdate)
# yourdate > past

In [4]:
def _page_url(first_url, page_number):
    """Return first_url with its `page` query parameter set to page_number."""
    import re  # function-scope import: no change to the import cell needed

    # Only a real query parameter ("?page=" / "&page=") is rewritten, so a
    # search term that merely contains the word "page" is left untouched.
    replaced, hits = re.subn(
        r"(?<=[?&])page=\d*", "page=" + str(page_number), first_url, count=1
    )
    if hits:
        return replaced
    # No page parameter present: append one.
    separator = "&" if "?" in first_url else "?"
    return first_url + separator + "page=" + str(page_number)


def _recent_article_urls(items, cutoff):
    """Return the 'url' of every search hit whose 'publishtime' is after cutoff."""
    return [
        item['url']
        for item in items
        if dateutil.parser.parse(item['publishtime']) > cutoff
    ]


def scrape_urls(url):
    """Collect the URLs of all search hits published within the last 365 days.

    Starting from the first-page API URL, result pages are requested one after
    another until the API returns an empty page. Hits older than one year are
    skipped but do not stop the paging (results are requested sorted by
    relevance, not by date).

    Input: url (str), first-page API URL as built by get_initial_url()
    Returns: list of str, the 'url' field of every sufficiently recent hit, in
             API order. Duplicates returned by the API are kept.

    Side effect: after each page from page 2 onward, prints the number of the
    page that would be requested next.
    """
    # `datetime` is the module here (the notebook's later `import datetime`
    # wins over `from datetime import datetime`); `timedelta` is imported by name.
    current_date = datetime.datetime.now(datetime.timezone.utc)
    last_year_date = current_date - timedelta(days=365)
    request_timeout = 30  # seconds; without it a stalled connection blocks forever

    info = requests.get(url, timeout=request_timeout).json()
    url_list = _recent_article_urls(info, last_year_date)

    page_number = 2
    while len(info) > 0:
        new_url = _page_url(url, page_number)
        info = requests.get(new_url, timeout=request_timeout).json()
        url_list.extend(_recent_article_urls(info, last_year_date))

        page_number += 1
        print(page_number)
    return url_list

# first_url = get_initial_url("supernova")
# all_urls = scrape_urls(first_url)

In [5]:
def get_author(soup):
    """Look up the byline of an article parsed with BeautifulSoup.

    The author is expected inside a span tag with class "authorText".

    Returns the text of that span, or None when the article has no such span.
    """
    byline = soup.find("span", {"class": "authorText"})
    return byline.text if byline else None

In [6]:
def get_title(soup):
    """Look up the headline of an article parsed with BeautifulSoup.

    The headline is expected inside an h1 tag with class "detailHeadline".

    Returns the text of that tag, or None when the article has no such tag.
    """
    headline = soup.find("h1", {"class": "detailHeadline"})
    if not headline:
        return None
    return headline.text

In [7]:
def get_desc(soup):
    """Look up the sub-headline (description) of an article parsed with BeautifulSoup.

    The description is expected inside an h2 tag with class "deck".

    Returns the text of that tag, or None when the article has no such tag.
    """
    deck = soup.find("h2", {"class": "deck"})
    if not deck:
        return None
    return deck.text
    

In [8]:
def get_url_to_image(soup):
    """returns the url to the header image of a CBC article (BeautifulSoup) if it exists, None if not

    Assume image url is contained within the src attribute of the img tag inside
    the lead figure tag (class: "imageMedia leadmedia-story full").

    None is returned when the figure is missing, when the figure contains no
    img tag, or when the img tag has no src attribute.
    """
    main_image_tag = soup.find("figure", {"class": "imageMedia leadmedia-story full"})
    if not main_image_tag:
        #print("No main header image found in article!")
        return None

    # A lead figure without an <img> (or an <img> without src) must not crash
    # a long scraping run; treat it like a missing image.
    img_tag = main_image_tag.find("img")
    if not img_tag:
        return None
    return img_tag.attrs.get("src")
        

In [9]:
def get_publish_time(soup):
    """returns a tuple of (publish time string, datetime string) if found in article, None if not

    Assume time is contained within time tag (class: timeStamp).

    The first element is the tag text up to the first "|", with the
    "Posted: " prefix removed and surrounding whitespace stripped.
    The second element is the tag's datetime attribute, or None when the tag
    has no such attribute.
    """
    time_tag = soup.find("time", {"class": "timeStamp"})
    if not time_tag:
        #print("No time information found in article!")
        return None

    # .get() so a <time> tag without a datetime attribute yields None for that
    # element instead of raising KeyError.
    datetime_str = time_tag.attrs.get("datetime")

    #NOTE: if we want to return a datetime object, error when writing to JSON
    #SOLUTION: return as string for now, convert to datetime object later in pipeline

    # Keep only the part before the first "|" and drop the "Posted: " label.
    timetext_str = time_tag.text.split("|")[0].replace("Posted: ", "").strip()
    return (timetext_str, datetime_str)


In [10]:
def get_source(soup, specify_source_type=True):
    """Determine the source label of a CBC article.

    specify_source_type=False: the fixed string "CBC" is returned and the
    article is not inspected.
    specify_source_type=True: the CBC subdivision (Ex: "CBC News", "CBC radio")
    is read from the page; None is returned if it cannot be found.

    Assume the source text always starts with "CBC" and sits directly before a
    span tag (class: bullet). An article with an author has two bullet spans,
    one without an author has a single one; when several bullet spans are
    preceded by text starting with "CBC", the last one wins.
    """
    if not specify_source_type:
        return "CBC"

    # Text node (as a string) immediately preceding each bullet span.
    preceding_texts = [
        str(span.previous_sibling)
        for span in soup.find_all("span", {"class": "bullet"})
    ]
    cbc_labels = [text for text in preceding_texts if text.startswith("CBC")]
    return cbc_labels[-1] if cbc_labels else None
    

In [11]:

def get_content(soup, as_string=True):
    """Extract the body text of a CBC article.

    Every p tag inside the div with class "story" contributes its text
    followed by a newline.

    Input: BeautifulSoup object, boolean
    Returns: one concatenated string when as_string is True, a list of
             paragraph strings when as_string is False, and None when the
             article has no story div.
    """
    story = soup.find("div", {"class": "story"})
    if not story:
        #print("no content found in article!")
        return None

    paragraphs = [paragraph.text + "\n" for paragraph in story.find_all("p")]
    return "".join(paragraphs) if as_string else paragraphs
    
# get_content(soup)

In [12]:
#NEW - USING NEW FUNCTIONS: 
def extract_json_items(url, specify_source_type=True):
    """Download a CBC article and return its fields as a JSON-serialisable dict.

    Keys of the returned dict:
        author: author (None if the article does not specify one)
        title: the title of the article
        description: subheader of the article
        url: the full url of the article ("http:" + the given url)
        urlToImage: the url of the header image
        publishedAt: tuple of (date string, datetime string)
        source: "CBC" if specify_source_type == False, subdivision of CBC if True (ex: "CBC radio")
        content: the article text as one string

    Any field that cannot be found in the page is None (for example, 'player'
    pages use a different layout and yield None for most fields).

    input: url returned from CBC API in "url" field (missing "http:" as part of URL)
    Returns: dict as described above, or None if the page could not be fetched
             (the HTTP error code or failure reason is printed).
    """
    article_url = "http:" + url

    #get HTML from article URL into BeautifulSoup
    try:
        # The context manager closes the response even if reading fails.
        with urllib.request.urlopen(article_url) as response:
            # errors="replace": one badly encoded byte should not abort a
            # scraping run that covers hundreds of articles.
            html = response.read().decode("utf8", errors="replace")
    except HTTPError as e:
        print('Error code: ', e.code)
        return None
    except URLError as e:
        print('Reason: ', e.reason)
        return None

    soup = BeautifulSoup(html, 'html.parser')

    # Key order is kept as before so the written JSON files look the same.
    json_dict = {
        "author": get_author(soup),
        "title": get_title(soup),
        "description": get_desc(soup),
        "url": article_url,
        "urlToImage": get_url_to_image(soup),
        "publishedAt": get_publish_time(soup),
        # Forward the caller's choice; it used to be silently ignored.
        "source": get_source(soup, specify_source_type),
        "content": get_content(soup, True),
    }
    return json_dict


In [13]:
# Sample run on a single article path (scheme-less, in the form extract_json_items() expects).
extract_json_items('//www.cbc.ca/news/business/powel-trump-negative-rates-1.5567512')

{'author': 'Pete Evans',
 'title': 'U.S. Fed chair rules out negative interest rates even as Trump trumpets them',
 'description': 'U.S. president goes negative on Jerome Powell for rejection of below-zero interest rates',
 'url': 'http://www.cbc.ca/news/business/powel-trump-negative-rates-1.5567512',
 'urlToImage': 'https://i.cbc.ca/1.5258204.1566589338!/fileImage/httpImage/image.jpg_gen/derivatives/16x9_780/869482910.jpg',
 'publishedAt': ('May 13, 2020 10:48 AM ET', '2020-05-13T15:17:41.088Z'),
 'source': 'CBC News',
 'content': 'The head of the Federal Reserve said Wednesday the U.S. central bank is not considering using negative interest rates, despite President Donald Trump seemingly pushing for them.\nIn a tweet Tuesday night, the president said other countries are enjoying the advantages of negative interest rates, and he urged his own central bank to accept the "gift" they would bestow on the U.S. economy.\nTypically central banks tinker with lending rates in an attempt to con

In [15]:
def main(query):
    """Run the whole pipeline for one search query.

    Builds the first-page API URL, gathers the article URLs from the search
    API, scrapes every article, prints the URL of each article that could be
    fetched, and writes the collected items to "<query with spaces replaced
    by underscores>_CBC_article.json" in the working directory.

    Input: query (str), search text such as "mortgage rates"
    Returns: list of dicts, one per successfully fetched article
    """
    article_urls = scrape_urls(get_initial_url(query))

    json_list = []
    for each_url in article_urls:
        retrieved_json = extract_json_items(each_url)
        if retrieved_json is None:
            # Article could not be downloaded; the reason was already printed.
            continue
        print(each_url)
        json_list.append(retrieved_json)

    file_name_prefix = query.replace(" ", "_")
    print(file_name_prefix)

    output_path = f"{file_name_prefix}_CBC_article.json"
    with open(output_path, 'w') as json_file:
        json.dump(json_list, json_file)

    return json_list



## Mortgage Rates

In [387]:
# Scrape every recent article for this query; main() also writes mortgage_rates_CBC_article.json.
cbc_mr_article = main("mortgage rates")
print(len(cbc_mr_article))
print(cbc_mr_article[0])

FIRST URL API CALL:  https://www.cbc.ca/search_api/v1/search?q=mortgage%20rates&sortOrder=relevance&page=1&fields=feed
3
4
5
6
7
8
9
10
11
12
//www.cbc.ca/news/business/bank-of-canada-thursday-1.5569391
//www.cbc.ca/news/business/interest-rates-coronavirus-evans-1.5515884
//www.cbc.ca/news/business/coronavirus-mortgage-rates-canada-1.5443071
//www.cbc.ca/news/business/powel-trump-negative-rates-1.5567512
//www.cbc.ca/news/canada/calgary/alberta-mortgages-arrears-august-report-from-february-2019-data-1.5246010
//www.cbc.ca/news/business/canadian-mortgage-rates-analysis-1.5164774
//www.cbc.ca/news/canada/hamilton/hamilton-economy-coronavirus-1.5566804
//www.cbc.ca/news/business/housing-starts-april-1.5561214
//www.cbc.ca/news/politics/federal-deficit-higher-than-252-billion-1.5566768
//www.cbc.ca/news/canada/british-columbia/covid-19-may-real-estate-1.5562836
//www.cbc.ca/news/canada/newfoundland-labrador/vianne-timmons-pov-women-1.5559214
//www.cbc.ca/news/business/canada-jobs-april-1.5

In [388]:
# Number of mortgage-rate articles that were fetched successfully.
print(len(cbc_mr_article))

31


## Interest rates

In [16]:
# Interest-rate articles get their own name: this cell used to bind the result
# to cbc_hp_article, which the housing-price cell further down reuses and overwrites.
# main() also writes interest_rates_CBC_article.json.
cbc_ir_article = main("interest rates")
print(len(cbc_ir_article))
print(cbc_ir_article[0])

FIRST URL API CALL:  https://www.cbc.ca/search_api/v1/search?q=interest%20rates&sortOrder=relevance&page=1&fields=feed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
2

//www.cbc.ca/news/business/bank-of-canada-thursday-1.5569391
//www.cbc.ca/news/canada/british-columbia/vancity-ceo-tamara-vrooman-vancouver-airport-authority-1.5568224
//www.cbc.ca/player/play/1722983491858
//www.cbc.ca/news/business/pete-evans-1.3333505
//www.cbc.ca/news/canada/nova-scotia/38-million-dollars-municipalities-loan-covid-19-1.5548256
//www.cbc.ca/news/canada/manitoba/manitoba-premier-pallister-workforce-cuts-covid-19-1.5573225
//www.cbc.ca/news/politics/federal-deficit-higher-than-252-billion-1.5566768
//www.cbc.ca/news/health/travel-bubble-covid-19-1.5577781
//www.cbc.ca/news/technology/ozone-contaminant-1.5571201
//www.cbc.ca/news/canada/prince-edward-island/pei-essential-worker-bonus-one-thousand-dollars-1.5560192
//www.cbc.ca/news/business/cmhc-mortgage-program-1.5510802
//www.cbc.ca/news/canada/prince-edward-island/group-calls-on-p-e-i-to-reduce-highest-allowable-payday-loan-rates-in-country-1.5533468
//www.cbc.ca/news/canada/nova-scotia/cbrm-to-make-property-tax-col

//www.cbc.ca/marketplace/episodes/_search_results/ee1ac0d8801af8dd38d95e262e8b9e69/
//www.cbc.ca/news/business/economy-poloz-retail-1.5381037
//www.cbc.ca/news/business/how-covid-might-affect-our-food-supply-wedding-delays-might-have-hidden-costs-cbc-s-marketplace-cheat-sheet-1.5535864
//www.cbc.ca/news/business/marketplace-cheatsheet-1.5544298
//www.cbc.ca/news/canada/new-brunswick/nb-power-eub-rate-hearing-joi-scientific-1.5462070
//www.cbc.ca/news/business/what-s-happening-in-the-repo-loan-market-and-why-does-it-matter-1.5289518
//www.cbc.ca/news/canada/saskatchewan/tentative-bid-capital-pointe-falls-through-1.5490070
//www.cbc.ca/news/business/canada-gdp-december-1.5479368
//www.cbc.ca/archives/mortgage-levels-start-to-take-their-toll-1.5331159
//www.cbc.ca/news/canada/calgary/alberta-geothermal-energy-potential-1.5569748
//www.cbc.ca/radio/costofliving/the-cost-of-life-and-how-small-businesses-are-faring-under-covid-19-1.5531458/recession-or-depression-canada-s-economy-is-in-for-a

//www.cbc.ca/player/play/1579383363990
//www.cbc.ca/player/play/2671565519
//www.cbc.ca/news/canada/london/covid-19-city-finances-1.5541117
//www.cbc.ca/news/canada/british-columbia/negative-yield-and-deficit-spending-1.5310352
//www.cbc.ca/player/play/2416845822
//www.cbc.ca/player/play/2416845822
//www.cbc.ca/news/canada/london/covid-19-city-finances-1.5541117
//www.cbc.ca/player/play/660477507969
//www.cbc.ca/player/play/2681520197
//www.cbc.ca/player/play/2686548987
//www.cbc.ca/player/play/695651907618
//www.cbc.ca/player/play/2673582983
//www.cbc.ca/player/play/2671775379
//www.cbc.ca/player/play/2686102244
//www.cbc.ca/player/play/2686200600
//www.cbc.ca/player/play/661279299750
//www.cbc.ca/player/play/2682336785
//www.cbc.ca/player/play/2673403163
//www.cbc.ca/player/play/2673930726
//www.cbc.ca/player/play/2684480736
//www.cbc.ca/player/play/2686383234
//www.cbc.ca/player/play/756328003809
//www.cbc.ca/player/play/2682365865
//www.cbc.ca/player/play/2688291043
//www.cbc.ca/pl

//www.cbc.ca/news/canada/prince-edward-island/pei-short-term-rentals-canada-research-oct-2019-1.5339238
//www.cbc.ca/news/business/markets-virus-concerns-1.5482163
//www.cbc.ca/news/business/employment-steady-october-statistics-canada-says-1.5352718
//www.cbc.ca/news/politics/covid-pandemic-coronavirus-deficit-debt-1.5561513
//www.cbc.ca/news/canada/calgary/covid-19-economic-impact-government-of-canada-1.5502090
//www.cbc.ca/news/politics/muskratfalls-dwightball-hydro-1.5457892
//www.cbc.ca/news/business/canada-gdp-november-1.5447173
//www.cbc.ca/news/canada/british-columbia/canadian-real-estate-markets-hit-hard-by-pandemic-1.5525681
//www.cbc.ca/news/business/7-potential-lingering-effects-of-covid-19-on-the-economy-don-pittis-1.5512872
//www.cbc.ca/player/play/668703299862
//www.cbc.ca/player/play/2683789293
//www.cbc.ca/news/canada/british-columbia/canadian-real-estate-markets-hit-hard-by-pandemic-1.5525681
//www.cbc.ca/news/business/7-potential-lingering-effects-of-covid-19-on-the-e

//www.cbc.ca/news/canada/thunder-bay/thunder-bay-lead-pipes-1.5461668
//www.cbc.ca/radio/costofliving/slashed-interest-rates-getting-a-piece-of-the-electric-car-pie-and-a-happy-jobs-friday-to-all-1.5486253/it-s-electric-how-canada-needs-to-hold-on-to-the-past-to-get-in-on-the-future-of-auto-manufacturing-1.5487571
//www.cbc.ca/radio/day6/benefiting-from-brexit-unboxing-videos-kids-toys-holiday-book-guide-carmen-maria-machado-and-more-1.5394177/his-business-nearly-collapsed-when-the-eu-was-created-now-this-customs-broker-is-readying-for-a-brexit-boom-1.5394188
//www.cbc.ca/news/hannah-tooktoo-on-her-cross-country-bike-ride-to-bring-awareness-to-the-high-rate-of-suicide-in-the-native-community-1.5215468
//www.cbc.ca/radio/day6/benefiting-from-brexit-unboxing-videos-kids-toys-holiday-book-guide-carmen-maria-machado-and-more-1.5394177/his-business-nearly-collapsed-when-the-eu-was-created-now-this-customs-broker-is-readying-for-a-brexit-boom-1.5394188
//www.cbc.ca/news/canada/newfoundland-l

//www.cbc.ca/news/canada/new-brunswick/new-brunswick-carbon-tax-higgs-trudeau-1.5332588
//www.cbc.ca/news/canada/new-brunswick/right-whales-protection-response-1.5479303
//www.cbc.ca/news/politics/un-security-council-pandemic-canada-trudeau-1.5556149
//www.cbc.ca/news/politics/un-security-council-pandemic-canada-trudeau-1.5556149
//www.cbc.ca/news/canada/newfoundland-labrador/moodys-rating-muskrat-1.5224515
//www.cbc.ca/news/canada/thunder-bay/thunder-bay-2020-budget-1.5414005
//www.cbc.ca/news/politics/un-security-council-pandemic-canada-trudeau-1.5556149
//www.cbc.ca/news/business/recession-economy-1.5256099
//www.cbc.ca/news/dog-toxins-1.5166477
//www.cbc.ca/news/canada/north/canadian-doctors-demand-health-study-impact-teck-mine-1.5472791
//www.cbc.ca/news/dog-toxins-1.5166477
//www.cbc.ca/radio/thecurrent/the-current-for-march-20-2020-1.5504386/in-the-race-to-find-a-covid-19-vaccine-this-epidemiologist-urges-governments-to-set-aside-nationalism-1.5505306
//www.cbc.ca/news/canada/br

//www.cbc.ca/news/canada/newfoundland-labrador/pov-lori-lee-oates-wolf-in-the-kitchen-1.5360559
//www.cbc.ca/news/canada/calgary/covid-19-lessons-business-owners-calgary-flood-1.5512740
//www.cbc.ca/news/canada/new-brunswick/bathurst-wind-farm-partnership-secrecy-1.5351557
//www.cbc.ca/news/canada/british-columbia/kicking-horse-canyon-highway-project-cost-rises-1.5272695
//www.cbc.ca/news/canada/edmonton/gaming-tax-credit-alberta-1.5336579
//www.cbc.ca/news/canada/newfoundland-labrador/winter-butter-pot-park-1.5414265
//www.cbc.ca/news/canada/calgary/alberta-trevor-tombe-recession-analysis-1.5299896
//www.cbc.ca/news/canada/calgary/alberta-trevor-tombe-recession-analysis-1.5299896
//www.cbc.ca/news/canada/windsor/michigan-sports-online-gambling-1.5389033
//www.cbc.ca/news/business/stock-market-trump-uncertainty-1.5347006
//www.cbc.ca/news/canada/toronto/city-council-budget-special-meeting-property-tax-increases-1.5469004
//www.cbc.ca/news/canada/ottawa/ottawa-race-weekend-collapse-help

//www.cbc.ca/news/canada/edmonton/zombie-debt-canadians-1.5355333
//www.cbc.ca/news/canada/windsor/chief-frederick-retiring-1.5178302
//www.cbc.ca/news/politics/fiscal-update-1.5397273
//www.cbc.ca/news/canada/london/pot-shops-london-licence-1.5254616
//www.cbc.ca/news/politics/fiscal-update-1.5397273
//www.cbc.ca/life/the-skincare-terms-you-ll-want-to-know-about-this-year-1.5488897
//www.cbc.ca/news/canada/nova-scotia/outdoor-adventure-instagram-influencer-1.5228977
//www.cbc.ca/news/politics/fiscal-update-1.5397273
//www.cbc.ca/news/canada/nova-scotia/nsha-health-care-cihi-janet-knox-1.5241420
//www.cbc.ca/player/play/1583082051810
//www.cbc.ca/news/business/bank-of-canada-economy-housing-1.5152705
//www.cbc.ca/strombo/music-2/the-legendary-gza-of-the-wu-tang-clan-is-helping-high-school-kids-learn-sci
//www.cbc.ca/news/canada/edmonton/edmonton-ets-fraud-1.5233497
//www.cbc.ca/radio/thesundayedition/the-sunday-edition-for-april-26-2020-1.5536429/canada-s-for-profit-model-of-long-term-

//www.cbc.ca/news/technology/baseball-mlb-computerized-umpire-trackman-1.5330369
//www.cbc.ca/news/canada/calgary/ndp-climate-plan-andrew-leach-1.5276006
//www.cbc.ca/news/canada/calgary/inter-pipeline-confirms-takeover-offer-1.5242585
//www.cbc.ca/news/politics/rcmp-emergency-response-team-women-1.5233162
//www.cbc.ca/news/canada/calgary/calgary-family-friendly-city-richard-white-1.5451558
//www.cbc.ca/news/canada/coronavirus-covid19-march26-canada-world-1.5510495
//www.cbc.ca/news/canada/prince-edward-island/pei-charlottetown-federal-candidates-debate-sept-2019-1.5305383
//www.cbc.ca/news/canada/calgary/calgary-price-increases-2020-1.5411176
//www.cbc.ca/news/canada/calgary/imperial-oil-second-quarter-results-profit-1.5234561
//www.cbc.ca/news/canada/calgary/imperial-oil-second-quarter-results-profit-1.5234561
//www.cbc.ca/news/canada/new-brunswick/moncton-hospital-nurse-oxytocin-nicole-ruest-class-action-lawsuit-1.5161462
//www.cbc.ca/news/canada/manitoba/children-in-care-evicted-br

//www.cbc.ca/news/canada/kitchener-waterloo/kitchener-conestoga-candidate-surveys-federal-election-1.5315108
//www.cbc.ca/news/politics/cbc-election-poll-1.5188097
//www.cbc.ca/news/canada/manitoba/electricity-history-hydro-manitoba-1.5163321
//www.cbc.ca/news/canada/saskatchewan/brightenview-mall-shadowy-world-1.5341635
//www.cbc.ca/news/indigenous/mmiwg-inquiry-report-1.5158385
interest_rates
1056
{'author': 'Pete Evans', 'title': 'U.S. Fed chair rules out negative interest rates even as Trump trumpets them', 'description': 'U.S. president goes negative on Jerome Powell for rejection of below-zero interest rates', 'url': 'http://www.cbc.ca/news/business/powel-trump-negative-rates-1.5567512', 'urlToImage': 'https://i.cbc.ca/1.5258204.1566589338!/fileImage/httpImage/image.jpg_gen/derivatives/16x9_780/869482910.jpg', 'publishedAt': ('May 13, 2020 10:48 AM ET', '2020-05-13T15:17:41.088Z'), 'source': 'CBC News', 'content': 'The head of the Federal Reserve said Wednesday the U.S. central b

## Housing price

In [None]:
# Scrape every recent article for this query; main() also writes housing_price_CBC_article.json.
cbc_hp_article = main('housing price')
print(len(cbc_hp_article))
print(cbc_hp_article[0])

FIRST URL API CALL:  https://www.cbc.ca/search_api/v1/search?q=housing%20price&sortOrder=relevance&page=1&fields=feed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
24

//www.cbc.ca/news/canada/hamilton/vacant-units-1.5389684
//www.cbc.ca/news/canada/british-columbia/bc-price-changes-past-decade-1.5397302
//www.cbc.ca/player/play/2677724014
//www.cbc.ca/player/play/2677724014
//www.cbc.ca/player/play/2677721573
//www.cbc.ca/news/world/singapore-s11-dorm-coronavirus-1.5539303
//www.cbc.ca/player/play/1648958019581
//www.cbc.ca/news/canada/british-columbia/vancouver-housing-market-2019-2020-1.5407180
//www.cbc.ca/news/canada/london/london-ontario-housing-real-estate-2020-1.5404033
//www.cbc.ca/news/canada/ottawa/urban-boundary-density-debate-ottawa-1.5492618
//www.cbc.ca/news/canada/sudbury/affordable-housing-rent-geared-to-income-low-rental-housing-poverty-1.5366825
//www.cbc.ca/news/canada/british-columbia/budget-2020-affordability-1.5467239
//www.cbc.ca/news/canada/saskatchewan/city-of-regina-tax-exemption-may-be-reinstated-non-profits-1.5432971
//www.cbc.ca/news/canada/hamilton/commonwealth-games-kujavsky-1.5471286
//www.cbc.ca/news/canada/prince-ed

//www.cbc.ca/news/canada/prince-edward-island/pei-down-payment-loan-update-1.5342488
//www.cbc.ca/news/canada/calgary/affordability-report-homes-calgary-zoocasa-mortgage-median-income-1.5298385
//www.cbc.ca/news/canada/north/yukon-housing-affordability-election-1.5327251
//www.cbc.ca/news/politics/millennial-conservatives-1.5443425
//www.cbc.ca/news/canada/london/london-population-1.5464669
//www.cbc.ca/news/canada/north/vacancy-rate-in-whitehorse-1.5218398
//www.cbc.ca/news/canada/north/vacancy-rate-in-whitehorse-1.5218398
//www.cbc.ca/news/canada/british-columbia/victoria-bc-affordability-seniors-homeless-1.5351032
//www.cbc.ca/news/canada/prince-edward-island/pei-rent-increase-2020-irac-1.5293930
//www.cbc.ca/news/canada/sudbury/sudbury-housing-market-1.5248495
//www.cbc.ca/news/housing-for-young-people-national-pharmacare-1.5172870
//www.cbc.ca/player/play/2446359798
//www.cbc.ca/news/business/apple-california-housing-crisis-1.5346584
//www.cbc.ca/news/canada/toronto/toronto-home-s

## Employment

In [389]:
# Scrape every recent article for this query; main() also writes employment_CBC_article.json.
cbc_e_article = main('employment')
print(len(cbc_e_article))
print(cbc_e_article[0])

FIRST URL API CALL:  https://www.cbc.ca/search_api/v1/search?q=employment&sortOrder=relevance&page=1&fields=feed
3
4
//www.cbc.ca/player/play/1737553475798
//www.cbc.ca/player/play/1736889411699
//www.cbc.ca/news/canada/manitoba/manitoba-jobs-economy-analysis-1.5565090
//www.cbc.ca/news/indigenous/first-nations-fishermen-manitoba-relief-1.5570593
//www.cbc.ca/player/play/1735580739577
//www.cbc.ca/news/canada/toronto/east-end-toronto-groups-launch-food-bank-1.5570514
//www.cbc.ca/player/play/1733823555515
//www.cbc.ca/player/play/1733910083634
//www.cbc.ca/news/canada/thunder-bay/ontario-northland-bus-thunder-bay-winnipeg-1.5570191
//www.cbc.ca/player/play/1733924931751
//www.cbc.ca/player/play/1724965443646
//www.cbc.ca/player/play/1712952899711
//www.cbc.ca/news/politics/wayne-easter-emergency-response-benefit-fraud-1.5570545
//www.cbc.ca/player/play/1713193027988
//www.cbc.ca/player/play/1693717571885
//www.cbc.ca/news/world/employment-gains-1.5452316
//www.cbc.ca/player/play/170965

## GDP

In [390]:
# Scrape every recent article for this query; main() also writes GDP_CBC_article.json.
cbc_gdp_article = main('GDP')
print(len(cbc_gdp_article))
print(cbc_gdp_article[0])

FIRST URL API CALL:  https://www.cbc.ca/search_api/v1/search?q=GDP&sortOrder=relevance&page=1&fields=feed
3
4
5
6
7
8
9
10
11
12
13
//www.cbc.ca/news/business/february-gdp-statscan-1.5550479
//www.cbc.ca/news/business/january-gdp-1.5515908
//www.cbc.ca/player/play/2676409093
//www.cbc.ca/news/business/canada-gdp-november-1.5447173
//www.cbc.ca/radio/costofliving/the-cost-of-running-either-for-election-or-a-deficit-1.5323745/canada-s-debt-to-gdp-ratio-isn-t-a-major-election-issue-but-should-it-be-1.5327355
//www.cbc.ca/news/business/us-economy-reports-1.5375050
//www.cbc.ca/news/canada-s-debt-to-gdp-ratio-isn-t-a-major-election-issue-but-should-it-be-1.5327513
//www.cbc.ca/news/world/new-zealand-budget-1.5569118
//www.cbc.ca/player/play/1626791491581
//www.cbc.ca/news/politics/federal-deficit-higher-than-252-billion-1.5566768
//www.cbc.ca/news/world/england-lockdown-1.5567345
//www.cbc.ca/news/canada/hamilton/hamilton-economy-coronavirus-1.5566804
//www.cbc.ca/news/canada/toronto/covid-

## Stock Market

In [392]:
# Scrape every recent article for this query; main() also writes stock_market_CBC_article.json.
cbc_tsx_article = main('stock market')
print(len(cbc_tsx_article))
print(cbc_tsx_article[0])

FIRST URL API CALL:  https://www.cbc.ca/search_api/v1/search?q=stock%20market&sortOrder=relevance&page=1&fields=feed
3
4
//www.cbc.ca/news/canada/calgary/aer-alberta-energy-regulator-pieridae-shell-canada-1.5570875
//www.cbc.ca/player/play/1725703235509
//www.cbc.ca/news/world/u-s-senate-intelligence-committee-richard-burr-1.5569669
//www.cbc.ca/news/business/elon-tusk-tesla-stock-too-high-tweet-1.5553109
//www.cbc.ca/radio/costofliving/the-economy-with-a-capital-e-1.5542514/how-canadians-can-interpret-covid-19-s-volatile-effect-on-the-stock-market-1.5542515
//www.cbc.ca/player/play/1729104451748
//www.cbc.ca/news/canada/prince-edward-island/pei-covid-19-thursday-may-14-1.5569155
//www.cbc.ca/player/play/1704849475631
//www.cbc.ca/player/play/1709167171581
//www.cbc.ca/player/play/1709170243575
//www.cbc.ca/player/play/1708995139657
//www.cbc.ca/news/canada/ottawa/programs/ottawamorning/coronavirus-and-the-stock-market-1.5482115
//www.cbc.ca/player/play/1713828931570
//www.cbc.ca/playe

In [151]:
# Manual test of extract_json_items() on the different kinds of links returned by the CBC API.
# Only standard_url is exercised below; the other three are kept for ad-hoc checks.

standard_url = "//www.cbc.ca/news/politics/federal-deficit-higher-than-252-billion-1.5566768"
radio_url = "//www.cbc.ca/radio/costofliving/slashed-interest-rates-getting-a-piece-of-the-electric-car-pie-and-a-happy-jobs-friday-to-all-1.5486253"
media_url = "//www.cbc.ca/player/play/1707317315674"
noauthor_url = "//www.cbc.ca/news/canada/coronavirus-covid19-world-canada-may12-1.5564261"



extract_json_items(standard_url)

# Note: 'player' URLs (e.g. "//www.cbc.ca/player/play/1707317315674") are not supported.
# The call runs, but most values come back as null because player pages have a different layout.


'{"author": "John Paul Tasker", "title": "Federal deficit likely to be higher than $252 billion, parliamentary budget\\u00a0officer says", "description": "PBO says it\'s possible federal debt will hit $1 trillion because of pandemic relief spending", "url": "http://www.cbc.ca/news/politics/federal-deficit-higher-than-252-billion-1.5566768", "urlToImage": "https://i.cbc.ca/1.5138746.1558027091!/fileImage/httpImage/image.jpg_gen/derivatives/16x9_780/phoenix-pay-system.jpg", "publishedAt": ["May 12, 2020 5:34 PM ET", "2020-05-13T17:44:54.995Z"], "source": "CBC News"}'