# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
# First we need to install the following library:
!pip install feedparser



In [2]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [3]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [4]:
new_url = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [5]:
list(new_url.keys())

['feed',
 'entries',
 'bozo',
 'headers',
 'etag',
 'updated',
 'updated_parsed',
 'href',
 'status',
 'encoding',
 'version',
 'namespaces']

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [34]:
list(new_url['feed'].keys())

['title',
 'title_detail',
 'links',
 'link',
 'subtitle',
 'subtitle_detail',
 'updated',
 'updated_parsed',
 'language',
 'sy_updateperiod',
 'sy_updatefrequency',
 'generator_detail',
 'generator',
 'feedburner_info',
 'geo_lat',
 'geo_long',
 'feedburner_emailserviceid',
 'feedburner_feedburnerhostname']

### 4. Extract and print the feed title, subtitle, author, and link.

In [7]:
# We can specify the column ('feed') from the URL and apply the command .title to know its title:
feed_title = new_url['feed'].title
feed_title

'Radar'

In [8]:
# We can specify the column ('feed') from the URL and apply the command .subtitle to know its subtitle:
feed_subtitle = new_url['feed'].subtitle
feed_subtitle

'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'

In [9]:
# To get all details from the subtitle, use the command bellow:
feed_subtitle_detail = new_url['feed'].subtitle_detail
feed_subtitle_detail

{'type': 'text/html',
 'language': None,
 'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
 'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'}

In [35]:
# To get more information about the author:
feed_author = new_url['feed'].author
feed_author

AttributeError: object has no attribute 'author'

In [53]:
# To know the link of the url, use the command .link:
feed_link = new_url['feed'].link
feed_link

'https://www.oreilly.com/radar'

### 5. Count the number of entries that are contained in this RSS feed.

In [12]:
entries_number = new_url.entries


entries_number

[{'title': 'Four short links: 4 December 2019',
  'title_detail': {'type': 'text/plain',
   'language': None,
   'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
   'value': 'Four short links: 4 December 2019'},
  'links': [{'rel': 'alternate',
    'type': 'text/html',
    'href': 'http://feedproxy.google.com/~r/oreilly/radar/atom/~3/DV4ZdRtk1ws/'}],
  'link': 'http://feedproxy.google.com/~r/oreilly/radar/atom/~3/DV4ZdRtk1ws/',
  'comments': 'https://www.oreilly.com/radar/four-short-links-4-december-2019/#respond',
  'published': 'Wed, 04 Dec 2019 05:01:00 +0000',
  'published_parsed': time.struct_time(tm_year=2019, tm_mon=12, tm_mday=4, tm_hour=5, tm_min=1, tm_sec=0, tm_wday=2, tm_yday=338, tm_isdst=0),
  'authors': [{'name': 'Nat Torkington'}],
  'author': 'Nat Torkington',
  'author_detail': {'name': 'Nat Torkington'},
  'tags': [{'term': 'Four Short Links', 'scheme': None, 'label': None},
   {'term': 'Signals', 'scheme': None, 'label': None}],
  'id': 'https://www.oreilly.

In [13]:
f'This RSS feed contained a total of {len(entries_number)} entries'

'This RSS feed contained a total of 18 entries'

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [38]:
entry = entries_number[0]


entry_keys_feed_list = list(entry.keys())


entry_keys_feed_list

['title',
 'title_detail',
 'links',
 'link',
 'comments',
 'published',
 'published_parsed',
 'authors',
 'author',
 'author_detail',
 'tags',
 'id',
 'guidislink',
 'summary',
 'summary_detail',
 'content',
 'wfw_commentrss',
 'slash_comments',
 'feedburner_origlink']

### 7. Extract a list of entry titles.

In [15]:
# Knowing that we have a total entries of 18, we can use the function range with a for loop in a list 
# comprehension to get all the entry titles.


entry_titles = [new_url.entries[i].title for i in range(0, 18)]


entry_titles

['Four short links: 4 December 2019',
 'Use your people as competitive advantage',
 'Four short links: 3 December 2019',
 'A 5G future',
 'Four short links: 2 December 2019',
 'Four short links: 29 November 2019',
 'Four short links: 28 November 2019',
 'Four short links: 27 November 2019',
 'Moving AI and ML from research into production',
 'Four short links: 26 November 2019',
 'Four short links: 25 November 2019',
 'Four short links: 22 November 2019',
 'Why you should care about robotic process automation',
 'Unraveling the mystery of code',
 'Four short links: 21 November 2019',
 'Four short links: 20 November 2019',
 'There’s a path to an AI ROI',
 'Four short links: 19 November 2019']

### 8. Calculate the percentage of "Four short links" entry titles.

In [16]:
four_short_links = [i for i in entry_titles if i.startswith('Four short links:')]


percentage = len(four_short_links)/len(entry_titles) *100


f'The percentage of "Four short links" entry titles equals to {percentage:.3}'

'The percentage of "Four short links" entry titles equals to 66.7'

### 9. Create a Pandas data frame from the feed's entries.

In [17]:
import pandas as pd

In [20]:
df_entries = pd.DataFrame(entries_number)
df_entries

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Four short links: 4 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 04 Dec 2019 05:01:00 +0000","(2019, 12, 4, 5, 1, 0, 2, 338, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11129,False,The Complexity Explorer &#8212; online courses...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
1,Use your people as competitive advantage,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/use-your-people-...,"Tue, 03 Dec 2019 09:00:00 +0000","(2019, 12, 3, 9, 0, 0, 1, 337, 0)",[{'name': 'Pamela Rucker'}],Pamela Rucker,{'name': 'Pamela Rucker'},"[{'term': 'Future of the Firm', 'scheme': None...",https://www.oreilly.com/radar/?p=11068,False,"In a fast-paced digital world, it is tempting ...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/use-your-people-...,0,https://www.oreilly.com/radar/use-your-people-...
2,Four short links: 3 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 03 Dec 2019 05:01:00 +0000","(2019, 12, 3, 5, 1, 0, 1, 337, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11109,False,"Oxide.computer &#8212; a new hardware company,...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
3,A 5G future,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/a-5g-future/#res...,"Mon, 02 Dec 2019 10:00:00 +0000","(2019, 12, 2, 10, 0, 0, 0, 336, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Innovation & Disruption', 'scheme':...",https://www.oreilly.com/radar/?p=11075,False,"For the past year, 5G cell technology has gene...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/a-5g-future/feed/,0,https://www.oreilly.com/radar/a-5g-future/
4,Four short links: 2 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Mon, 02 Dec 2019 05:01:00 +0000","(2019, 12, 2, 5, 1, 0, 0, 336, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11100,False,Two Years at Dropbox &#8212; a lot of wisdom a...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
5,Four short links: 29 November 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 29 Nov 2019 05:01:00 +0000","(2019, 11, 29, 5, 1, 0, 4, 333, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11096,False,A Visual Guide to Using BERT for the First Tim...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
6,Four short links: 28 November 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 28 Nov 2019 05:01:00 +0000","(2019, 11, 28, 5, 1, 0, 3, 332, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11091,False,Raspberry Pi Recovery Kit &#8212; Pi for Prepp...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
7,Four short links: 27 November 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 27 Nov 2019 05:01:00 +0000","(2019, 11, 27, 5, 1, 0, 2, 331, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11085,False,Comby &#8212; a tool for matching and rewritin...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
8,Moving AI and ML from research into production,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/moving-ai-and-ml...,"Tue, 26 Nov 2019 05:10:13 +0000","(2019, 11, 26, 5, 10, 13, 1, 330, 0)",[{'name': 'Jenn Webb'}],Jenn Webb,{'name': 'Jenn Webb'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=10241,False,In this interview from O&#8217;Reilly Foo Camp...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/moving-ai-and-ml...,0,https://www.oreilly.com/radar/moving-ai-and-ml...
9,Four short links: 26 November 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 26 Nov 2019 05:01:00 +0000","(2019, 11, 26, 5, 1, 0, 1, 330, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11064,False,Braid &#8212; a set of extensions to HTTP whic...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...


In [21]:
# This will be important because in the exercise 11 we will need to add a new column.
# With the command .shape we'll know how many rows and columns we have in the dataframe df_entries.
df_entries.shape

(18, 19)

### 10. Count the number of entries per author and sort them in descending order.

In [22]:
df_entries['author'].value_counts().sort_values(ascending=False)

Nat Torkington                                  12
Jenn Webb                                        3
Sunil Ranka, Roger Magoulas and Steve Swoyer     1
Mike Loukides                                    1
Pamela Rucker                                    1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [27]:
number_of_characters = df_entries['title'].str.len()


df_entries['title_length'] = number_of_characters


df_entries.shape

(18, 20)

In [41]:
df_entries[['title', 'author', 'title_length']].sort_values(by='title_length', ascending=False).reset_index(drop=True)

Unnamed: 0,title,author,title_length
0,Why you should care about robotic process auto...,"Sunil Ranka, Roger Magoulas and Steve Swoyer",52
1,Moving AI and ML from research into production,Jenn Webb,46
2,Use your people as competitive advantage,Pamela Rucker,40
3,Four short links: 26 November 2019,Nat Torkington,34
4,Four short links: 20 November 2019,Nat Torkington,34
5,Four short links: 21 November 2019,Nat Torkington,34
6,Four short links: 22 November 2019,Nat Torkington,34
7,Four short links: 25 November 2019,Nat Torkington,34
8,Four short links: 19 November 2019,Nat Torkington,34
9,Four short links: 27 November 2019,Nat Torkington,34


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [45]:
wanted_phrase = list(df_entries['summary'].str.contains('machine learning'))

machine_learning = []
for i,e in enumerate(wanted_phrase):
    if e == True:
        machine_learning.append(df_entries.iloc[i,0])
machine_learning

['Moving AI and ML from research into production',
 'There’s a path to an AI ROI']

In [46]:
df_entries.summary

0     The Complexity Explorer &#8212; online courses...
1     In a fast-paced digital world, it is tempting ...
2     Oxide.computer &#8212; a new hardware company,...
3     For the past year, 5G cell technology has gene...
4     Two Years at Dropbox &#8212; a lot of wisdom a...
5     A Visual Guide to Using BERT for the First Tim...
6     Raspberry Pi Recovery Kit &#8212; Pi for Prepp...
7     Comby &#8212; a tool for matching and rewritin...
8     In this interview from O&#8217;Reilly Foo Camp...
9     Braid &#8212; a set of extensions to HTTP whic...
10    Why “Always use UTC” is Bad Advice &#8212; thr...
11    FAQ Off &#8212; open source software that lets...
12    In a classic 1983 paper, cognitive psychologis...
13    In this interview from O&#8217;Reilly Foo Camp...
14    Program Synthesis and the Art of Programming b...
15    Local-First Software: You Own Your Data, in Sp...
16    In this interview from O&#8217;Reilly Foo Camp...
17    Ghost Ships, Crop Circles, and Soft Gold: 

In [47]:
wanted_phrase

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False]

In [49]:
df_entries.summary[16]

'In this interview from O&#8217;Reilly Foo Camp 2019, Hands-On Unsupervised Learning Using Python author Ankur Patel discusses the challenges and opportunities in making machine learning and AI accessible and financially viable for enterprise applications. Highlights from the interview include: The biggest hurdle businesses face when implementing machine learning or AI solutions is cleaning and preparing [&#8230;]'