# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

In [None]:
!pip install feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
res = feedparser.parse(url)
print(type(res))

<class 'feedparser.FeedParserDict'>


### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
list_parse=list(res.keys())
print (list_parse)

['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces']


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
list_feed=[res['feed'].keys()]
print (list_feed)

[dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])]


### 4. Extract and print the feed title, subtitle, author, and link.

In [None]:
print (res['feed']['title'])
print (res['feed']['subtitle'])
print (res['feed']['link'])

### 5. Count the number of entries that are contained in this RSS feed.

In [6]:
print (len(res['entries']))

60


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [7]:
list_components=[res['entries'][0].keys()]
print (list_components)

[dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])]


### 7. Extract a list of entry titles.

In [8]:
list_titles=[res['entries'][i]['title'] for i in range(len(res['entries']))]
print (list_titles)

['Four short links: 14 April 2020', 'Radar trends to watch: April 2020', 'Four short links: 13 April 2020', 'Four short links: 10 April 2020', 'Four short links: 9 April 2020', 'Four short links: 8 April 2020', 'Four short links: 7 April 2020', 'Governance and Discovery', 'Four short links: 6 April 2020', 'Four short links: 3 April 2020', 'Four short links: 2 April 2020', 'Four short links: 1 April 2020', 'Four short links: 31 March 2020', 'What you need to know about product management for AI', 'The unreasonable importance of data preparation', 'Four short links: 24 March 2020', '3 ways to confront modern business challenges', 'An enterprise vision is your company’s North Star', 'Leaders need to mobilize change-ready workforces', 'Great leaders inspire innovation and creativity from within their workforces', 'Strong leaders forge an intersection of knowledge and experience', 'Four short links: 23 March 2020', 'Four short links: 20 March 2020', '6 trends framing the state of AI and ML'

### 8. Calculate the percentage of "Four short links" entry titles.

In [9]:
count=len([i for i in range(len(list_titles)) if 'Four short links' in list_titles[i]])
print (count*100/(len(res['entries'])), '%')


60.0 %


### 9. Create a Pandas data frame from the feed's entries.

In [10]:
import pandas as pd

In [11]:
entry=pd.DataFrame.from_dict(res['entries'])
display (entry.head())

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Four short links: 14 April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 14 Apr 2020 11:55:12 +0000","(2020, 4, 14, 11, 55, 12, 1, 105, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=12633,False,The Science of Happiness &#8212; free enrolmen...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
1,Radar trends to watch: April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Mon, 13 Apr 2020 19:39:00 +0000","(2020, 4, 13, 19, 39, 0, 0, 104, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=12597,False,"Since early in March, technology news has been...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...
2,Four short links: 13 April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Mon, 13 Apr 2020 11:53:38 +0000","(2020, 4, 13, 11, 53, 38, 0, 104, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=12625,False,Introduction to COBOL &#8212; a 1999 web site ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
3,Four short links: 10 April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 10 Apr 2020 11:33:40 +0000","(2020, 4, 10, 11, 33, 40, 4, 101, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=12618,False,FairMOT &#8212; one-shot multi-object tracking...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
4,Four short links: 9 April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 09 Apr 2020 11:42:34 +0000","(2020, 4, 9, 11, 42, 34, 3, 100, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=12614,False,The Fuzzy Edges of Character Encoding &#8212; ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...


### 10. Count the number of entries per author and sort them in descending order.

In [12]:

author=entry['author'].value_counts()
display (author.head())

Nat Torkington                     36
Jenn Webb                           4
Mike Loukides                       4
Roger Magoulas and Steve Swoyer     3
Tim O’Reilly                        1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [13]:
len_title=[len(entry['title'][i]) for i in range(len(entry))]
entry['len_title']=len_title
display (entry.head())

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,len_title
0,Four short links: 14 April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 14 Apr 2020 11:55:12 +0000","(2020, 4, 14, 11, 55, 12, 1, 105, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=12633,False,The Science of Happiness &#8212; free enrolmen...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,31
1,Radar trends to watch: April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Mon, 13 Apr 2020 19:39:00 +0000","(2020, 4, 13, 19, 39, 0, 0, 104, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=12597,False,"Since early in March, technology news has been...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...,33
2,Four short links: 13 April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Mon, 13 Apr 2020 11:53:38 +0000","(2020, 4, 13, 11, 53, 38, 0, 104, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=12625,False,Introduction to COBOL &#8212; a 1999 web site ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,31
3,Four short links: 10 April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 10 Apr 2020 11:33:40 +0000","(2020, 4, 10, 11, 33, 40, 4, 101, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=12618,False,FairMOT &#8212; one-shot multi-object tracking...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,31
4,Four short links: 9 April 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 09 Apr 2020 11:42:34 +0000","(2020, 4, 9, 11, 42, 34, 3, 100, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=12614,False,The Fuzzy Edges of Character Encoding &#8212; ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,30


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [14]:

ml=[entry['title'][i] for i in range(len(entry)) if 'machine learning' in entry['summary'][i]]
print (ml)

['What you need to know about product management for AI']
