# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
# Address of the feed to parse (O'Reilly Radar, served through FeedBurner).
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
# Parse the feed at `url`; the dict-like result is what every later cell inspects.
rss = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
# Top-level components of the parsed result.
rss.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
# Components of the feed-level metadata (rss.feed), as opposed to the individual entries.
rss.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
print(rss.feed.title)
print(rss.feed.subtitle)
# rss.feed.author is not printed: this feed's metadata has no 'author' key (see rss.feed.keys() above).
print(rss.feed.link)

Radar
Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [7]:
# Number of entries in the parsed feed.
len(rss.entries)

60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index into the list of entries before requesting the keys.*

In [8]:
# Keys live on each individual entry, so pick one (the first) before calling keys().
rss.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [9]:
# Collect the title of every entry by iterating over the entries directly
# (no positional indexing needed).
titles = [entry.title for entry in rss.entries]
print(titles)


['The unreasonable importance of data preparation', 'Four short links: 24 March 2020', '3 ways to confront modern business challenges', 'An enterprise vision is your company’s North Star', 'Leaders need to mobilize change-ready workforces', 'Great leaders inspire innovation and creativity from within their workforces', 'Strong leaders forge an intersection of knowledge and experience', 'Four short links: 23 March 2020', 'Four short links: 20 March 2020', '6 trends framing the state of AI and ML', 'Four short links: 19 March 2020', 'It’s an unprecedented crisis: 8 things to do right now', 'AI adoption in the enterprise 2020', 'Four short links: 18 March 2020', 'Four short links: 17 March 2020', 'Four short links: 16 March 2020', 'Four short links: 13 March 2020', 'Four short links: 12 March 2020', 'Four short links: 11 March 2020', 'Four short links: 10 March 2020', 'Four short links: 9 March 2020', 'Four short links: 6 March 2020', 'Radar trends to watch: March 2020', 'Four short links

### 8. Calculate the percentage of "Four short links" entry titles.

In [10]:
# Tally the titles that contain the recurring "Four short links" series name.
count = sum(1 for title in titles if 'Four short links' in title)

# Express the tally as a percentage of all entry titles.
print(count/len(titles)*100)

61.66666666666667


### 9. Create a Pandas data frame from the feed's entries.

In [11]:
import pandas as pd

In [12]:
# Build a DataFrame from the feed entries: one row per entry, one column per entry key.
df = pd.DataFrame(rss.entries)
df

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
0,Hugo Bowne-Anderson,{'name': 'Hugo Bowne-Anderson'},[{'name': 'Hugo Bowne-Anderson'}],https://www.oreilly.com/radar/the-unreasonable...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/the-unreasonable...,False,https://www.oreilly.com/radar/?p=12448,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 24 Mar 2020 10:00:00 +0000","(2020, 3, 24, 10, 0, 0, 1, 84, 0)",0,In a world focused on buzzword-driven models a...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",The unreasonable importance of data preparation,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/the-unreasonable...
1,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=12558,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 24 Mar 2020 04:01:00 +0000","(2020, 3, 24, 4, 1, 0, 1, 84, 0)",0,Potential Distributed Reading Group on Distrib...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 24 March 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
2,Rita J. King,{'name': 'Rita J. King'},[{'name': 'Rita J. King'}],https://www.oreilly.com/radar/3-ways-to-confro...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/3-ways-to-confro...,False,https://www.oreilly.com/radar/?p=12425,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 23 Mar 2020 10:00:00 +0000","(2020, 3, 23, 10, 0, 0, 0, 83, 0)",0,I interviewed four business leaders in late 20...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Future of the Firm', 'scheme': None...",3 ways to confront modern business challenges,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/3-ways-to-confro...
3,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],https://www.oreilly.com/radar/an-enterprise-vi...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/an-enterprise-vi...,False,https://www.oreilly.com/radar/?p=12395,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 23 Mar 2020 09:59:00 +0000","(2020, 3, 23, 9, 59, 0, 0, 83, 0)",0,"Rita J. King, co-director and EVP for business...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Future of the Firm', 'scheme': None...",An enterprise vision is your company’s North Star,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/an-enterprise-vi...
4,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],https://www.oreilly.com/radar/leaders-need-to-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/leaders-need-to-...,False,https://www.oreilly.com/radar/?p=12408,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 23 Mar 2020 09:58:00 +0000","(2020, 3, 23, 9, 58, 0, 0, 83, 0)",0,"Rita J. King, co-director and EVP for business...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Future of the Firm', 'scheme': None...",Leaders need to mobilize change-ready workforces,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/leaders-need-to-...
5,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],https://www.oreilly.com/radar/great-leaders-in...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/great-leaders-in...,False,https://www.oreilly.com/radar/?p=12403,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 23 Mar 2020 09:57:00 +0000","(2020, 3, 23, 9, 57, 0, 0, 83, 0)",0,"Rita J. King, co-director and EVP for business...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Future of the Firm', 'scheme': None...",Great leaders inspire innovation and creativit...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/great-leaders-in...
6,Jenn Webb,{'name': 'Jenn Webb'},[{'name': 'Jenn Webb'}],https://www.oreilly.com/radar/strong-leaders-f...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/strong-leaders-f...,False,https://www.oreilly.com/radar/?p=10839,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 23 Mar 2020 09:56:00 +0000","(2020, 3, 23, 9, 56, 0, 0, 83, 0)",0,"Rita J. King, co-director and EVP for business...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Future of the Firm', 'scheme': None...",Strong leaders forge an intersection of knowle...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/strong-leaders-f...
7,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=12547,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 23 Mar 2020 04:01:00 +0000","(2020, 3, 23, 4, 1, 0, 0, 83, 0)",0,Stanza: A Python Natural Language Processing T...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 23 March 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
8,Nat Torkington,{'name': 'Nat Torkington'},[{'name': 'Nat Torkington'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=12532,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 20 Mar 2020 04:01:00 +0000","(2020, 3, 20, 4, 1, 0, 4, 80, 0)",0,NASCAR Replaces Canceled Races with Esports Fe...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 20 March 2020,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
9,Roger Magoulas and Steve Swoyer,{'name': 'Roger Magoulas and Steve Swoyer'},[{'name': 'Roger Magoulas and Steve Swoyer'}],https://www.oreilly.com/radar/6-trends-framing...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/6-trends-framing...,False,https://www.oreilly.com/radar/?p=12341,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 19 Mar 2020 10:00:00 +0000","(2020, 3, 19, 10, 0, 0, 3, 79, 0)",0,O’Reilly online learning is a trove of informa...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",6 trends framing the state of AI and ML,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/6-trends-framing...


### 10. Count the number of entries per author and sort them in descending order.

In [13]:

# value_counts() tallies entries per author; the result is listed with the highest count first.
df['author'].value_counts()

Nat Torkington                     37
Jenn Webb                           4
Roger Magoulas and Steve Swoyer     4
Mike Loukides                       3
Rita J. King                        1
Tim O’Reilly                        1
Mark Richards                       1
Martin Fowler                       1
Mac Slocum                          1
Rachel Laycock and Neal Ford        1
Pamela Rucker                       1
Mary Poppendieck                    1
Kai Holnes                          1
George Fairbanks                    1
Cynthia Owens                       1
Hugo Bowne-Anderson                 1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [14]:
# Title length in characters via the vectorized string accessor; unlike apply(len),
# a missing title becomes NaN instead of raising a TypeError.
df['title_length'] = df['title'].str.len()
# Show only the requested columns, longest title first.
df[['title', 'author', 'title_length']].sort_values('title_length', ascending=False)

Unnamed: 0,title,author,title_length
32,Highlights from the O’Reilly Software Architec...,Mac Slocum,78
5,Great leaders inspire innovation and creativit...,Jenn Webb,76
43,10 ways to get untapped talent in your organiz...,Pamela Rucker,65
6,Strong leaders forge an intersection of knowle...,Jenn Webb,64
11,It’s an unprecedented crisis: 8 things to do r...,Cynthia Owens,54
3,An enterprise vision is your company’s North Star,Jenn Webb,49
4,Leaders need to mobilize change-ready workforces,Jenn Webb,48
0,The unreasonable importance of data preparation,Hugo Bowne-Anderson,47
45,5 key areas for tech leaders to watch in 2020,Roger Magoulas and Steve Swoyer,45
2,3 ways to confront modern business challenges,Rita J. King,45


### 12. Create a list of entry titles whose summary includes the phrase "machine learning".

In [15]:
# Titles of entries whose summary contains the literal, case-sensitive phrase
# "machine learning". regex=False treats the phrase as plain text, and na=False
# counts a missing summary as "no match" instead of raising.
has_ml = df['summary'].str.contains('machine learning', regex=False, na=False)
ML = df.loc[has_ml, 'title'].tolist()
print(ML)

['Four short links: 13 February 2020']
