# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [2]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [3]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [19]:
rss = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [20]:
rss.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [21]:
rss.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [22]:
print(rss.feed.title)
print(rss.feed.subtitle)
# feeds.feed.author (DOES NOT EXIST)
print(rss.feed.link)

Radar
Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [28]:
len(rss.entries)

18

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [24]:
rss.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [27]:
# list_of_titles = (rss.feed.title)
# list_of_titles

titles = [rss.entries[i].title for i in range(len(rss.entries))]
print(titles)
# feeds.entries[].title


['Four short links: 4 October 2019', 'Four short links: 3 October 2019', 'Four short links: 2 October 2019', 'Four short links: 1 October 2019', 'TinyML: The challenges and opportunities of low-power ML applications', 'Four short links: 30 September 2019', 'Highlights from the Strata Data Conference in New York 2019', 'Four short links: 27 September 2019', 'Delivering the enterprise data cloud', 'Data Science Pioneers: Conquering the next frontier, a documentary investigating the future of data science', 'Postrevolutionary big data: Promoting the general welfare', 'Say what? The ethical challenges of designing for humanlike interaction', 'RL in real life: Bringing reinforcement learning to the enterprise', 'Strata Data Awards winners 2019', 'Staying safe in the AI era', 'Unlocking the value of your data', 'Data sonification: Making music from the yield curve', 'Four Short Links: 26 September 2019']


### 8. Calculate the percentage of "Four short links" entry titles.

In [34]:
count = 0
for x in titles:
    if 'Four short links' in x:
        count += 1

print(count/len(titles)*100)

33.33333333333333


### 9. Create a Pandas data frame from the feed's entries.

In [16]:
import pandas as pd

In [26]:
df = pd.DataFrame(rss.entries)
df

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
0,jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},[{'email': 'jwebb@oreilly.com'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9850,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 04 Oct 2019 04:01:01 +0000","(2019, 10, 4, 4, 1, 1, 4, 277, 0)",0,SQL Queries Don&#8217;t Start with SELECT (Jul...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 4 October 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
1,jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},[{'email': 'jwebb@oreilly.com'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9847,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Thu, 03 Oct 2019 04:01:45 +0000","(2019, 10, 3, 4, 1, 45, 3, 276, 0)",0,Why Do Companies With Huge Resources Still Hav...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 3 October 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
2,jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},[{'email': 'jwebb@oreilly.com'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9832,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 02 Oct 2019 04:01:09 +0000","(2019, 10, 2, 4, 1, 9, 2, 275, 0)",0,Data Fallacies to Avoid &#8212; nifty infograp...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 2 October 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
3,jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},[{'email': 'jwebb@oreilly.com'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9792,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 01 Oct 2019 04:05:11 +0000","(2019, 10, 1, 4, 5, 11, 1, 274, 0)",0,Just Enough Research &#8212; a book that comes...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 1 October 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
4,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/tinyml-the-chall...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/tinyml-the-chall...,False,https://www.oreilly.com/radar/?p=9378,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 01 Oct 2019 04:01:53 +0000","(2019, 10, 1, 4, 1, 53, 1, 274, 0)",0,Pete Warden has an ambitious goal: he wants to...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': '~home', 'scheme': None, 'label': No...",TinyML: The challenges and opportunities of lo...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/tinyml-the-chall...
5,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9768,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 30 Sep 2019 04:01:40 +0000","(2019, 9, 30, 4, 1, 40, 0, 273, 0)",0,Stamos on CLOUD Act — cogent and informative s...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 30 September 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
6,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/highlights-from-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...,False,https://www.oreilly.com/radar/?p=9487,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 12:00:52 +0000","(2019, 9, 27, 12, 0, 52, 4, 270, 0)",0,People from across the data world came togethe...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Highlights from the Strata Data Conference in ...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/highlights-from-...
7,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9715,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 04:01:35 +0000","(2019, 9, 27, 4, 1, 35, 4, 270, 0)",0,"Intro to Creative Coding — this is the repo, a...","{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 27 September 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
8,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/delivering-the-e...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/delivering-the-e...,False,https://www.oreilly.com/radar/?p=9452,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 00:00:48 +0000","(2019, 9, 27, 0, 0, 48, 4, 270, 0)",0,This is a keynote highlight from the Strata Da...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Delivering the enterprise data cloud,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/delivering-the-e...
9,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/data-science-pio...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/data-science-pio...,False,https://www.oreilly.com/radar/?p=9479,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 27 Sep 2019 00:00:43 +0000","(2019, 9, 27, 0, 0, 43, 4, 270, 0)",0,This is a keynote from the Strata Data Confere...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Data Science Pioneers: Conquering the next fro...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/data-science-pio...


### 10. Count the number of entries per author and sort them in descending order.

In [36]:
# authors = df.groupby('author', as_index=False).agg({'title':'count'})
# authors.columns = ['author', 'entries']
# authors.sort_values('entries', ascending=True)

df['author'].value_counts()

Mac Slocum           14
jwebb@oreilly.com     4
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [32]:
df['title_length'] = df['title'].apply(len)
df[['title', 'author', 'title_length']].sort_values('title_length', ascending=False)

Unnamed: 0,title,author,title_length
9,Data Science Pioneers: Conquering the next fro...,Mac Slocum,107
11,Say what? The ethical challenges of designing ...,Mac Slocum,71
4,TinyML: The challenges and opportunities of lo...,Mac Slocum,69
12,RL in real life: Bringing reinforcement learni...,Mac Slocum,66
6,Highlights from the Strata Data Conference in ...,Mac Slocum,59
10,Postrevolutionary big data: Promoting the gene...,Mac Slocum,57
16,Data sonification: Making music from the yield...,Mac Slocum,52
8,Delivering the enterprise data cloud,Mac Slocum,36
17,Four Short Links: 26 September 2019,Mac Slocum,35
7,Four short links: 27 September 2019,Mac Slocum,35


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [48]:
ML = []
# print(df['summary'])
# summaries = [rss.summary[i].title for i in range(len(rss.entries))]
for ind in df.index:
    if 'machine learning' in df['summary'][ind]:
        ML.append(df['title'][ind])
print(ML)

['TinyML: The challenges and opportunities of low-power ML applications']
