# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [32]:
import feedparser as fp
import pandas as pd


### 1. Use feedparser to parse the following RSS feed URL.

In [8]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [9]:
oreily = fp.parse(url)
oreily

{'feed': {'title': 'Radar',
  'title_detail': {'type': 'text/plain',
   'language': None,
   'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
   'value': 'Radar'},
  'links': [{'rel': 'alternate',
    'type': 'text/html',
    'href': 'https://www.oreilly.com/radar'},
   {'rel': 'self',
    'type': 'application/rss+xml',
    'href': 'http://feeds.feedburner.com/oreilly/radar/atom'},
   {'rel': 'hub',
    'href': 'http://pubsubhubbub.appspot.com/',
    'type': 'text/html'}],
  'link': 'https://www.oreilly.com/radar',
  'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology',
  'subtitle_detail': {'type': 'text/html',
   'language': None,
   'base': 'http://feeds.feedburner.com/oreilly/radar/atom',
   'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'},
  'updated': 'Wed, 18 Sep 2019 11:48:50 +0000',
  'updated_parsed': time.struct_time(tm_year=2019, tm_mon=9, tm_md

### 2. Obtain a list of components (keys) that are available for this feed.

In [10]:
oreily.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [11]:
oreily.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [29]:
print(oreily.feed.title)
print(oreily.feed.subtitle)
print(oreily.entries[10].author)
print(oreily.feed.link)

Radar
Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
Mac Slocum
https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [39]:
df = pd.DataFrame(oreily.entries)
df.count()
authors = df.groupby('author', as_index=False).agg({'title':'count'})
authors.columns = ['author', 'entries']
authors.sort_values('entries', ascending=False)

Unnamed: 0,author,entries
0,Mac Slocum,18


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [79]:
oreily.entries[0].keys()
# for names in oreily.keys():
#     print(oreily.names.keys())

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [94]:
titles = [oreily.entries[i].title for i in range(len(oreily.entries))]
titles

['Four short links: 18 September 2019',
 'Four short links: 17 September 2019',
 'Four short links: 16 September 2019',
 'Radar trends to watch: September 2019',
 'Four short links: 13 September 2019',
 'Safe and smarter driving, powered by AI',
 'On gradient-based methods for finding game-theoretic equilibria',
 'Accelerate with purpose',
 'Practical insights into deep reinforcement learning',
 'Open-endedness: A new grand challenge for AI',
 'Four short links: 12 September 2019',
 'AI for ophthalmology: Doing what doctors can’t',
 'Enabling AI’s potential through wafer-scale integration',
 'Getting from A to AI',
 'Highlights from the O’Reilly Artificial Intelligence Conference in San Jose 2019',
 'Going beyond fully supervised learning',
 'Developing AI responsibly',
 'Unlocking the value of your data']

### 8. Calculate the percentage of "Four short links" entry titles.

In [122]:
import re
pattern = 'Four short links'

string_titles = ' '.join(titles)

x = re.findall(pattern,string_titles)

j = 0
for i in range(len(x)):
    j+=1

print(str((j/len(titles))*100) + "%")

27.77777777777778%


### 9. Create a Pandas data frame from the feed's entries.

In [None]:
import pandas as pd

In [125]:
df = pd.DataFrame(oreily.entries)
df

Unnamed: 0,author,author_detail,authors,comments,content,feedburner_origlink,guidislink,id,link,links,published,published_parsed,slash_comments,summary,summary_detail,tags,title,title_detail,wfw_commentrss
0,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9372,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Wed, 18 Sep 2019 04:01:52 +0000","(2019, 9, 18, 4, 1, 52, 2, 261, 0)",0,Extracting Insights from the Shape of Complex ...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 18 September 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
1,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9350,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Tue, 17 Sep 2019 04:01:36 +0000","(2019, 9, 17, 4, 1, 36, 1, 260, 0)",0,Mirroring to Build Trust in Digital Assistants...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 17 September 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
2,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9320,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Mon, 16 Sep 2019 04:01:02 +0000","(2019, 9, 16, 4, 1, 2, 0, 259, 0)",0,Challenges in the Decentralised Web: The Masto...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 16 September 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
3,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/radar-trends-to-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,False,https://www.oreilly.com/radar/?p=9078,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 13 Sep 2019 04:02:19 +0000","(2019, 9, 13, 4, 2, 19, 4, 256, 0)",0,Radar trends began as an internal resource for...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Radar Trends', 'scheme': None, 'lab...",Radar trends to watch: September 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...
4,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/four-short-links...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,False,https://www.oreilly.com/radar/?p=9304,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 13 Sep 2019 04:00:56 +0000","(2019, 9, 13, 4, 0, 56, 4, 256, 0)",0,Universal Adversarial Triggers for Attacking a...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'Four Short Links', 'scheme': None, ...",Four short links: 13 September 2019,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...
5,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/safe-and-smarter...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/safe-and-smarter...,False,https://www.oreilly.com/radar/?p=9335,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 13 Sep 2019 01:00:27 +0000","(2019, 9, 13, 1, 0, 27, 4, 256, 0)",0,This is a keynote from the O&#8217;Reilly Arti...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...","Safe and smarter driving, powered by AI","{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/safe-and-smarter...
6,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/on-gradient-base...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/on-gradient-base...,False,https://www.oreilly.com/radar/?p=9148,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 13 Sep 2019 00:00:58 +0000","(2019, 9, 13, 0, 0, 58, 4, 256, 0)",0,This is a keynote highlight from the O&#8217;R...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",On gradient-based methods for finding game-the...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/on-gradient-base...
7,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/accelerate-with-...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/accelerate-with-...,False,https://www.oreilly.com/radar/?p=9154,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 13 Sep 2019 00:00:45 +0000","(2019, 9, 13, 0, 0, 45, 4, 256, 0)",0,This is a keynote highlight from the O&#8217;R...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Accelerate with purpose,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/accelerate-with-...
8,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/practical-insigh...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/practical-insigh...,False,https://www.oreilly.com/radar/?p=9162,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 13 Sep 2019 00:00:42 +0000","(2019, 9, 13, 0, 0, 42, 4, 256, 0)",0,This is a keynote highlight from the O&#8217;R...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Practical insights into deep reinforcement lea...,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/practical-insigh...
9,Mac Slocum,{'name': 'Mac Slocum'},[{'name': 'Mac Slocum'}],https://www.oreilly.com/radar/open-endedness-a...,"[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/open-endedness-a...,False,https://www.oreilly.com/radar/?p=9168,http://feedproxy.google.com/~r/oreilly/radar/a...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...","Fri, 13 Sep 2019 00:00:22 +0000","(2019, 9, 13, 0, 0, 22, 4, 256, 0)",0,This is a keynote highlight from the O&#8217;R...,"{'type': 'text/html', 'language': None, 'base'...","[{'term': 'AI & ML', 'scheme': None, 'label': ...",Open-endedness: A new grand challenge for AI,"{'type': 'text/plain', 'language': None, 'base...",https://www.oreilly.com/radar/open-endedness-a...


### 10. Count the number of entries per author and sort them in descending order.

In [126]:
authors = df.groupby('author', as_index=False).agg({'title':'count'})
authors.columns = ['author', 'entries']
authors.sort_values('entries', ascending=False)

Unnamed: 0,author,entries
0,Mac Slocum,18


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [None]:
#do this after pandas lab


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [149]:
x = []
for i in range(len(oreily.entries)):
    x.append(oreily.entries[i].summary)


18