# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
# Address of the feed that the rest of the notebook parses.
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
# Parse the feed at `url`; the result is kept in `fburner` and used by
# every later cell. The print shows the whole parsed structure.
fburner = feedparser.parse(url)
print(fburner)

{'feed': {'title': 'Radar', 'title_detail': {'type': 'text/plain', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'Radar'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'https://www.oreilly.com/radar'}, {'rel': 'self', 'type': 'application/rss+xml', 'href': 'http://feeds.feedburner.com/oreilly/radar/atom'}, {'rel': 'hub', 'href': 'http://pubsubhubbub.appspot.com/', 'type': 'text/html'}], 'link': 'https://www.oreilly.com/radar', 'subtitle': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology', 'subtitle_detail': {'type': 'text/html', 'language': None, 'base': 'http://feeds.feedburner.com/oreilly/radar/atom', 'value': 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'}, 'updated': 'Mon, 07 Oct 2019 16:49:55 +0000', 'updated_parsed': time.struct_time(tm_year=2019, tm_mon=10, tm_mday=7, tm_hour=16, tm_min=49, tm_sec=55, tm_wday=0, tm_yday=28

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
# Top-level components of the parsed result.
fburner.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
# Components available under the 'feed' part of the parsed result.
fburner.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
# Feed title.
fburner.feed.title

'Radar'

In [7]:
# Feed subtitle.
fburner.feed.subtitle

'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology'

In [8]:
# The exercise asks for the feed's author, not its title_detail. The feed
# component of this feed has no 'author' key, so attribute access would raise;
# .get() with a fallback reports the absence explicitly instead.
fburner.feed.get('author', 'No author listed')

'No author listed'

In [9]:
# Feed link.
fburner.feed.link

'https://www.oreilly.com/radar'

### 5. Count the number of entries that are contained in this RSS feed.

In [10]:
import pandas as pd
import re 

# Load the entries into a DataFrame. count() reports the number of non-null
# values per column, i.e. how many entries carry each field.
# (The former `df.tail(10)` line was removed: its result was discarded because
# only the last expression of a cell is displayed.)
df = pd.DataFrame(fburner.entries)
df.count()

title                  18
title_detail           18
links                  18
link                   18
comments               18
published              18
published_parsed       18
authors                18
author                 18
author_detail          18
tags                   18
id                     18
guidislink             18
summary                18
summary_detail         18
content                18
wfw_commentrss         18
slash_comments         18
feedburner_origlink    18
dtype: int64

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [11]:
# Column labels of the entries DataFrame. Every column reported the same
# non-null count earlier, so these presumably match the keys of any single
# entry (equivalent to indexing one entry and calling .keys() on it).
df.columns

Index(['title', 'title_detail', 'links', 'link', 'comments', 'published',
       'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id',
       'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss',
       'slash_comments', 'feedburner_origlink'],
      dtype='object')

### 7. Extract a list of entry titles.

In [12]:
# Titles of all entries, in feed order (iterate the entries directly rather
# than indexing through range(len(...))).
lst = [entry.title for entry in fburner.entries]
print(lst)

['Radar trends to watch: October 2019', 'Four short links: 7 October 2019', 'Four short links: 4 October 2019', 'Four short links: 3 October 2019', 'Four short links: 2 October 2019', 'Four short links: 1 October 2019', 'TinyML: The challenges and opportunities of low-power ML applications', 'Four short links: 30 September 2019', 'Highlights from the Strata Data Conference in New York 2019', 'Four short links: 27 September 2019', 'Delivering the enterprise data cloud', 'Data Science Pioneers: Conquering the next frontier, a documentary investigating the future of data science', 'Postrevolutionary big data: Promoting the general welfare', 'Say what? The ethical challenges of designing for humanlike interaction', 'RL in real life: Bringing reinforcement learning to the enterprise', 'Strata Data Awards winners 2019', 'Staying safe in the AI era', 'Data sonification: Making music from the yield curve']


### 8. Calculate the percentage of "Four short links" entry titles.

In [13]:
# Share of entries whose title contains "Four short links", as a percentage.
# A plain substring test is enough for a literal phrase; no regex is needed.
f_sh = [title for title in lst if 'Four short links' in title]
# Guard against an empty feed, and round instead of truncating with int()
# so that 38.89% is not reported as 38%.
percentage = len(f_sh) / len(lst) * 100 if lst else 0.0
print(round(percentage, 2))

38.89


### 9. Create a Pandas data frame from the feed's entries.

In [31]:
import pandas as pd

In [32]:
# One row per feed entry, one column per entry field; show the first rows
# (head() defaults to 5).
df = pd.DataFrame(fburner.entries)
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Radar trends to watch: October 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Mon, 07 Oct 2019 12:00:09 +0000","(2019, 10, 7, 12, 0, 9, 0, 280, 0)",[{'name': 'Mac Slocum'}],Mac Slocum,{'name': 'Mac Slocum'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=9859,False,Open source and activism trends A new kind of ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...
1,Four short links: 7 October 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Mon, 07 Oct 2019 04:01:40 +0000","(2019, 10, 7, 4, 1, 40, 0, 280, 0)",[{'email': 'jwebb@oreilly.com'}],jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9908,False,Addicted to Screens? That’s Really a You Probl...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
2,Four short links: 4 October 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 04 Oct 2019 04:01:01 +0000","(2019, 10, 4, 4, 1, 1, 4, 277, 0)",[{'email': 'jwebb@oreilly.com'}],jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9850,False,SQL Queries Don&#8217;t Start with SELECT (Jul...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
3,Four short links: 3 October 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 03 Oct 2019 04:01:45 +0000","(2019, 10, 3, 4, 1, 45, 3, 276, 0)",[{'email': 'jwebb@oreilly.com'}],jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9847,False,Why Do Companies With Huge Resources Still Hav...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
4,Four short links: 2 October 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 02 Oct 2019 04:01:09 +0000","(2019, 10, 2, 4, 1, 9, 2, 275, 0)",[{'email': 'jwebb@oreilly.com'}],jwebb@oreilly.com,{'email': 'jwebb@oreilly.com'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=9832,False,Data Fallacies to Avoid &#8212; nifty infograp...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...


### 10. Count the number of entries per author and sort them in descending order.

In [33]:
# Number of entries per author (counted via the 'title' column), kept in
# `authors`; the displayed view is sorted with the most prolific author first.
authors = df.groupby('author', as_index=False)['title'].count()
authors.sort_values('title', ascending=False)

Unnamed: 0,author,title
0,Mac Slocum,13
1,jwebb@oreilly.com,5


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [38]:
# Add the character count of each title as a new column. The vectorized
# .str.len() replaces the manual loop and its scratch list.
# The column label's spelling is kept as-is so existing references to it
# keep working.
df['Title lenght'] = df['title'].str.len()

# Title, author and title length, longest titles first.
df_count = df[['title', 'author', 'Title lenght']]
df_count.sort_values(by='Title lenght', ascending=False)

Unnamed: 0,title,author,Title lenght
11,Data Science Pioneers: Conquering the next fro...,Mac Slocum,107
13,Say what? The ethical challenges of designing ...,Mac Slocum,71
6,TinyML: The challenges and opportunities of lo...,Mac Slocum,69
14,RL in real life: Bringing reinforcement learni...,Mac Slocum,66
8,Highlights from the Strata Data Conference in ...,Mac Slocum,59
12,Postrevolutionary big data: Promoting the gene...,Mac Slocum,57
17,Data sonification: Making music from the yield...,Mac Slocum,52
10,Delivering the enterprise data cloud,Mac Slocum,36
0,Radar trends to watch: October 2019,Mac Slocum,35
7,Four short links: 30 September 2019,Mac Slocum,35


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [43]:
# Collect the *titles* (not the summaries) of entries whose summary contains
# the phrase. Matching stays literal and case-sensitive, as before; na=False
# treats a missing summary as "no match" instead of raising.
has_ml = df['summary'].str.contains('machine learning', regex=False, na=False)
mac = df.loc[has_ml, 'title'].tolist()
print(mac)

['TinyML: The challenges and opportunities of low-power ML applications']
