# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
feedburner = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
feedburner.keys()

dict_keys(['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [5]:
feedburner.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [6]:
title = feedburner.feed.title
subtitle = feedburner.feed.subtitle
#author = feedburner.feed.author
link = feedburner.feed.link
print(f"TITLE: {title}, SUBTITLE: {subtitle}, LINK: {link}")

TITLE: Radar, SUBTITLE: Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology, LINK: https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [7]:
len(feedburner.entries)

60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [8]:
feedburner.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink'])

### 7. Extract a list of entry titles.

In [9]:
titles = [entry.title for entry in feedburner.entries]
titles

['Four short links: 24 July 2020',
 'Four short links: 26 July 2020',
 'Four short links: 22 July 2020',
 'AI, Protests, and Justice',
 'Four short links: 21 July 2020',
 'Four short links: 20 July 2020',
 'Four short links: 17 July 2020',
 'Four short links: 16 July 2020',
 'Microservices Adoption in 2020',
 'Four short links: 15 July 2020',
 'Society-Centered Design',
 'Four short links: 14 July 2020',
 'Four short links: 13 July 2020',
 'Four short links: 10 July 2020',
 'Automated Coding and the Future of Programming',
 'Four short links: 9 July 2020',
 'Four short links: 8 July 2020',
 'Four short links: 7 July 2020',
 'Four short links: 6 July 2020',
 'Four short links: 3 July 2020',
 'Four short links: 2 July 2020',
 'Radar trends to watch: July 2020',
 'Four short links: 1 July 2020',
 'Four short links: 30 June 2020',
 'Four short links: 29 June 2020',
 'Four short links: 28 June 2020',
 'Four short links: 25 June 2020',
 'COVID-19 and Complex Systems',
 'Four short links: 24 

### 8. Calculate the percentage of "Four short links" entry titles.

In [10]:
len([title for title in titles if title[:16] == "Four short links"])/len(titles)*100

80.0

### 9. Create a Pandas data frame from the feed's entries.

In [11]:
import pandas as pd

In [12]:
df = pd.DataFrame(feedburner.entries)
df

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Four short links: 24 July 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 24 Jul 2020 11:30:50 +0000","(2020, 7, 24, 11, 30, 50, 4, 206, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13166,False,How Taiwan&#8217;s Unlikely Digital Minister H...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
1,Four short links: 26 July 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 23 Jul 2020 11:18:39 +0000","(2020, 7, 23, 11, 18, 39, 3, 205, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13163,False,As We May Code &#8212; why aren’t we — ostensi...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
2,Four short links: 22 July 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 22 Jul 2020 11:29:30 +0000","(2020, 7, 22, 11, 29, 30, 2, 204, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13160,False,wiki.js &#8212; Portable open-source Javascrip...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
3,"AI, Protests, and Justice","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/ai-protests-and-...,"Tue, 21 Jul 2020 11:42:29 +0000","(2020, 7, 21, 11, 42, 29, 1, 203, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13156,False,Largely on the impetus of the Black Lives Matt...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/ai-protests-and-...,0,https://www.oreilly.com/radar/ai-protests-and-...
4,Four short links: 21 July 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Tue, 21 Jul 2020 11:20:35 +0000","(2020, 7, 21, 11, 20, 35, 1, 203, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13153,False,22 Principles for Great Product Managers &#821...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
5,Four short links: 20 July 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Mon, 20 Jul 2020 11:14:44 +0000","(2020, 7, 20, 11, 14, 44, 0, 202, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13150,False,A Bug-Sized Camera for Bug-Sized Robots and Bu...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
6,Four short links: 17 July 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 17 Jul 2020 10:49:53 +0000","(2020, 7, 17, 10, 49, 53, 4, 199, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13147,False,"Matters Computational: Ideas, Algorithms, Sour...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
7,Four short links: 16 July 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 16 Jul 2020 11:18:37 +0000","(2020, 7, 16, 11, 18, 37, 3, 198, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13144,False,Pseudogen &#8212; [open source] tool to automa...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
8,Microservices Adoption in 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/microservices-ad...,"Wed, 15 Jul 2020 13:33:44 +0000","(2020, 7, 15, 13, 33, 44, 2, 197, 0)",[{'name': 'Mike Loukides and Steve Swoyer'}],Mike Loukides and Steve Swoyer,{'name': 'Mike Loukides and Steve Swoyer'},"[{'term': 'Next Architecture', 'scheme': None,...",https://www.oreilly.com/radar/?p=13077,False,Microservices&#160;seem to be everywhere. Scra...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/microservices-ad...,0,https://www.oreilly.com/radar/microservices-ad...
9,Four short links: 15 July 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 15 Jul 2020 11:19:45 +0000","(2020, 7, 15, 11, 19, 45, 2, 197, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13140,False,Fixing Bugs Properly &#8212; Why did a fix tha...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...


### 10. Count the number of entries per author and sort them in descending order.

In [13]:
df.groupby("author").count()[["title"]].sort_values('title', ascending=False)

Unnamed: 0_level_0,title
author,Unnamed: 1_level_1
Nat Torkington,48
Mike Loukides,6
"Adam Jacob, Nat Torkington and Mike Loukides",1
David G. Robinson,1
Hugo Bowne-Anderson,1
Mike Loukides and Steve Swoyer,1
Roger Magoulas and Steve Swoyer,1
Sarah Gold,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [14]:
df['title_length'] = df.title.str.len()
df_new = df

df_new[["title","author","title_length"]].head(10)


Unnamed: 0,title,author,title_length
0,Four short links: 24 July 2020,Nat Torkington,30
1,Four short links: 26 July 2020,Nat Torkington,30
2,Four short links: 22 July 2020,Nat Torkington,30
3,"AI, Protests, and Justice",Mike Loukides,25
4,Four short links: 21 July 2020,Nat Torkington,30
5,Four short links: 20 July 2020,Nat Torkington,30
6,Four short links: 17 July 2020,Nat Torkington,30
7,Four short links: 16 July 2020,Nat Torkington,30
8,Microservices Adoption in 2020,Mike Loukides and Steve Swoyer,30
9,Four short links: 15 July 2020,Nat Torkington,30


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [15]:
#df_new["machine learning" in df_new["summary"].str ]
df_new[df_new["summary"].str.contains("machine learning")]

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,title_length
16,Four short links: 8 July 2020,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 08 Jul 2020 12:49:23 +0000","(2020, 7, 8, 12, 49, 23, 2, 190, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13100,False,When Data is Messy &#8212; I love stories that...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,29
39,Machine Learning and the Production Gap,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/machine-learning...,"Tue, 09 Jun 2020 11:31:48 +0000","(2020, 6, 9, 11, 31, 48, 1, 161, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=12965,False,The biggest problem facing machine learning to...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/machine-learning...,0,https://www.oreilly.com/radar/machine-learning...,39


In [16]:
df[df["title_length"]==25]

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,title_length
3,"AI, Protests, and Justice","{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/ai-protests-and-...,"Tue, 21 Jul 2020 11:42:29 +0000","(2020, 7, 21, 11, 42, 29, 1, 203, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13156,False,Largely on the impetus of the Black Lives Matt...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/ai-protests-and-...,0,https://www.oreilly.com/radar/ai-protests-and-...,25
