# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
rss = feedparser.parse(url)
print(rss)



### 2. Obtain a list of components (keys) that are available for this feed.

In [34]:
print([x for x in rss])

['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces']


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [35]:
print([x for x in rss['feed']])

['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname']


### 4. Extract and print the feed title, subtitle, author, and link.

In [41]:
print(f"Feeds Title : {rss['feed']['title']}")
print(f"Feeds subitle : {rss['feed']['subtitle']}")
print(f"Feeds Link : {rss['feed']['link']}")

Feeds Title : Radar
Feeds subitle : Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology
Feeds Link : https://www.oreilly.com/radar


### 5. Count the number of entries that are contained in this RSS feed.

In [42]:
print(len([x for x in rss['feed']]))

18


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [44]:
print([x for x in rss['entries'][0]])

['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink']


### 7. Extract a list of entry titles.

In [48]:
print([x['title'] for x in rss['entries']])

['Four short links: 5 August 2020', 'Radar trends to watch: August 2020', 'Four short links: 31 July 2020', 'Four short links: 30 July 2020', 'Four short links: 29 July 2020', 'Bringing an AI Product to Market', 'Power, Harms, and Data', 'Four short links: 27 July 2020', 'Four short links: 24 July 2020', 'Four short links: 26 July 2020', 'Four short links: 22 July 2020', 'AI, Protests, and Justice', 'Four short links: 21 July 2020', 'Four short links: 20 July 2020', 'Four short links: 17 July 2020', 'Four short links: 16 July 2020', 'Microservices Adoption in 2020', 'Four short links: 15 July 2020', 'Society-Centered Design', 'Four short links: 14 July 2020', 'Four short links: 13 July 2020', 'Four short links: 10 July 2020', 'Automated Coding and the Future of Programming', 'Four short links: 9 July 2020', 'Four short links: 8 July 2020', 'Four short links: 7 July 2020', 'Four short links: 6 July 2020', 'Four short links: 3 July 2020', 'Four short links: 2 July 2020', 'Radar trends to

### 8. Calculate the percentage of "Four short links" entry titles.

In [None]:
[mydict[x] for x in mykeys]

### 9. Create a Pandas data frame from the feed's entries.

In [57]:
import pandas as pd
import numpy as np
pd.__version__

'1.0.3'

In [58]:
df = pd.json_normalize(rss['entries'])
df

Unnamed: 0,title,links,link,comments,published,published_parsed,authors,author,tags,id,...,feedburner_origlink,title_detail.type,title_detail.language,title_detail.base,title_detail.value,author_detail.name,summary_detail.type,summary_detail.language,summary_detail.base,summary_detail.value
0,Four short links: 5 August 2020,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 05 Aug 2020 11:21:38 +0000","(2020, 8, 5, 11, 21, 38, 2, 218, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13196,...,https://www.oreilly.com/radar/four-short-links...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,Four short links: 5 August 2020,Nat Torkington,text/html,,http://feeds.feedburner.com/oreilly/radar/atom,Tales of the Autistic Developer &#8211; Senior...
1,Radar trends to watch: August 2020,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Mon, 03 Aug 2020 11:33:02 +0000","(2020, 8, 3, 11, 33, 2, 0, 216, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=13193,...,https://www.oreilly.com/radar/radar-trends-to-...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,Radar trends to watch: August 2020,Mike Loukides,text/html,,http://feeds.feedburner.com/oreilly/radar/atom,"I thought July was going to be a dull month, b..."
2,Four short links: 31 July 2020,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 31 Jul 2020 11:33:09 +0000","(2020, 7, 31, 11, 33, 9, 4, 213, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13188,...,https://www.oreilly.com/radar/four-short-links...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,Four short links: 31 July 2020,Nat Torkington,text/html,,http://feeds.feedburner.com/oreilly/radar/atom,Migrating a 40TB SQL Server Database &#8212; A...
3,Four short links: 30 July 2020,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 30 Jul 2020 11:19:09 +0000","(2020, 7, 30, 11, 19, 9, 3, 212, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13185,...,https://www.oreilly.com/radar/four-short-links...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,Four short links: 30 July 2020,Nat Torkington,text/html,,http://feeds.feedburner.com/oreilly/radar/atom,Turning the IDE Inside Out with Datalog &#8212...
4,Four short links: 29 July 2020,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 29 Jul 2020 11:27:14 +0000","(2020, 7, 29, 11, 27, 14, 2, 211, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13182,...,https://www.oreilly.com/radar/four-short-links...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,Four short links: 29 July 2020,Nat Torkington,text/html,,http://feeds.feedburner.com/oreilly/radar/atom,The Golden Rule of Software Quality &#8212; Pr...
5,Bringing an AI Product to Market,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/bringing-an-ai-p...,"Tue, 28 Jul 2020 12:33:02 +0000","(2020, 7, 28, 12, 33, 2, 1, 210, 0)","[{'name': 'Justin Norman, Peter Skomoroch and ...","Justin Norman, Peter Skomoroch and Mike Loukides","[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13173,...,https://www.oreilly.com/radar/bringing-an-ai-p...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,Bringing an AI Product to Market,"Justin Norman, Peter Skomoroch and Mike Loukides",text/html,,http://feeds.feedburner.com/oreilly/radar/atom,The Core Responsibilities of the AI Product Ma...
6,"Power, Harms, and Data","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/power-harms-and-...,"Tue, 28 Jul 2020 12:20:32 +0000","(2020, 7, 28, 12, 20, 32, 1, 210, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,"[{'term': 'AI & ML', 'scheme': None, 'label': ...",https://www.oreilly.com/radar/?p=13178,...,https://www.oreilly.com/radar/power-harms-and-...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,"Power, Harms, and Data",Mike Loukides,text/html,,http://feeds.feedburner.com/oreilly/radar/atom,"A recent article in The Verge discussed PULSE,..."
7,Four short links: 27 July 2020,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Mon, 27 Jul 2020 11:34:03 +0000","(2020, 7, 27, 11, 34, 3, 0, 209, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13170,...,https://www.oreilly.com/radar/four-short-links...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,Four short links: 27 July 2020,Nat Torkington,text/html,,http://feeds.feedburner.com/oreilly/radar/atom,"Tech Regulation &#8212; ‘Tech’, of course, has..."
8,Four short links: 24 July 2020,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 24 Jul 2020 11:30:50 +0000","(2020, 7, 24, 11, 30, 50, 4, 206, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13166,...,https://www.oreilly.com/radar/four-short-links...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,Four short links: 24 July 2020,Nat Torkington,text/html,,http://feeds.feedburner.com/oreilly/radar/atom,How Taiwan&#8217;s Unlikely Digital Minister H...
9,Four short links: 26 July 2020,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 23 Jul 2020 11:18:39 +0000","(2020, 7, 23, 11, 18, 39, 3, 205, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=13163,...,https://www.oreilly.com/radar/four-short-links...,text/plain,,http://feeds.feedburner.com/oreilly/radar/atom,Four short links: 26 July 2020,Nat Torkington,text/html,,http://feeds.feedburner.com/oreilly/radar/atom,As We May Code &#8212; why aren’t we — ostensi...


### 10. Count the number of entries per author and sort them in descending order.

In [59]:
lst_col = 'authors'

r = pd.DataFrame({
      col:np.repeat(df[col].values, df[lst_col].str.len())
      for col in df.columns.drop(lst_col)}
    ).assign(**{lst_col:np.concatenate(df[lst_col].values)})[df.columns]

In [76]:
df['author'].value_counts()

Nat Torkington                                      46
Mike Loukides                                        8
Justin Norman, Peter Skomoroch and Mike Loukides     1
Hugo Bowne-Anderson                                  1
Adam Jacob, Nat Torkington and Mike Loukides         1
Sarah Gold                                           1
David G. Robinson                                    1
Mike Loukides and Steve Swoyer                       1
Name: author, dtype: int64

### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [77]:
df['len_title'] = df['title'].apply(len)
df2 = df[['title', 'authors','len_title']].sort_values(by='len_title',ascending=False)
df2

Unnamed: 0,title,authors,len_title
22,Automated Coding and the Future of Programming,[{'name': 'Mike Loukides'}],46
59,Reclaiming the stories that algorithms tell,[{'name': 'David G. Robinson'}],43
47,Machine Learning and the Production Gap,[{'name': 'Mike Loukides'}],39
41,Decision-Making in a Time of Crisis,[{'name': 'Hugo Bowne-Anderson'}],35
1,Radar trends to watch: August 2020,[{'name': 'Mike Loukides'}],34
49,Radar trends to watch: June 2020,[{'name': 'Mike Loukides'}],32
5,Bringing an AI Product to Market,"[{'name': 'Justin Norman, Peter Skomoroch and ...",32
29,Radar trends to watch: July 2020,[{'name': 'Mike Loukides'}],32
39,"Four short links: June 19, 2020",[{'name': 'Nat Torkington'}],31
0,Four short links: 5 August 2020,[{'name': 'Nat Torkington'}],31


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [78]:
[title for title  in df[df['summary_detail.value'].str.contains('machine learning')]['title']]

['Four short links: 8 July 2020', 'Machine Learning and the Production Gap']