# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [1]:
#Previously installed feedparser in the Conda Environment
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [2]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [3]:
feed = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [4]:
#option 1:
list_keys = [key for key in feed]
print(list_keys)

['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces']


In [5]:
#option 2:
print(list(feed.keys()))

['feed', 'entries', 'bozo', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces']


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [6]:
#option 1:
print(list(feed['feed'].keys()))

['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname']


In [7]:
#option 2:
k = [keys for keys in feed['feed']]
print(k)

['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator', 'feedburner_info', 'geo_lat', 'geo_long', 'feedburner_emailserviceid', 'feedburner_feedburnerhostname']


### 4. Extract and print the feed title, subtitle, author, and link.

In [8]:
selection = ['title', 'subtitle', 'author', 'link']
extract = [value for (key,value) in feed['feed'].items() if key in selection] 
extract
#as we can see in the result, the 'author' field is missing

['Radar',
 'https://www.oreilly.com/radar',
 'Now, next, and beyond: Tracking need-to-know trends at the intersection of business and technology']

In [9]:
#missing 'author' field:
feed['feed'].author

AttributeError: object has no attribute 'author'

### 5. Count the number of entries that are contained in this RSS feed.

In [10]:
number_of_entries = [entries for entries in feed['entries']]
print(len(number_of_entries))

18


### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [11]:
#option 1:
list_components = [key for key in feed['entries'][0]]
print(list_components)

['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink']


In [12]:
 #option 2:
print(list(feed['entries'][0].keys()))

['title', 'title_detail', 'links', 'link', 'comments', 'published', 'published_parsed', 'authors', 'author', 'author_detail', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'wfw_commentrss', 'slash_comments', 'feedburner_origlink']


### 7. Extract a list of entry titles.

In [13]:
#[feed.entries[i].title]
list_entries = [feed.entries[i].title for i in range(0,len(number_of_entries))]
list_entries

['Four short links: 6 December 2019',
 'Radar trends to watch: December 2019',
 'Four short links: 5 December 2019',
 'Four short links: 4 December 2019',
 'Use your people as competitive advantage',
 'Four short links: 3 December 2019',
 'A 5G future',
 'Four short links: 2 December 2019',
 'Four short links: 29 November 2019',
 'Four short links: 28 November 2019',
 'Four short links: 27 November 2019',
 'Moving AI and ML from research into production',
 'Four short links: 26 November 2019',
 'Four short links: 25 November 2019',
 'Four short links: 22 November 2019',
 'Why you should care about robotic process automation',
 'Unraveling the mystery of code',
 'Four short links: 21 November 2019']

### 8. Calculate the percentage of "Four short links" entry titles.

In [14]:
#% of 'Four short links'
four_shor_links = [feed.entries[i].title for i in range(0,len(number_of_entries)) if feed.entries[i].title.startswith('Four short links')]# / [feed.entries[i].title for i in range(0,len(number_of_entries))]

percentage = round((len(four_shor_links)/len(list_entries))*100,2)

print(f'"Four short links" = {percentage}% of the total entry titles')

"Four short links" = 66.67% of the total entry titles


### 9. Create a Pandas data frame from the feed's entries.

In [15]:
import pandas as pd

In [16]:
df = pd.DataFrame(feed['entries'])
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink
0,Four short links: 6 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 06 Dec 2019 05:01:00 +0000","(2019, 12, 6, 5, 1, 0, 4, 340, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11147,False,Declarative Assembly of Web Applications From ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
1,Radar trends to watch: December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Thu, 05 Dec 2019 12:00:00 +0000","(2019, 12, 5, 12, 0, 0, 3, 339, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=11118,False,Privacy and security trends DNS over HTTPS is ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...
2,Four short links: 5 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 05 Dec 2019 05:01:00 +0000","(2019, 12, 5, 5, 1, 0, 3, 339, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11136,False,Rediscovered Incomplete Infocom Text Adventure...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
3,Four short links: 4 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 04 Dec 2019 05:01:00 +0000","(2019, 12, 4, 5, 1, 0, 2, 338, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11129,False,The Complexity Explorer &#8212; online courses...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...
4,Use your people as competitive advantage,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/use-your-people-...,"Tue, 03 Dec 2019 09:00:00 +0000","(2019, 12, 3, 9, 0, 0, 1, 337, 0)",[{'name': 'Pamela Rucker'}],Pamela Rucker,{'name': 'Pamela Rucker'},"[{'term': 'Future of the Firm', 'scheme': None...",https://www.oreilly.com/radar/?p=11068,False,"In a fast-paced digital world, it is tempting ...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/use-your-people-...,0,https://www.oreilly.com/radar/use-your-people-...


### 10. Count the number of entries per author and sort them in descending order.

In [17]:
g = df['author'].value_counts().sort_values(ascending=False)
print(g)

Nat Torkington                                  12
Jenn Webb                                        2
Mike Loukides                                    2
Sunil Ranka, Roger Magoulas and Steve Swoyer     1
Pamela Rucker                                    1
Name: author, dtype: int64


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [18]:
num_characters = df['title'].str.len()
df['title length'] = num_characters
df.head()

Unnamed: 0,title,title_detail,links,link,comments,published,published_parsed,authors,author,author_detail,tags,id,guidislink,summary,summary_detail,content,wfw_commentrss,slash_comments,feedburner_origlink,title length
0,Four short links: 6 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Fri, 06 Dec 2019 05:01:00 +0000","(2019, 12, 6, 5, 1, 0, 4, 340, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11147,False,Declarative Assembly of Web Applications From ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,33
1,Radar trends to watch: December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/radar-trends-to-...,"Thu, 05 Dec 2019 12:00:00 +0000","(2019, 12, 5, 12, 0, 0, 3, 339, 0)",[{'name': 'Mike Loukides'}],Mike Loukides,{'name': 'Mike Loukides'},"[{'term': 'Radar Trends', 'scheme': None, 'lab...",https://www.oreilly.com/radar/?p=11118,False,Privacy and security trends DNS over HTTPS is ...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/radar-trends-to-...,0,https://www.oreilly.com/radar/radar-trends-to-...,36
2,Four short links: 5 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Thu, 05 Dec 2019 05:01:00 +0000","(2019, 12, 5, 5, 1, 0, 3, 339, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11136,False,Rediscovered Incomplete Infocom Text Adventure...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,33
3,Four short links: 4 December 2019,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/four-short-links...,"Wed, 04 Dec 2019 05:01:00 +0000","(2019, 12, 4, 5, 1, 0, 2, 338, 0)",[{'name': 'Nat Torkington'}],Nat Torkington,{'name': 'Nat Torkington'},"[{'term': 'Four Short Links', 'scheme': None, ...",https://www.oreilly.com/radar/?p=11129,False,The Complexity Explorer &#8212; online courses...,"{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/four-short-links...,0,https://www.oreilly.com/radar/four-short-links...,33
4,Use your people as competitive advantage,"{'type': 'text/plain', 'language': None, 'base...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",http://feedproxy.google.com/~r/oreilly/radar/a...,https://www.oreilly.com/radar/use-your-people-...,"Tue, 03 Dec 2019 09:00:00 +0000","(2019, 12, 3, 9, 0, 0, 1, 337, 0)",[{'name': 'Pamela Rucker'}],Pamela Rucker,{'name': 'Pamela Rucker'},"[{'term': 'Future of the Firm', 'scheme': None...",https://www.oreilly.com/radar/?p=11068,False,"In a fast-paced digital world, it is tempting ...","{'type': 'text/html', 'language': None, 'base'...","[{'type': 'text/html', 'language': None, 'base...",https://www.oreilly.com/radar/use-your-people-...,0,https://www.oreilly.com/radar/use-your-people-...,40


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [19]:
filter_machine_learning = list(df['summary'].str.contains("machine learning."))
filter_machine_learning

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False]

In [20]:
[df.iloc[counter,0] for counter,value in enumerate(filter_machine_learning) if value]

['Moving AI and ML from research into production']