# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [27]:
# import feedparser
import requests
import xmltodict
import re
import pandas as pd

### 1. Use feedparser to parse the following RSS feed URL.

In [28]:
url = 'http://feeds.feedburner.com/oreilly/radar/atom'

In [29]:
req = requests.get(url)
req

<Response [200]>

In [35]:
data = xmltodict.parse(req.content)

### 2. Obtain a list of components (keys) that are available for this feed.

In [36]:
lis(data.keys())

odict_keys(['rss'])

In [52]:
print(list(data['rss'].keys()))

['@xmlns:content', '@xmlns:wfw', '@xmlns:dc', '@xmlns:atom', '@xmlns:sy', '@xmlns:slash', '@xmlns:geo', '@xmlns:feedburner', '@version', 'channel']


### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [46]:
print(list(data['rss']['channel'].keys()))

['title', 'link', 'description', 'lastBuildDate', 'language', 'sy:updatePeriod', 'sy:updateFrequency', 'generator', 'atom10:link', 'feedburner:info', 'geo:lat', 'geo:long', 'feedburner:emailServiceId', 'feedburner:feedburnerHostname', 'item']


In [53]:
# data['rss']['channel']['item']

### 4. Extract and print the feed title, subtitle, author, and link.

In [49]:
data_item = data['rss']['channel']['item']

In [69]:
titles = []
subtitles = []
authors = []
links  = []

for i in range(len(data_item))  :
    # Extrayendo el titulo
    titles.append(data_item[i]['title'])
    # Extrayendo el subtitulo
    subtitles.append(data_item[i]['description'].split('&')[0])
    # Extrayendo los autores
    authors.append(data_item[i]['dc:creator'])
    # Extrayendo los links
    links.append(data_item[i]['link'])
    
for i in range(len(titles)):
    print(titles[i], ' | ', subtitles[i], ' | ', authors[i], ' | ', links[0] )
    print('')

Four short links: 28 Oct 2020  |  Phantom of the ADAS   |  Nat Torkington  |  http://feedproxy.google.com/~r/oreilly/radar/atom/~3/9SAYsodoeJo/

Our Favorite Questions  |    |  Q Ethan McCallum, Chris Butler and Shane Glynn  |  http://feedproxy.google.com/~r/oreilly/radar/atom/~3/9SAYsodoeJo/

Four short links: 21 Oct 2020  |  Justice Department Antitrust Filing Against Google   |  Nat Torkington  |  http://feedproxy.google.com/~r/oreilly/radar/atom/~3/9SAYsodoeJo/

Four Short Links: 16 October 2020  |  Automerge   |  None  |  http://feedproxy.google.com/~r/oreilly/radar/atom/~3/9SAYsodoeJo/

Four short links: 14 Oct 2020  |  Data Organization in Spreadsheets   |  Nat Torkington  |  http://feedproxy.google.com/~r/oreilly/radar/atom/~3/9SAYsodoeJo/

AI Product Management After Deployment  |  The field of AI product management continues to gain momentum. As the AI product management role advances in maturity, more and more information and advice has become available. Our previous article

### 5. Count the number of entries that are contained in this RSS feed.

In [57]:
len(data_item)

60

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [59]:
data_item[0].keys()

odict_keys(['title', 'link', 'comments', 'pubDate', 'dc:creator', 'category', 'guid', 'description', 'content:encoded', 'wfw:commentRss', 'slash:comments', 'feedburner:origLink'])

### 7. Extract a list of entry titles.

In [60]:
titles

['Four short links: 28 Oct 2020',
 'Our Favorite Questions',
 'Four short links: 21 Oct 2020',
 'Four Short Links: 16 October 2020',
 'Four short links: 14 Oct 2020',
 'AI Product Management After Deployment',
 'Four short links: 9 October 2020',
 'AI and Creativity',
 'Four short links: 6 October 2020',
 'Four short links: 2 October 2020',
 'Radar trends to watch: October 2020',
 'Four short links: 29 Sep 2020',
 'Four short links: 25 September 2020',
 'Four short links: 18 Sep 2020',
 'Four short links: 16 Sep 2020',
 'How to Set AI Goals',
 'Four short links: 11 Sep 2020',
 'Four short links: 9 Sep 2020',
 'Pair Programming with AI',
 'Four short links: 4 September 2020',
 'Four short links: 2 September 2020',
 'Radar trends to watch: September 2020',
 'Four short links: 28 August 2020',
 'An Agent of Change',
 'Four short links: 25 August 2020',
 'Four short links: 21 August 2020',
 'Four Short Links: 19 August 2020',
 'Why Best-of-Breed is a Better Choice than All-in-One Platforms

### 8. Calculate the percentage of "Four short links" entry titles.

In [62]:
contador = 0

for i in titles:
    if 'Four short links' in i:
        contador+=1
        
print('Porcentaje:', contador/len(titles) )

Porcentaje: 0.6833333333333333


### 9. Create a Pandas data frame from the feed's entries.

In [None]:
import pandas as pd

In [63]:
df = pd.DataFrame({'title': titles, 'subtitle': subtitles, 'author': authors, 'link':links})
df

Unnamed: 0,title,subtitle,author,link
0,Four short links: 28 Oct 2020,Phantom of the ADAS,Nat Torkington,http://feedproxy.google.com/~r/oreilly/radar/a...
1,Our Favorite Questions,,"Q Ethan McCallum, Chris Butler and Shane Glynn",http://feedproxy.google.com/~r/oreilly/radar/a...
2,Four short links: 21 Oct 2020,Justice Department Antitrust Filing Against Go...,Nat Torkington,http://feedproxy.google.com/~r/oreilly/radar/a...
3,Four Short Links: 16 October 2020,Automerge,,http://feedproxy.google.com/~r/oreilly/radar/a...
4,Four short links: 14 Oct 2020,Data Organization in Spreadsheets,Nat Torkington,http://feedproxy.google.com/~r/oreilly/radar/a...
5,AI Product Management After Deployment,The field of AI product management continues t...,Justin Norman and Mike Loukides,http://feedproxy.google.com/~r/oreilly/radar/a...
6,Four short links: 9 October 2020,T-SQL in SQLite,Nat Torkington,http://feedproxy.google.com/~r/oreilly/radar/a...
7,AI and Creativity,The release of GPT-3 has reinvigorated a discu...,Mike Loukides,http://feedproxy.google.com/~r/oreilly/radar/a...
8,Four short links: 6 October 2020,Algorithms Can Collude,Nat Torkington,http://feedproxy.google.com/~r/oreilly/radar/a...
9,Four short links: 2 October 2020,Single Device Behaves Like a Neuron,Nat Torkington,http://feedproxy.google.com/~r/oreilly/radar/a...


### 10. Count the number of entries per author and sort them in descending order.

In [74]:
df['author'].value_counts().to_frame()

Unnamed: 0,author
Nat Torkington,42
Mike Loukides,9
Q Ethan McCallum and Mike Loukides,1
Mike Loukides and Steve Swoyer,1
Alex Castrounis,1
Sarah Gold,1
Justin Norman and Mike Loukides,1
"Q Ethan McCallum, Chris Butler and Shane Glynn",1
"Justin Norman, Peter Skomoroch and Mike Loukides",1
Matthew Rocklin and Hugo Bowne-Anderson,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [77]:
df1 = df[['title', 'author']].copy()

# Creamos la nueva columna
df1['length_title'] = df1['title'].str.len()

df1.sort_values(by=['length_title'])

Unnamed: 0,title,author,length_title
7,AI and Creativity,Mike Loukides,17
23,An Agent of Change,Q Ethan McCallum and Mike Loukides,18
15,How to Set AI Goals,Alex Castrounis,19
1,Our Favorite Questions,"Q Ethan McCallum, Chris Butler and Shane Glynn",22
38,"Power, Harms, and Data",Mike Loukides,22
50,Society-Centered Design,Sarah Gold,23
18,Pair Programming with AI,Mike Loukides,24
43,"AI, Protests, and Justice",Mike Loukides,25
31,Four short links: 7 Aug 2020,Nat Torkington,28
17,Four short links: 9 Sep 2020,Nat Torkington,28


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [79]:
ml_titles = []
for i in range(60):
    
    if 'machine learning' in data_item[i]['content:encoded']:
        
        ml_titles.append(data_item[i]['title'])

ml_titles

['Our Favorite Questions',
 'AI Product Management After Deployment',
 'AI and Creativity',
 'Radar trends to watch: October 2020',
 'Four short links: 16 Sep 2020',
 'How to Set AI Goals',
 'Radar trends to watch: September 2020',
 'Why Best-of-Breed is a Better Choice than All-in-One Platforms for Data Science',
 'Radar trends to watch: August 2020',
 'Bringing an AI Product to Market',
 'Power, Harms, and Data',
 'Four short links: 8 July 2020']