# Working with RSS Feeds Lab

Complete the following set of exercises to solidify your knowledge of parsing RSS feeds and extracting information from them.

In [2]:
pip install feedparser

Note: you may need to restart the kernel to use updated packages.


In [3]:
import feedparser

### 1. Use feedparser to parse the following RSS feed URL.

In [4]:
url = 'https://www.nasa.gov/news-release/feed/'

In [5]:
nasa = feedparser.parse(url)

### 2. Obtain a list of components (keys) that are available for this feed.

In [6]:
nasa.keys()

dict_keys(['bozo', 'entries', 'feed', 'headers', 'etag', 'updated', 'updated_parsed', 'href', 'status', 'encoding', 'version', 'namespaces'])

### 3. Obtain a list of components (keys) that are available for the *feed* component of this RSS feed.

In [7]:
print(nasa['feed'])

{'title': 'NASA', 'title_detail': {'type': 'text/plain', 'language': None, 'base': 'https://www.nasa.gov/news-release/feed/', 'value': 'NASA'}, 'links': [{'href': 'https://www.nasa.gov/news-release/feed/', 'rel': 'self', 'type': 'application/rss+xml'}, {'rel': 'alternate', 'type': 'text/html', 'href': 'https://www.nasa.gov'}], 'link': 'https://www.nasa.gov', 'subtitle': 'Official National Aeronautics and Space Administration Website', 'subtitle_detail': {'type': 'text/html', 'language': None, 'base': 'https://www.nasa.gov/news-release/feed/', 'value': 'Official National Aeronautics and Space Administration Website'}, 'updated': 'Wed, 21 Feb 2024 19:05:57 +0000', 'updated_parsed': time.struct_time(tm_year=2024, tm_mon=2, tm_mday=21, tm_hour=19, tm_min=5, tm_sec=57, tm_wday=2, tm_yday=52, tm_isdst=0), 'language': 'en-US', 'sy_updateperiod': 'hourly', 'sy_updatefrequency': '1', 'generator_detail': {'name': 'https://wordpress.org/?v=6.3.3'}, 'generator': 'https://wordpress.org/?v=6.3.3'}


In [8]:
nasa.feed.keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'subtitle', 'subtitle_detail', 'updated', 'updated_parsed', 'language', 'sy_updateperiod', 'sy_updatefrequency', 'generator_detail', 'generator'])

### 4. Extract and print the feed title, subtitle, author, and link.

In [9]:
print (nasa.feed.title)
print ('')
print (nasa.feed.subtitle)
print ('')
print (nasa.feed.generator)
print ('')
print (nasa.feed.link)

NASA

Official National Aeronautics and Space Administration Website

https://wordpress.org/?v=6.3.3

https://www.nasa.gov


### 5. Count the number of entries that are contained in this RSS feed.

In [10]:
len(nasa.entries)

10

### 6. Obtain a list of components (keys) available for an entry.

*Hint: Remember to index first before requesting the keys*

In [11]:
nasa.entries[0].keys()

dict_keys(['title', 'title_detail', 'links', 'link', 'authors', 'author', 'author_detail', 'published', 'published_parsed', 'tags', 'id', 'guidislink', 'summary', 'summary_detail', 'content', 'media_content', 'media_player', 'media_thumbnail', 'href', 'media_rating', 'rating'])

### 7. Extract a list of entry titles.

In [12]:
titles = [nasa.entries[i].title for i in range(len(nasa.entries))]
print(titles)

['Stellar Beads on a String', 'NASA Astronomer Sees Power in Community, Works to Build More', 'Ride the Wave of Radio Astronomy During the Solar Eclipse', 'NASA Selects University Teams to Explore Innovative Aeronautical Research', 'Become a SunSketcher, and Help Measure the Shape of the Sun!', 'NASA Sets Coverage of First US Uncrewed Commercial Moon Landing', 'Seeing is Communicating', 'Annual Highlights of Results 2023: Introduction and Analyses', '2023 Annual Highlights of Results from the International Space Station', 'Renee King: Ensuring Space for Everyone']


### 8. Calculate the percentage of "Four short links" entry titles.

In [13]:
????

Object `??` not found.


### 9. Create a Pandas data frame from the feed's entries.

In [21]:
import pandas as pd

df = pd.DataFrame(pd.json_normalize(nasa.entries))
df.head()

Unnamed: 0,title,links,link,authors,author,published,published_parsed,tags,id,guidislink,...,title_detail.value,author_detail.name,summary_detail.type,summary_detail.language,summary_detail.base,summary_detail.value,media_player.url,media_player.content,media_rating.scheme,media_rating.content
0,Stellar Beads on a String,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nasa.gov/image-article/stellar-bea...,[{'name': 'Lee Mohon'}],Lee Mohon,"Wed, 21 Feb 2024 17:25:50 +0000","(2024, 2, 21, 17, 25, 50, 2, 52, 0)","[{'term': 'Astrophysics', 'scheme': None, 'lab...",https://www.nasa.gov/?post_type=image-article&...,False,...,Stellar Beads on a String,Lee Mohon,text/html,,https://www.nasa.gov/news-release/feed/,Astronomers have discovered one of the most po...,https://www.youtube.com/embed/bouQkHDXMKA,,urn:simple,nonadult
1,"NASA Astronomer Sees Power in Community, Works...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nasa.gov/general/nasa-astronomer-s...,[{'name': 'Abby Tabor'}],Abby Tabor,"Wed, 21 Feb 2024 17:22:18 +0000","(2024, 2, 21, 17, 22, 18, 2, 52, 0)","[{'term': 'General', 'scheme': None, 'label': ...",https://www.nasa.gov/?p=616846,False,...,"NASA Astronomer Sees Power in Community, Works...",Abby Tabor,text/html,,https://www.nasa.gov/news-release/feed/,Science is often portrayed as a solitary affai...,,,,
2,Ride the Wave of Radio Astronomy During the So...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://science.nasa.gov/solar-system/skywatch...,[{}],,"Wed, 21 Feb 2024 16:24:06 +0000","(2024, 2, 21, 16, 24, 6, 2, 52, 0)","[{'term': '2024 Solar Eclipse', 'scheme': None...",https://science.nasa.gov/solar-system/skywatch...,False,...,Ride the Wave of Radio Astronomy During the So...,,text/html,,https://www.nasa.gov/news-release/feed/,Students and science enthusiasts are invited t...,,,,
3,NASA Selects University Teams to Explore Innov...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nasa.gov/aeronautics/university-te...,[{'name': 'Jim Banke'}],Jim Banke,"Wed, 21 Feb 2024 05:50:33 +0000","(2024, 2, 21, 5, 50, 33, 2, 52, 0)","[{'term': 'Aeronautics', 'scheme': None, 'labe...",https://www.nasa.gov/?p=617878,False,...,NASA Selects University Teams to Explore Innov...,Jim Banke,text/html,,https://www.nasa.gov/news-release/feed/,NASA has selected another five university team...,,,,
4,"Become a SunSketcher, and Help Measure the Sha...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://science.nasa.gov/get-involved/citizen-...,[{}],,"Tue, 20 Feb 2024 21:35:49 +0000","(2024, 2, 20, 21, 35, 49, 1, 51, 0)","[{'term': 'Citizen Science', 'scheme': None, '...",https://science.nasa.gov/get-involved/citizen-...,False,...,"Become a SunSketcher, and Help Measure the Sha...",,text/html,,https://www.nasa.gov/news-release/feed/,What shape Is the Sun? Hint: it’s not perfectl...,,,,


### 10. Count the number of entries per author and sort them in descending order.

In [24]:
df.groupby('author', as_index = False).agg({'published':'count'}).sort_values('published', ascending = False)

Unnamed: 0,author,published
0,,2
2,Ana Guzman,2
1,Abby Tabor,1
3,Andrew Wagner,1
4,Jim Banke,1
5,Lee Mohon,1
6,Madison Olson,1
7,Tiernan P. Doyle,1


### 11. Add a new column to the data frame that contains the length (number of characters) of each entry title. Return a data frame that contains the title, author, and title length of each entry in descending order (longest title length at the top).

In [25]:
df['title_len'] = [len(nasa.entries[i].title) for i in range(len(nasa.entries))]
df.head()

Unnamed: 0,title,links,link,authors,author,published,published_parsed,tags,id,guidislink,...,author_detail.name,summary_detail.type,summary_detail.language,summary_detail.base,summary_detail.value,media_player.url,media_player.content,media_rating.scheme,media_rating.content,title_len
0,Stellar Beads on a String,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nasa.gov/image-article/stellar-bea...,[{'name': 'Lee Mohon'}],Lee Mohon,"Wed, 21 Feb 2024 17:25:50 +0000","(2024, 2, 21, 17, 25, 50, 2, 52, 0)","[{'term': 'Astrophysics', 'scheme': None, 'lab...",https://www.nasa.gov/?post_type=image-article&...,False,...,Lee Mohon,text/html,,https://www.nasa.gov/news-release/feed/,Astronomers have discovered one of the most po...,https://www.youtube.com/embed/bouQkHDXMKA,,urn:simple,nonadult,25
1,"NASA Astronomer Sees Power in Community, Works...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nasa.gov/general/nasa-astronomer-s...,[{'name': 'Abby Tabor'}],Abby Tabor,"Wed, 21 Feb 2024 17:22:18 +0000","(2024, 2, 21, 17, 22, 18, 2, 52, 0)","[{'term': 'General', 'scheme': None, 'label': ...",https://www.nasa.gov/?p=616846,False,...,Abby Tabor,text/html,,https://www.nasa.gov/news-release/feed/,Science is often portrayed as a solitary affai...,,,,,60
2,Ride the Wave of Radio Astronomy During the So...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://science.nasa.gov/solar-system/skywatch...,[{}],,"Wed, 21 Feb 2024 16:24:06 +0000","(2024, 2, 21, 16, 24, 6, 2, 52, 0)","[{'term': '2024 Solar Eclipse', 'scheme': None...",https://science.nasa.gov/solar-system/skywatch...,False,...,,text/html,,https://www.nasa.gov/news-release/feed/,Students and science enthusiasts are invited t...,,,,,57
3,NASA Selects University Teams to Explore Innov...,"[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://www.nasa.gov/aeronautics/university-te...,[{'name': 'Jim Banke'}],Jim Banke,"Wed, 21 Feb 2024 05:50:33 +0000","(2024, 2, 21, 5, 50, 33, 2, 52, 0)","[{'term': 'Aeronautics', 'scheme': None, 'labe...",https://www.nasa.gov/?p=617878,False,...,Jim Banke,text/html,,https://www.nasa.gov/news-release/feed/,NASA has selected another five university team...,,,,,73
4,"Become a SunSketcher, and Help Measure the Sha...","[{'rel': 'alternate', 'type': 'text/html', 'hr...",https://science.nasa.gov/get-involved/citizen-...,[{}],,"Tue, 20 Feb 2024 21:35:49 +0000","(2024, 2, 20, 21, 35, 49, 1, 51, 0)","[{'term': 'Citizen Science', 'scheme': None, '...",https://science.nasa.gov/get-involved/citizen-...,False,...,,text/html,,https://www.nasa.gov/news-release/feed/,What shape Is the Sun? Hint: it’s not perfectl...,,,,,60


In [26]:
df_new = df[['title', 'author', 'title_len']].sort_values('title_len', ascending = False)
df_new.head()



Unnamed: 0,title,author,title_len
3,NASA Selects University Teams to Explore Innov...,Jim Banke,73
8,2023 Annual Highlights of Results from the Int...,Ana Guzman,70
5,NASA Sets Coverage of First US Uncrewed Commer...,Tiernan P. Doyle,63
1,"NASA Astronomer Sees Power in Community, Works...",Abby Tabor,60
4,"Become a SunSketcher, and Help Measure the Sha...",,60


### 12. Create a list of entry titles whose summary includes the phrase "machine learning."

In [18]:
df.title[0]

'Stellar Beads on a String'

In [19]:
df.title[1]

'NASA Astronomer Sees Power in Community, Works to Build More'

In [20]:
lst = []

for i in df.title:
    if 'machine learning' in i.lower():
        lst.append(i)
print(lst)        

[]
