## Importing packages

In [16]:
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup

## Getting the RSS feed in `XML` format
Using BeautifulSoup to parse the `.xml` file

In [2]:
url = 'https://pbcdn1.podbean.com/criticalrolepodcast.geekandsundry.com/feed.xml'
# A timeout keeps this cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error; otherwise the error page would be parsed below
# and the notebook would silently continue with zero episodes.
response.raise_for_status()

# Parse the raw response bytes with BeautifulSoup's XML parser
soup = BeautifulSoup(response.content, 'xml')

## Constructing the dataframe
Loop through the soup object and extract individual episodes and relevant metadata.

In [24]:
# Collect one record (dict) per episode found in the feed
episodes = []

# Each <item> element in the feed describes a single episode
for item in soup.find_all('item'):
    episodes.append({
        'title': item.find('title').text,
        # The audio file location is stored in the enclosure's url attribute
        'link': item.find('enclosure')['url'],
        'ep_number': int(item.find('itunes:episode').text),
        'duration': item.find('itunes:duration').text,
    })

In [25]:
# Build the dataframe from the list of per-episode dicts (one row per episode)
episodes_df = pd.DataFrame(data=episodes)

In [26]:
# Checking results: preview the first rows of the dataframe
# (DataFrame.head() returns the first 5 rows by default)
episodes_df.head()

Unnamed: 0,duration,ep_number,link,title
0,03:28:53,166,https://mcdn.podbean.com/mf/web/c9dzik/2049_CR...,Campaign 2 Ep. 49 - A Game of Names
1,03:56:09,165,https://mcdn.podbean.com/mf/web/cm35r9/2048_CR...,Campaign 2 Ep. 48 - Homeward Bound
2,03:35:49,164,https://mcdn.podbean.com/mf/web/e5cn5u/2047_CR...,Campaign 2 Ep. 47 - The Second Seal
3,03:59:04,163,https://mcdn.podbean.com/mf/web/zje7mn/2046_CR...,Campaign 2 Ep. 46 - A Storm of Memories
4,04:59:05,162,https://mcdn.podbean.com/mf/web/fyawaf/2045_CR...,Campaign 2 Ep. 45 - The Stowaway


## Downloading episodes of interest
From the resulting dataframe, I'll download and save a subset of episodes (episode numbers 30 through 40) to use in this project.

In [13]:
def download(url, file_name, chunk_size=1024 * 1024, timeout=30):
    """Download `url` and save the response body to `file_name`.

    The body is streamed to disk in chunks instead of being held in memory
    all at once, and the destination file is only opened after the server
    has answered with a successful status code.

    Parameters
    ----------
    url : str
        Address of the file to download.
    file_name : str
        Path of the file to write (opened in binary mode, overwritten if
        it already exists).
    chunk_size : int, optional
        Number of bytes requested per chunk while streaming.
    timeout : float, optional
        Seconds passed to `requests.get` as its `timeout` argument.

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status.

    Notes
    -----
    A failure midway through the transfer can still leave a partially
    written file behind.
    """
    # stream=True defers fetching the body so it can be written chunk by chunk
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Do not save an error page under the target file name
        response.raise_for_status()
        # open in binary mode, only once the request has succeeded
        with open(file_name, "wb") as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                file.write(chunk)

In [47]:
# Downloading episodes 30 to 40:
# Make sure the target folder exists; open() does not create directories
os.makedirs('./data', exist_ok=True)

for i in range(30, 41):
    # Rows of the dataframe whose episode number matches
    episode = episodes_df[episodes_df['ep_number'] == i]

    # Not every number is guaranteed to be in the feed; skip it with a notice
    # rather than failing with an IndexError on .values[0] below
    if episode.empty:
        print('Episode ' + str(i) + ' not found in the feed, skipping')
        continue

    # Set the download url
    url = episode['link'].values[0]

    # Set the filename and path
    filename = './data/episode_' + str(i) + '.mp3'

    # Download the file
    download(url, filename)
    