# Download "Sternengeschichten" podcast

## Download feed.xml and parse the entries

In [None]:
import feedparser
import pandas as pd
import re
import requests
import os
from joblib import Parallel, delayed

N_JOBS = 10  # number of joblib workers used for the parallel downloads
RSS_URL = 'http://feeds.feedburner.com/sternengeschichten?format=xml'
OUT_FOLDER = 'data'  # directory the MP3 files are written to

feeds = feedparser.parse(RSS_URL)

# feedparser.parse() reports fetch and parse problems on the result object
# (``bozo`` / ``bozo_exception``) instead of raising. Stop here with the
# underlying cause rather than with an obscure pandas error further down.
if not feeds['entries']:
    raise RuntimeError(
        'No entries found in feed {}: {}'.format(
            RSS_URL, feeds.get('bozo_exception', 'unknown error')))

def get_folge(x):
    """Return the episode number from a title such as ``'... Folge 12: ...'``.

    The number is returned as the string of digits found in ``x``; when the
    marker occurs more than once, the first occurrence wins.

    Raises:
        IndexError: If ``x`` contains no ``Folge <number>`` marker followed
            by ``:`` or ``-``.
    """
    return re.compile(r'Folge (\d+) ?[:-]').findall(x)[0]

def get_title(title):
    """Return the text that follows the ``Folge <number>:`` marker in ``title``.

    Surrounding whitespace is stripped from the result.

    Raises:
        IndexError: If ``title`` contains no ``Folge <number>`` marker
            followed by ``:`` or ``-`` and at least one more character.
    """
    remainder = re.compile(r'Folge \d+ ?[:-](.+)').findall(title)[0]
    return remainder.strip()

# Build a DataFrame from the feed entries, then re-key it by the numeric
# episode number parsed from each title and order it by that number.
df = pd.DataFrame(feeds['entries']).set_index('id')
df = (
    df.assign(folge=pd.to_numeric(df.title.map(get_folge)))
    .set_index('folge')
    .sort_index()
)

## Download files

In [None]:
# Make sure the download directory exists; an existing one is left untouched.
os.makedirs(name=OUT_FOLDER, exist_ok=True)

def download_folge(folge, item):
    """Download the MP3 of one episode into OUT_FOLDER unless it already exists.

    The file is named ``'<folge, zero-padded to 3 digits> - <title>.mp3'``.
    Progress messages are printed to stdout.

    Args:
        folge: Episode number.
        item: Feed entry exposing ``title`` and
            ``feedburner_origenclosurelink`` attributes (a row of the
            episode DataFrame when called via ``df.iterrows()``).

    Raises:
        requests.RequestException: If the connection fails or times out.
        OSError: If the file cannot be written.
    """
    mp3_link = item.feedburner_origenclosurelink
    title = get_title(item.title)
    folgen_title = '{} - {}'.format(str(folge).zfill(3), title)

    def print_(msg):
        print('{:40}\n\t{}'.format(folgen_title, msg))

    # A path separator inside the title would be read as a sub-directory.
    safe_name = folgen_title.replace('/', '-').replace(os.sep, '-')
    filename = os.path.join(OUT_FOLDER, '{}.mp3'.format(safe_name))
    if os.path.exists(filename):
        print_('Already downloaded')
        return

    print_('Starting download')
    # Download to a temporary name and rename only on success, so that an
    # interrupted run never leaves a truncated file that a later run would
    # report as 'Already downloaded'.
    part_filename = filename + '.part'
    try:
        # timeout: without one a stalled connection blocks the worker forever.
        # stream: write the body in chunks instead of holding it in memory.
        with requests.get(mp3_link, stream=True, timeout=60) as res:
            if not res.ok:
                # Do not store an HTTP error page as if it were the MP3.
                print_('Download failed: HTTP {}'.format(res.status_code))
                return
            with open(part_filename, 'wb') as f:
                for chunk in res.iter_content(chunk_size=1 << 16):
                    f.write(chunk)
        os.replace(part_filename, filename)
    finally:
        # After a successful rename this is a no-op; otherwise it removes
        # the incomplete download.
        if os.path.exists(part_filename):
            os.remove(part_filename)
    print_('Finished download')

# Run one download task per episode row, spread over N_JOBS joblib workers.
Parallel(n_jobs=N_JOBS)(
    delayed(download_folge)(folge, item) for folge, item in df.iterrows()
)