Skip to content

Commit

Permalink
Merge pull request #4 from elreydetoda/master
Browse files Browse the repository at this point in the history
Extending rss parsing for rough Podcast support
  • Loading branch information
dhvcc committed Apr 13, 2021
2 parents 1fbdc34 + ea5e79f commit d28ef06
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
33 changes: 32 additions & 1 deletion rss_parser/_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from bs4 import BeautifulSoup
from typing import Optional

from .models import RSSFeed

Expand All @@ -15,6 +16,17 @@ def __init__(self, xml: str, limit=None):
def get_soup(xml: str, parser: str = "xml") -> BeautifulSoup:
return BeautifulSoup(xml, parser)

@staticmethod
def check_none(item: object, default: str, item_dict: Optional[str] = None, default_dict: Optional[str] = None):
if item:
return item[item_dict]
else:
if default_dict:
return default[default_dict]
else:
return default


def parse(self) -> RSSFeed:
main_soup = self.get_soup(self.xml)
self.raw_data = {
Expand Down Expand Up @@ -44,7 +56,26 @@ def parse(self) -> RSSFeed:
"description_images": [
{"alt": image.get("alt", ""), "source": image.get("src")}
for image in description_soup.findAll('img')
]
],
"enclosure": {
'content': '',
'attrs': {
'url': item.enclosure['url'] ,
'length': item.enclosure['length'] ,
'type': item.enclosure['type']
}
},
"itunes": {
'content': '',
'attrs': {
'href': self.check_none(
item.find("itunes:image"),
main_soup.find("itunes:image"),
'href',
'href'
)
}
}
}
self.raw_data["feed"].append(item_dict)

Expand Down
25 changes: 24 additions & 1 deletion rss_parser/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,26 @@

from pydantic import BaseModel

# Attributes captured for the "itunes" entry of a feed item.
class ItunesAttrs(BaseModel):
    # Value of the ``href`` attribute (required).
    href: str

# "itunes" entry of a feed item: a text content field plus optional attributes.
class Itunes(BaseModel):
    # Text content (required string).
    content: str
    # Attribute values; may be omitted / None.
    attrs: Optional[ItunesAttrs]

# Attributes captured for the "enclosure" entry of a feed item.
class EnclosureAttrs(BaseModel):
    # Value of the ``url`` attribute.
    url: str
    # Value of the ``length`` attribute, declared as an integer.
    # NOTE(review): presumably a size in bytes, per the RSS enclosure convention - confirm.
    length: int
    # Value of the ``type`` attribute.
    # NOTE(review): presumably a MIME type such as "audio/mpeg" - confirm.
    type: str

# "enclosure" entry of a feed item: a text content field plus optional attributes.
class Enclosure(BaseModel):
    # Text content (required string).
    content: str
    # Attribute values (url / length / type); may be omitted / None.
    attrs: Optional[EnclosureAttrs]

# An image referenced from a feed item's description.
class DescriptionImage(BaseModel):
    # Alternative text of the image; optional.
    alt: Optional[str]
    # Image source reference (required).
    source: str


class FeedItem(BaseModel):
title: str
link: str
Expand All @@ -16,7 +30,16 @@ class FeedItem(BaseModel):
description: str
description_links: Optional[List[str]]
description_images: Optional[List[DescriptionImage]]
enclosure: Optional[Enclosure]
itunes: Optional[Itunes]

# https://stackoverflow.com/questions/10994229/how-to-make-an-object-properly-hashable#answer-38259091
# Defined so that FeedItem objects can be placed in a set() to remove duplicates from a list of items.
def __hash__(self):
    """Hash the item by its title with surrounding whitespace removed."""
    normalized_title = self.title.strip()
    return hash(normalized_title)

def __eq__(self, other):
    """Compare two items by their whitespace-stripped titles.

    Returns ``NotImplemented`` when ``other`` has no string ``title`` (for
    example ``None`` or an unrelated object), so that ``item == other``
    evaluates to False through Python's normal fallback instead of raising
    ``AttributeError``.
    """
    other_title = getattr(other, "title", None)
    if not isinstance(other_title, str):
        return NotImplemented
    return self.title.strip() == other_title.strip()

class RSSFeed(BaseModel):
title: str
Expand Down

0 comments on commit d28ef06

Please sign in to comment.