-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(gulli): support gulli as a finder to create a new podcast
We use the replays page of Gulli to find the information about the show to be registered. The OpenGraph meta tags are used to get the description. The title is fetched from the breadcrumb and the cover from the show list behind the replay.
- Loading branch information
1 parent
76375b8
commit 93cf15f
Showing
4 changed files
with
232 additions
and
0 deletions.
There are no files selected for viewing
56 changes: 56 additions & 0 deletions
56
Backend/src/main/java/lan/dk/podcastserver/manager/worker/downloader/GulliDownloader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
package lan.dk.podcastserver.manager.worker.downloader; | ||
|
||
import javaslang.collection.List; | ||
import lan.dk.podcastserver.entity.Item; | ||
import lan.dk.podcastserver.repository.ItemRepository; | ||
import lan.dk.podcastserver.repository.PodcastRepository; | ||
import lan.dk.podcastserver.service.HtmlService; | ||
import lan.dk.podcastserver.service.JsonService; | ||
import lan.dk.podcastserver.service.MimeTypeService; | ||
import lan.dk.podcastserver.service.UrlService; | ||
import lan.dk.podcastserver.service.factory.WGetFactory; | ||
import lan.dk.podcastserver.service.properties.PodcastServerParameters; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.springframework.messaging.simp.SimpMessagingTemplate; | ||
|
||
import java.util.regex.Pattern; | ||
|
||
import static java.util.Objects.nonNull; | ||
|
||
/** | ||
* Created by kevin on 12/10/2016 for Podcast Server | ||
*/ | ||
public class GulliDownloader extends HTTPDownloader { | ||
|
||
static final Pattern NUMBER_IN_PLAYLIST_EXTRACTOR = Pattern.compile("playlistItem\\(([^\\)]*)\\);"); | ||
static final Pattern PLAYLIST_EXTRACTOR = Pattern.compile("playlist:\\s*(.*?(?=events:))", Pattern.DOTALL); | ||
|
||
private final HtmlService htmlService; | ||
private final JsonService jsonService; | ||
|
||
private String url = null; | ||
|
||
public GulliDownloader(ItemRepository itemRepository, PodcastRepository podcastRepository, PodcastServerParameters podcastServerParameters, SimpMessagingTemplate template, MimeTypeService mimeTypeService, UrlService urlService, WGetFactory wGetFactory, HtmlService htmlService, JsonService jsonService) { | ||
super(itemRepository, podcastRepository, podcastServerParameters, template, mimeTypeService, urlService, wGetFactory); | ||
this.htmlService = htmlService; | ||
this.jsonService = jsonService; | ||
} | ||
|
||
@Override | ||
public String getItemUrl(Item item) { | ||
if (nonNull(this.item) && !this.item.equals(item)) | ||
return item.getUrl(); | ||
|
||
if (nonNull(url)) | ||
return url; | ||
|
||
url = htmlService.get(item.getUrl()) | ||
.map(d -> d.select("script")) | ||
.flatMap(scripts -> List.ofAll(scripts).find(e -> e.html().contains("playlist"))) | ||
.map(e -> "Foo") | ||
.getOrElse(StringUtils.EMPTY); | ||
|
||
|
||
return super.getItemUrl(item); | ||
} | ||
} |
55 changes: 55 additions & 0 deletions
55
Backend/src/main/java/lan/dk/podcastserver/manager/worker/finder/GulliFinder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package lan.dk.podcastserver.manager.worker.finder; | ||
|
||
import javaslang.control.Option; | ||
import lan.dk.podcastserver.entity.Cover; | ||
import lan.dk.podcastserver.entity.Podcast; | ||
import lan.dk.podcastserver.service.HtmlService; | ||
import lan.dk.podcastserver.service.ImageService; | ||
import lombok.AllArgsConstructor; | ||
import org.hibernate.validator.constraints.NotEmpty; | ||
import org.jsoup.nodes.Document; | ||
import org.springframework.stereotype.Service; | ||
|
||
/** | ||
* Created by kevin on 04/10/2016 for Podcast Server | ||
*/ | ||
@Service("GulliFinder") | ||
@AllArgsConstructor | ||
public class GulliFinder implements Finder { | ||
|
||
private static final String COVER_SELECTOR = "div.program_gullireplay a[href=%s] img"; | ||
|
||
final HtmlService htmlService; | ||
final ImageService imageService; | ||
|
||
@Override | ||
public Podcast find(String url) { | ||
return htmlService.get(url) | ||
.map(this::htmlToPodcast) | ||
.getOrElse(Podcast.DEFAULT_PODCAST); | ||
} | ||
|
||
private Podcast htmlToPodcast(Document d) { | ||
return Podcast.builder() | ||
.title(d.select("ol.breadcrumb li.active").first().text()) | ||
.cover(coverOf(d)) | ||
.description(d.select("meta[property=og:description]").attr("content")) | ||
.url(d.select("meta[property=og:url]").attr("content")) | ||
.type("Gulli") | ||
.build(); | ||
} | ||
|
||
private Cover coverOf(Document d) { | ||
String pageUrl = d.select("meta[property=og:url]").attr("content"); | ||
|
||
return Option.of(d.select(String.format(COVER_SELECTOR, pageUrl)).first()) | ||
.map(e -> e.attr("src")) | ||
.map(imageService::getCoverFromURL) | ||
.getOrElse(Cover.DEFAULT_COVER); | ||
} | ||
|
||
@Override | ||
public Integer compatibility(@NotEmpty String url) { | ||
return url.contains("replay.gulli.fr") ? 1 : Integer.MAX_VALUE; | ||
} | ||
} |
113 changes: 113 additions & 0 deletions
113
Backend/src/main/java/lan/dk/podcastserver/manager/worker/updater/GulliUpdater.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
package lan.dk.podcastserver.manager.worker.updater; | ||
|
||
import com.google.common.collect.Sets; | ||
import javaslang.collection.HashSet; | ||
import javaslang.collection.List; | ||
import javaslang.control.Option; | ||
import lan.dk.podcastserver.entity.Cover; | ||
import lan.dk.podcastserver.entity.Item; | ||
import lan.dk.podcastserver.entity.Podcast; | ||
import lan.dk.podcastserver.service.HtmlService; | ||
import lan.dk.podcastserver.service.ImageService; | ||
import lan.dk.podcastserver.service.SignatureService; | ||
import lan.dk.podcastserver.service.properties.PodcastServerParameters; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
import org.springframework.stereotype.Component; | ||
|
||
import javax.validation.Validator; | ||
import java.time.ZonedDateTime; | ||
import java.util.Set; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
import static javaslang.collection.HashSet.collector; | ||
|
||
/** | ||
* Created by kevin on 05/10/2016 for Podcast Server | ||
*/ | ||
@Slf4j | ||
@Component("GulliUpdater") | ||
public class GulliUpdater extends AbstractUpdater { | ||
|
||
private static final Pattern FRAME_EXTRACTOR = Pattern.compile(".*\\.html\\(.<iframe.* src=\"([^\"]*)\".*"); | ||
|
||
private final HtmlService htmlService; | ||
private final ImageService imageService; | ||
|
||
public GulliUpdater(PodcastServerParameters podcastServerParameters, SignatureService signatureService, Validator validator, HtmlService htmlService, ImageService imageService) { | ||
super(podcastServerParameters, signatureService, validator); | ||
this.htmlService = htmlService; | ||
this.imageService = imageService; | ||
} | ||
|
||
@Override | ||
public Set<Item> getItems(Podcast podcast) { | ||
return htmlService.get(podcast.getUrl()) | ||
.map(d -> d.select("div.all-videos ul li.col-md-3")) | ||
.map(this::asSet) | ||
.map(HashSet::toJavaSet) | ||
.getOrElse(Sets.newHashSet()); | ||
} | ||
|
||
private HashSet<Item> asSet(Elements elements) { | ||
return elements.stream() | ||
.map(this::findDetailsInFromPage) | ||
.collect(collector()); | ||
} | ||
|
||
private Item findDetailsInFromPage(Element e) { | ||
return Option.of(e.select("a").first()) | ||
.map(elem -> elem.attr("href")) | ||
.flatMap(htmlService::get) | ||
.flatMap(Option::of) | ||
.map(d -> d.select(".bloc_streaming").first()) | ||
.flatMap(this::htmlToItem) | ||
.flatMap(Option::of) | ||
.map(i -> i.setCover(getCover(e))) | ||
.getOrElse(Item.DEFAULT_ITEM); | ||
} | ||
|
||
private Option<Item> htmlToItem(Element block) { | ||
return List.ofAll(block.select("script")) | ||
.find(e -> e.html().contains("iframe")) | ||
.map(Element::html) | ||
.map(FRAME_EXTRACTOR::matcher) | ||
.filter(Matcher::find) | ||
.map(m -> m.group(1)) | ||
.map(url -> Item.builder() | ||
.title(block.select(".episode_title").text()) | ||
.description(block.select(".description").text()) | ||
.url(url) | ||
.pubDate(ZonedDateTime.now()) | ||
.build()); | ||
} | ||
|
||
private Cover getCover(Element block) { | ||
return Option.of(block) | ||
.map(e -> e.select("img").attr("src")) | ||
.map(imageService::getCoverFromURL) | ||
.getOrElse(Cover.DEFAULT_COVER); | ||
} | ||
|
||
@Override | ||
public String signatureOf(Podcast podcast) { | ||
return htmlService.get(podcast.getUrl()) | ||
.map(d -> d.select("div.all-videos ul").first()) | ||
.map(Element::html) | ||
.map(signatureService::generateMD5Signature) | ||
.getOrElse(StringUtils.EMPTY); | ||
} | ||
|
||
@Override | ||
public Type type() { | ||
return new Type("Gulli", "Gulli"); | ||
} | ||
|
||
@Override | ||
public Integer compatibility(String url) { | ||
return url.contains("replay.gulli.fr") ? 1 : Integer.MAX_VALUE; | ||
} | ||
} |
8 changes: 8 additions & 0 deletions
8
...end/src/test/java/lan/dk/podcastserver/manager/worker/downloader/GulliDownloaderTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import static org.junit.Assert.*; | ||
|
||
/**
 * Created by kevin on 12/10/2016 for Podcast Server
 *
 * Placeholder for the tests of GulliDownloader; no test case is defined yet.
 */
public class GulliDownloaderTest {
}