From b02b08e0276db3a77bf57866959527efd0406e37 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 1 Jun 2015 16:12:13 +0100 Subject: [PATCH] Added france24 rules --- goose/cleaners.py | 1 + goose/extractors.py | 1 + 2 files changed, 2 insertions(+) diff --git a/goose/cleaners.py b/goose/cleaners.py index 92d4fff5..bfc6ed97 100644 --- a/goose/cleaners.py +++ b/goose/cleaners.py @@ -61,6 +61,7 @@ 'blogs.wsj.com': '.socialByline, .linkBar, .pMetadataType-comments, .pMetadataType-button, #dShareTop', 'www.huffingtonpost.co.uk': '.hp-slideshow-wrapper', 'www.huffingtonpost.com': { 'reference': 'www.huffingtonpost.co.uk' }, + 'www.france24.com': '.article-action, .tag, .category, .modification, .title, .emission_title, .emission-social-tabs, .g-carusel', } class OutputFormatterCleaner(clean.Cleaner): diff --git a/goose/extractors.py b/goose/extractors.py index 608affc9..d1e85bfd 100644 --- a/goose/extractors.py +++ b/goose/extractors.py @@ -100,6 +100,7 @@ 'www.huffingtonpost.co.uk': '#mainentrycontent', 'www.huffingtonpost.com': { 'reference': 'www.huffingtonpost.co.uk' }, 'route.newsactus.com': '.article .left-part p', + 'www.france24.com': '.article-long', }