---
# Tagesschau API


In [1]:
import requests


In [2]:
# Base address of the site and the API routes explored in this notebook
url = "https://www.tagesschau.de"

endpoints = dict(
    homepage="/api2/homepage",
    news="/api2/news",
    newsfeed="/api2/newsfeed-101~_date-{date}.json",  # {date} placeholder: yymmdd format
)


# "enums"
ressorts_and_topics = "inland ausland wirtschaft sport video investigativ faktenfinder".split()

# Region id -> region name; the ids are consecutive and start at 1
region_mapping = dict(
    enumerate(
        [
            "Baden-Württemberg",
            "Bayern",
            "Berlin",
            "Brandenburg",
            "Bremen",
            "Hamburg",
            "Hessen",
            "Mecklenburg-Vorpommern",
            "Niedersachsen",
            "Nordrhein-Westfalen",
            "Rheinland-Pfalz",
            "Saarland",
            "Sachsen",
            "Sachsen-Anhalt",
            "Schleswig-Holstein",
            "Thüringen",
        ],
        start=1,
    )
)

# The plain id list is exactly the mapping's keys, in the same order
regions = list(region_mapping)

types = ["story", "webview", "video"]

# Blacklists
blacklist_url = ["liveblog"]
blacklist_type = ["video", "webview"]


---
# Endpoint: newsfeed


In [3]:
# Fetch the newsfeed for one fixed day (the date placeholder takes yymmdd).
# A timeout keeps the cell from hanging forever on a stalled connection.
response_newsfeed = requests.get(
    f"{url}{endpoints['newsfeed'].format(date='221106')}",
    timeout=10,
)

# Fail loudly on anything but 200: merely printing would leave
# `parsed_response_newsfeed` undefined (or stale from an earlier run) and the
# following cells would break or silently work on old data.
if response_newsfeed.status_code != 200:
    raise RuntimeError(
        f"newsfeed request failed with status code {response_newsfeed.status_code}:\n"
        f"{response_newsfeed.text}"
    )

parsed_response_newsfeed = response_newsfeed.json()


In [4]:
parsed_response_newsfeed.keys()

dict_keys(['news', 'regional', 'newStoriesCountLink', 'type', 'nextPage'])

In [22]:
len(parsed_response_newsfeed["news"])


160

In [6]:
list(parsed_response_newsfeed["news"][0].keys())


['sophoraId',
 'externalId',
 'title',
 'teaserImage',
 'date',
 'tracking',
 'tags',
 'updateCheckUrl',
 'regionId',
 'details',
 'detailsweb',
 'shareURL',
 'topline',
 'firstSentence',
 'geotags',
 'ressort',
 'type']

In [7]:
# Keep the "details" link of every newsfeed entry that has a non-empty one
all_details_links_from_newsfeed = list(
    filter(
        None,
        (entry.get("details") for entry in parsed_response_newsfeed["news"]),
    )
)


---
# Endpoint: news


In [8]:
# Fetch the current news list.
# A timeout keeps the cell from hanging forever on a stalled connection.
response_news = requests.get(f"{url}{endpoints['news']}", timeout=10)

# Fail loudly on anything but 200: merely printing would leave
# `parsed_response_news` undefined (or stale from an earlier run) and the
# following cells would break or silently work on old data.
if response_news.status_code != 200:
    raise RuntimeError(
        f"news request failed with status code {response_news.status_code}:\n"
        f"{response_news.text}"
    )

parsed_response_news = response_news.json()


In [9]:
# Show how many entries came back instead of dumping the whole list
len(parsed_response_news["news"])


318

In [10]:
# One list of tag names per news entry; entries with missing or empty tags are skipped
all_tags_from_news = [
    [tag_entry["tag"] for tag_entry in entry["tags"]]
    for entry in parsed_response_news["news"]
    if entry.get("tags")
]


In [35]:
parsed_response_news["news"][0]


{'sophoraId': 'ndr-mecklenburg-vorpommern-story-29713',
 'externalId': 'lra_feed_imports_ndr_id-2489a916-7188-4812-8f27-d27746c9ec65',
 'title': 'Medien: Disney zahlt Schnäppchenpreis für "Global Dream"',
 'teaserImage': {'title': 'Das im Bau befindlichen Kreuzfahrtschiff "Global Dream" - auch als "Global One" bekannt - liegt im Baudock der ansonsten leeren Schiffbauhalle der ehemaligen MV-Werft.',
  'copyright': 'dpa',
  'alttext': 'Das im Bau befindlichen Kreuzfahrtschiff "Global Dream" - auch als "Global One" bekannt - liegt im Baudock der ansonsten leeren Schiffbauhalle der ehemaligen MV-Werft.',
  'type': 'image',
  'videowebl': {'imageurl': 'https://www.tagesschau.de/ardimport/regional/mecklenburgvorpommern/ndr-mecklenburg-vorpommern-image-64217~_v-videowebl.jpg'},
  'klein1x1': {'imageurl': 'https://www.tagesschau.de/ardimport/regional/mecklenburgvorpommern/ndr-mecklenburg-vorpommern-image-64217~_v-klein1x1.jpg'},
  'portraetgrossplus8x9': {'imageurl': 'https://www.tagesschau.de

---
# Get individual news article from news details


In [11]:
# Content entry types that are kept when the article body is printed further down
whitelist_type = ["headline", "text"]


In [12]:
# Fetch the full article behind the first news entry's "details" link.
# A timeout keeps the cell from hanging forever on a stalled connection.
response_news_article = requests.get(
    parsed_response_news["news"][0]["details"],
    timeout=10,
)

# Fail loudly on anything but 200: merely printing would leave
# `parsed_response_news_article` undefined (or stale from an earlier run) and
# the following cells would break or silently work on old data.
if response_news_article.status_code != 200:
    raise RuntimeError(
        f"article request failed with status code {response_news_article.status_code}:\n"
        f"{response_news_article.text}"
    )

parsed_response_news_article = response_news_article.json()


In [13]:
list(parsed_response_news_article.keys())


['sophoraId',
 'externalId',
 'title',
 'teaserImage',
 'content',
 'date',
 'tracking',
 'tags',
 'updateCheckUrl',
 'regionId',
 'regionIds',
 'images',
 'details',
 'detailsweb',
 'shareURL',
 'topline',
 'firstSentence',
 'geotags',
 'brandingImage',
 'type',
 'breakingNews']

In [14]:
parsed_response_news_article["title"]


'Medien: Disney zahlt Schnäppchenpreis für "Global Dream"'

In [15]:
# Print the values of all whitelisted content entries, separated by blank lines
print(
    "\n\n".join(
        part["value"]
        for part in parsed_response_news_article["content"]
        if part["type"] in whitelist_type
    )
)


<strong>Der Disney-Konzern hat das auf den insolventen MV-Werften gebaute Kreuzfahrtschiff "Global Dream" offenbar für den Schnäppchenpreis von 40 Millionen Euro bekommen. Das berichten die Magazine "Capital" und "stern". Das Land Mecklenburg-Vorpommern könnte auf einem dreistelligen Millionenschaden sitzen bleiben.</strong>

Ursprünglich war das mehr als 340 Meter lange Kreuzfahrtschiff laut dem Bericht von "Capital" und "stern" auf einen Preis von rund 1,8 Milliarden Euro taxiert worden - wenn es fertiggestellt wäre. Nach jüngsten Angaben des MV-Werften-Insolvenzverwalters Christoph Morgen ist die in der Werfthalle in Wismar liegende "Global Dream" zu rund 60 Prozent fertiggestellt. Den Angaben zufolge werde Disney, das eine eigene Kreuzfahrtsparte im Portfolio hat, das Schiff ohne Gewährleistungsansprüche übernehmen und es auf eigenes Risiko und auf eigene Kosten fertig- und umbauen. Disney hatte das Schiff Mitte November gekauft, über den Kaufpreis war Stillschweigen vereinbart wor

---
# Application


In [1]:
import logging
from typing import Union

import requests
from pydantic import BaseModel


In [66]:
class DynamicConfig(BaseModel):
    """Settings model holding the search keywords in lowercased form."""

    keywords: list

    def __init__(self, **data):
        # Lowercase every keyword first, then let the base model take over
        lowered_keywords = []
        for keyword in data["keywords"]:
            lowered_keywords.append(keyword.lower())
        data["keywords"] = lowered_keywords
        super().__init__(**data)


class ArticleInfo(BaseModel):
    """Condensed record of one matched news article.

    The trailing comment on each field names the key of the API article dict
    that `get_relevant_articles` copies into it.
    """
    id: str  # sophoraId
    title: str  # title, in its original (not lowercased) form
    tags: list[str]  # lowercased "tag" values taken from the article's tags
    link_json: str  # details
    link_web: str  # detailsweb
    timestamp: str  # date


In [25]:
# Configure root logging for the notebook.
# - datefmt uses %H (24-hour clock): %I is a 12-hour clock and, without %p,
#   makes morning and evening timestamps indistinguishable.
# - force=True replaces handlers left over from an earlier call; without it,
#   re-running this cell after editing it is silently a no-op.
logging.basicConfig(
    format="%(levelname)s - %(asctime)s -\t%(message)s",
    level=logging.DEBUG,
    datefmt="%Y-%m-%d %H:%M:%S",
    force=True,
)


In [10]:
# Application config

# Site base address plus the API routes, keyed by a short name
url = "https://www.tagesschau.de"

endpoints = dict(
    homepage="/api2/homepage",
    news="/api2/news",
    newsfeed="/api2/newsfeed-101~_date-{date}.json",  # {date} placeholder: yymmdd format
)


# "enums"
ressorts_and_topics = [
    "inland", "ausland", "wirtschaft", "sport",
    "video", "investigativ", "faktenfinder",
]

regions = list(range(1, 17))

# Pair every region id with its name, in id order
region_mapping = dict(
    zip(
        regions,
        (
            "Baden-Württemberg",
            "Bayern",
            "Berlin",
            "Brandenburg",
            "Bremen",
            "Hamburg",
            "Hessen",
            "Mecklenburg-Vorpommern",
            "Niedersachsen",
            "Nordrhein-Westfalen",
            "Rheinland-Pfalz",
            "Saarland",
            "Sachsen",
            "Sachsen-Anhalt",
            "Schleswig-Holstein",
            "Thüringen",
        ),
    )
)

types = ["story", "webview", "video"]

# Blacklists
blacklist_url = ["liveblog"]
blacklist_type = ["video", "webview"]


In [70]:
def get_news(timeout: float = 10.0) -> list[dict]:
    """Fetch the article list from the news endpoint.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, `requests` may block indefinitely on a stalled connection.

    Returns:
        The value stored under the "news" key of the JSON response.

    Raises:
        Exception: If the response status code is not 200.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    logging.info("Get request to news endpoint")
    response_news = requests.get(f"{url}{endpoints['news']}", timeout=timeout)

    if response_news.status_code != 200:
        raise Exception(f"""Error
        Expected status code:  200
        Got status code:  {response_news.status_code}
        Response text:
        {response_news.text}
        """)

    logging.info("Received status code 200")
    return response_news.json()["news"]


def get_unique_tags(news: list[dict]) -> list[Union[str, None]]:
    """Collect every distinct tag name that occurs in the given articles.

    Args:
        news: Article dicts; each may carry a "tags" list of dicts that have
            a "tag" key. Articles without "tags" (or with an empty or None
            value) contribute nothing instead of raising KeyError.

    Returns:
        The distinct tag values, in no particular order.
    """
    unique_tags = {
        tag_entry["tag"]
        for article in news
        for tag_entry in article.get("tags") or []
    }
    return list(unique_tags)


def get_relevant_articles(
    news: list[dict],
    keywords: list[str],
) -> list[Union[ArticleInfo, None]]:
    """Select the articles whose title or tags contain one of the keywords.

    Matching is case-insensitive and by substring: a keyword matches when it
    occurs inside any whitespace-separated word of the title or inside any tag.

    Args:
        news: Article dicts as returned by the news endpoint. Missing or empty
            "title" / "tags" entries are treated as "nothing to match" instead
            of raising KeyError.
        keywords: Search terms. They are lowercased here, so callers need not
            normalise them; empty strings are ignored because an empty
            keyword would match every article.

    Returns:
        One ArticleInfo per matching article. Matching articles that lack a
        key required for ArticleInfo are logged and skipped.
    """
    keywords = [keyword.lower() for keyword in keywords if keyword]
    relevant_articles = []

    for article in news:
        current_title = (article.get("title") or "").lower()
        current_tags = [i["tag"].lower() for i in article.get("tags") or []]

        logging.info(f"Current title:  {current_title}")
        logging.info(f"Current tags:  {current_tags}")

        title_matches = any(
            keyword in word
            for word in current_title.split()
            for keyword in keywords
        )
        tag_matches = any(
            keyword in tag
            for tag in current_tags
            for keyword in keywords
        )
        if not (title_matches or tag_matches):
            continue

        try:
            relevant_articles.append(
                ArticleInfo(
                    id=article["sophoraId"],
                    title=article["title"],  # Fresh parse to keep original title formatting
                    tags=current_tags,
                    link_json=article["details"],
                    link_web=article["detailsweb"],
                    timestamp=article["date"],
                )
            )
        except KeyError as error:
            # Best effort: record which entry was incomplete and keep going
            logging.info(f"KeyError detected:  {error}")
            logging.info(f"\tarticle.get('type'):  {article.get('type')}")
            logging.info(f"\tarticle.get('shareURL'):  {article.get('shareURL')}")

    logging.info(f"Found {len(relevant_articles)} relevant articles")
    return relevant_articles


In [34]:
# Mixed-case input is fine here: DynamicConfig lowercases the keywords on construction
dynamic_config = DynamicConfig(keywords=["Ukraine", "katar"])


In [36]:
# Fetch the news list once; the filtering cell further down works on this in-memory copy
news = get_news()


INFO - 2022-11-29 09:25:29:	Get request to news endpoint
INFO - 2022-11-29 09:25:29:	Received status code 200


In [68]:
# Narrow the fetched news down to the articles matching the configured keywords
relevant_articles = get_relevant_articles(news, dynamic_config.keywords)


INFO - 2022-11-29 09:52:46:	Current title:  kritik an geplanten grundschulprojekten in bw
INFO - 2022-11-29 09:52:46:	Current tags:  ['baden-württemberg']
INFO - 2022-11-29 09:52:46:	Current title:  lemke will gasförderung vor borkum abwenden
INFO - 2022-11-29 09:52:46:	Current tags:  ['borkum', 'gasförderung']
INFO - 2022-11-29 09:52:46:	Current title:  fliegerbombe in wilhelmsburg erfolgreich entschärft
INFO - 2022-11-29 09:52:46:	Current tags:  ['hamburg']
INFO - 2022-11-29 09:52:46:	Current title:  warum es in thüringen eine einzige wasserstofftankstelle gibt
INFO - 2022-11-29 09:52:46:	Current tags:  ['thüringen']
INFO - 2022-11-29 09:52:46:	Current title:  baby bei geburt gestorben: hebamme zu haftstrafe verurteilt
INFO - 2022-11-29 09:52:46:	Current tags:  ['niedersachsen']
INFO - 2022-11-29 09:52:46:	Current title:  verkehrsminister: 49-euro-ticket soll zum 1. april starten
INFO - 2022-11-29 09:52:46:	Current tags:  ['niedersachsen']
INFO - 2022-11-29 09:52:46:	Current title:  

In [71]:
relevant_articles[0]


ArticleInfo(id='justizminister-treffen-ukraine-kriegsverbrechen-101', title='Gemeinsame Jagd Auf Kriegsverbrecher', tags=['ukraine', 'russland', 'kriegsverbrechen'], link_json='https://www.tagesschau.de/api2/inland/justizminister-treffen-ukraine-kriegsverbrechen-101.json', link_web='https://www.tagesschau.de/inland/justizminister-treffen-ukraine-kriegsverbrechen-101.html', timestamp='2022-11-29T20:05:26.108+01:00')

In [73]:
print(relevant_articles[0])


id='justizminister-treffen-ukraine-kriegsverbrechen-101' title='Gemeinsame Jagd Auf Kriegsverbrecher' tags=['ukraine', 'russland', 'kriegsverbrechen'] link_json='https://www.tagesschau.de/api2/inland/justizminister-treffen-ukraine-kriegsverbrechen-101.json' link_web='https://www.tagesschau.de/inland/justizminister-treffen-ukraine-kriegsverbrechen-101.html' timestamp='2022-11-29T20:05:26.108+01:00'


---
# ntfy


In [76]:
ntfy_url = "https://ntfy.sh"

topic = "news-ticker"


def post_news_alert(
    title: str,
    data: str,
    link_web: str,
    topic: str = topic,
    ntfy_url: str = ntfy_url,
    timeout: float = 10.0,
) -> None:
    """Publish one notification to an ntfy topic.

    Args:
        title: Sent as the "Title" header.
        data: Message body.
        link_web: Sent as the "Click" header.
        topic: Topic name appended to the server URL.
        ntfy_url: Base URL of the ntfy server.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    logging.info("Post news alert")
    response = requests.post(
        f"{ntfy_url}/{topic}",
        # Encode explicitly so that non-ASCII text (e.g. umlauts) goes out as
        # UTF-8 regardless of the HTTP stack's default for str bodies.
        data=data.encode("utf-8"),
        headers={
            "Title": title,
            "Click": link_web,
            # "Priority": "urgent",
            # "Tags": "warning,skull"
        },
        timeout=timeout,
    )

    # Only report success when the server accepted the message; a rejected
    # post is logged rather than raised so one failure does not stop a batch.
    if response.ok:
        logging.info("News alert posted")
    else:
        logging.error(
            f"News alert not posted, status code {response.status_code}: {response.text}"
        )


In [81]:
# NOTE(review): `sleep` is imported here but never called in this cell.
from time import sleep

# Send one notification per matched article: the message body is the article
# title and the click target is the article's web link.
for article in relevant_articles:
    post_news_alert(
        title="Source:  tagesschau.de",
        data=article.title,
        link_web=article.link_web,
    )


INFO - 2022-11-29 10:12:46:	Post news alert
INFO - 2022-11-29 10:12:46:	News alert posted
INFO - 2022-11-29 10:12:46:	Post news alert
INFO - 2022-11-29 10:12:47:	News alert posted
INFO - 2022-11-29 10:12:47:	Post news alert
INFO - 2022-11-29 10:12:47:	News alert posted
INFO - 2022-11-29 10:12:47:	Post news alert
INFO - 2022-11-29 10:12:48:	News alert posted
INFO - 2022-11-29 10:12:48:	Post news alert
INFO - 2022-11-29 10:12:48:	News alert posted
INFO - 2022-11-29 10:12:48:	Post news alert
INFO - 2022-11-29 10:12:49:	News alert posted
INFO - 2022-11-29 10:12:49:	Post news alert
INFO - 2022-11-29 10:12:49:	News alert posted
INFO - 2022-11-29 10:12:49:	Post news alert
INFO - 2022-11-29 10:12:50:	News alert posted
INFO - 2022-11-29 10:12:50:	Post news alert
INFO - 2022-11-29 10:12:50:	News alert posted
INFO - 2022-11-29 10:12:50:	Post news alert
INFO - 2022-11-29 10:12:50:	News alert posted
INFO - 2022-11-29 10:12:50:	Post news alert
INFO - 2022-11-29 10:12:51:	News alert posted
INFO - 202