In [1]:
import logging
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(name)s - %(levelname)s -- l.%(lineno)d: %(message)s",
)

In [2]:
import pandas as pd

from src.newsletters.main import runner as runner_newsletters
from src.scoring.main import runner as runner_scoring
from src.reporting.report import Report

In [3]:
news_stories = runner_newsletters(after='2024-12-01', before='2024-12-03')
df_news_stories, target_fields = runner_scoring(news_stories)

2024-12-31 07:08:37,213 - googleapiclient.discovery_cache - INFO -- l.49: file_cache is only supported with oauth2client<4.0.0
2024-12-31 07:08:37,219 - src.newsletters.main - INFO -- l.26: Query emails
2024-12-31 07:08:37,220 - src.newsletters.gmail - DEBUG -- l.35: Fetch email for sender TLDR AI <dan@tldrnewsletter.com> from 2024-12-01 until 2024-12-03.
2024-12-31 07:08:37,224 - googleapiclient.discovery - DEBUG -- l.1258: URL being requested: GET https://gmail.googleapis.com/gmail/v1/users/me/messages?q=after%3A2024-12-01+before%3A2024-12-03+from%3ATLDR+AI+%3Cdan%40tldrnewsletter.com%3E&alt=json
2024-12-31 07:08:37,542 - googleapiclient.discovery - DEBUG -- l.1258: URL being requested: GET https://gmail.googleapis.com/gmail/v1/users/me/messages/19387c6f6bdf98de?alt=json
2024-12-31 07:08:37,670 - googleapiclient.discovery - DEBUG -- l.1258: URL being requested: GET https://gmail.googleapis.com/gmail/v1/users/me/messages/193871f73323b0e6?alt=json
2024-12-31 07:08:37,780 - src.newslett

In [4]:
report = Report(df_news_stories, target_fields)

In [5]:
news_stories_for_report = report.filtered_news_stories(min_score_threshold=3, min_nb_entries=5, min_pct_entries=0.1)
news_stories_for_report.keys()

2024-12-31 07:08:48,900 - src.reporting.report - DEBUG -- l.100: min_nb_entries=5, min_pct_entries_in_db=1, min_entries=5, score_min_entries=2
2024-12-31 07:08:48,902 - src.reporting.report - DEBUG -- l.106: title                       ELON MUSK SEEKS TO BLOCK OPENAI'S FOR-PROFIT C...
url                         https://www.theverge.com/2024/11/30/24309697/e...
news_provider                                                www.theverge.com
source_of_the_news                           TLDR AI <dan@tldrnewsletter.com>
text                                                                         
news_summary                Elon Musk's legal team has filed a motion to p...
date_source                                    Mon, 2 Dec 2024 14:29:42 +0000
competitive_intelligence                                                   []
themes                                                                [Legal]
market_intelligence                                                  [OpenAI]
personalitie

dict_keys(['COMPETTIVE_INTELLIGENCE', 'FUNDING', 'EVALUATION', 'THEMES'])

In [6]:
len(news_stories_for_report["THEMES"])

12

In [7]:
report_str = report.create_report(news_stories_for_report)

In [8]:
with open("test.txt", "w") as f:
    f.write(report_str)

In [11]:
df_news_stories

Unnamed: 0,title,url,news_provider,source_of_the_news,text,news_summary,date_source,competitive_intelligence,themes,market_intelligence,personalities,score
0,ELON MUSK SEEKS TO BLOCK OPENAI'S FOR-PROFIT C...,https://www.theverge.com/2024/11/30/24309697/e...,www.theverge.com,TLDR AI <dan@tldrnewsletter.com>,,Elon Musk's legal team has filed a motion to p...,"Mon, 2 Dec 2024 14:29:42 +0000",[],[Legal],[OpenAI],[],2
1,PERPLEXITY MULLS GETTING INTO HARDWARE,https://techcrunch.com/2024/11/26/perplexity-m...,techcrunch.com,TLDR AI <dan@tldrnewsletter.com>,,"Perplexity's CEO plans to develop a ""simple, u...","Mon, 2 Dec 2024 14:29:42 +0000",[],"[AI&GenAI, Multimodal]",[],[],2
2,INFLECTION AI CEO SAYS IT'S DONE TRYING TO MAK...,https://techcrunch.com/2024/11/26/inflection-c...,techcrunch.com,TLDR AI <dan@tldrnewsletter.com>,,Inflection AI shifted its focus from developin...,"Mon, 2 Dec 2024 14:29:42 +0000",[],"[AI&GenAI, Model]",[],[],2
3,INTELLECT-1 RELEASE: THE FIRST GLOBALLY TRAINE...,https://www.primeintellect.ai/blog/intellect-1...,www.primeintellect.ai,TLDR AI <dan@tldrnewsletter.com>,,INTELLECT-1 is a 10B parameter model trained o...,"Mon, 2 Dec 2024 14:29:42 +0000",[],"[Model Training, Evaluation]",[],[],2
4,DETECT AND LEARN UNSEEN OBJECTS,https://arxiv.org/abs/2411.18207v1,arxiv.org,TLDR AI <dan@tldrnewsletter.com>,,This new framework pushes object detection int...,"Mon, 2 Dec 2024 14:29:42 +0000",[],"[AI&GenAI, Multimodal]",[],[],2
5,MAKING UNDERWATER IMAGES CLEAR,https://arxiv.org/abs/2411.18296v1,arxiv.org,TLDR AI <dan@tldrnewsletter.com>,,HUPE is an AI-powered method that improves und...,"Mon, 2 Dec 2024 14:29:42 +0000",[],"[AI&GenAI, Multimodal]",[],[],2
6,MAPPING THE IONOSPHERE WITH THE POWER OF ANDROID,https://research.google/blog/mapping-the-ionos...,research.google,TLDR AI <dan@tldrnewsletter.com>,,Google researchers were able to accurately map...,"Mon, 2 Dec 2024 14:29:42 +0000",[],[],[Google],[],0
7,INTRODUCING LTNTORCH,https://arxiv.org/abs/2409.16045v1,arxiv.org,TLDR AI <dan@tldrnewsletter.com>,,Logic Tensor Networks (LTN) merge deep learnin...,"Mon, 2 Dec 2024 14:29:42 +0000",[],[ML&DL],[],[],1
8,REFINING PRETRAINING DATA PROGRAMMATICALLY,https://gair-nlp.github.io/ProX/homepage.html,gair-nlp.github.io,TLDR AI <dan@tldrnewsletter.com>,,ProX is a framework that treats data refinemen...,"Mon, 2 Dec 2024 14:29:42 +0000",[],"[Model, Model Training]",[],[],2
9,CHATS WITH VIDEOS IN REAL TIME,https://huggingface.co/wangyueqian/MMDuet,huggingface.co,TLDR AI <dan@tldrnewsletter.com>,,"MMDuet is a novel ""video-text duet"" interactio...","Mon, 2 Dec 2024 14:29:42 +0000",[],[AI&GenAI],[],[],1


In [22]:
df_news_stories.loc[10][["competitive_intelligence", "themes","market_intelligence", "personalities"]].sum()

['AI&GenAI', 'Model', 'Funding', 'OpenAI', 'NVIDIA']