In [1]:
import sqlite3
import re

import pandas as pd

In [2]:
def load_log(path_to_log):
    """Collect the payload of every ``Written:`` line in a log file.

    A line counts only when it begins with ``Written:`` followed by one
    whitespace character; everything after that character, up to the end
    of the line, is kept. All other lines are ignored.

    Parameters
    ----------
    path_to_log : str or path-like
        Log file to scan, opened in text mode with the default encoding.

    Returns
    -------
    pandas.Series
        Captured strings in file order (presumably the article ``an``
        identifiers -- confirm against the logger that writes these lines).
    """
    written_line = re.compile(r'^Written:\s(.+)$')
    with open(path_to_log) as infile:
        # Consume the lazy map while the file is still open.
        hits = map(written_line.match, infile)
        captured = [hit.group(1) for hit in hits if hit is not None]
    return pd.Series(captured)


def load_db(path_to_db):
    """Load ``an`` and ``ingestion_datetime`` from ``received_articles``.

    Parameters
    ----------
    path_to_db : str or path-like
        SQLite database file to read.

    Returns
    -------
    pandas.DataFrame
        One row per row returned by the query; column names are taken from
        the cursor description (``an``, ``ingestion_datetime``).

    Raises
    ------
    sqlite3.Error
        Propagated unchanged when the database cannot be opened or queried
        (for example, when the ``received_articles`` table is missing).
    """
    db = sqlite3.connect(path_to_db)
    try:
        cursor = (db
                  .cursor()
                  .execute('SELECT an, ingestion_datetime FROM received_articles'))
        columns = [t[0] for t in cursor.description]
        data = cursor.fetchall()
    finally:
        # Using the connection as a context manager only commits or rolls
        # back a transaction; it never closes the handle, so close it here
        # once all rows have been fetched into memory.
        db.close()
    return pd.DataFrame(data, columns=columns)

# NodeJS vs. Python listeners

In [6]:
# Values captured from the "Written:" lines of each listener's log file.
ans_log_nodejs, ans_log_python = map(
    load_log, ('log_nodejs.log', 'log_python.log'))

# Rows each listener stored in its SQLite database.
ans_db_nodejs, ans_db_python = map(
    load_db, ('test_listener_nodejs.sqlite3', 'test_listener_python.sqlite3'))

# Show all four objects: logs first, then databases.
display(ans_log_nodejs, ans_log_python, ans_db_nodejs, ans_db_python)

0        PRNDIS0020241126ekbq005eh
1        DJDN000020241126ekbq002jb
2        DJDN000020241126ekbq002kv
3        AFNWS00020241126ekbq001gu
4        NWMONI0020241126ekbq00tv1
                   ...            
44663    ASANEW0020241126ekbr000rt
44664    JALOPNK020241126ekbq000b6
44665    JALOPNK020241126ekbq000b8
44666    ASANEW0020241126ekbr000rt
44667    JALOPNK020241126ekbq000b5
Length: 44668, dtype: object

0        PRESSA0020241126ekbq0050l
1        NWMONI0020241126ekbq00tv1
2        AFNWS00020241126ekbq001gt
3        AFNWS00020241126ekbq001gw
4        AFNWS00020241126ekbq001gu
                   ...            
44656    ASANEW0020241126ekbr000rt
44657    JALOPNK020241126ekbq000b5
44658    JALOPNK020241126ekbq000b7
44659    NMNR000020241126ekbq0008d
44660    NMNR000020241126ekbq0008e
Length: 44661, dtype: object

Unnamed: 0,an,ingestion_datetime
0,PRNDIS0020241126ekbq005eh,2024-11-26T17:27:52.000Z
1,DJDN000020241126ekbq002jb,2024-11-26T17:27:53.000Z
2,DJDN000020241126ekbq002kv,2024-11-26T17:27:52.000Z
3,AFNWS00020241126ekbq001gu,2024-11-26T17:30:15.000Z
4,NWMONI0020241126ekbq00tv1,2024-11-26T17:30:14.000Z
...,...,...
44663,ASANEW0020241126ekbr000rt,2024-11-26T22:30:17.000Z
44664,JALOPNK020241126ekbq000b6,2024-11-26T22:30:18.000Z
44665,JALOPNK020241126ekbq000b8,2024-11-26T22:30:18.000Z
44666,ASANEW0020241126ekbr000rt,2024-11-26T22:30:17.000Z


Unnamed: 0,an,ingestion_datetime
0,PRESSA0020241126ekbq0050l,2024-11-26T13:09:21.000Z
1,NWMONI0020241126ekbq00tv1,2024-11-26T17:30:14.000Z
2,AFNWS00020241126ekbq001gt,2024-11-26T17:30:14.000Z
3,AFNWS00020241126ekbq001gw,2024-11-26T17:30:15.000Z
4,AFNWS00020241126ekbq001gu,2024-11-26T17:30:15.000Z
...,...,...
44656,ASANEW0020241126ekbr000rt,2024-11-26T22:30:17.000Z
44657,JALOPNK020241126ekbq000b5,2024-11-26T22:30:18.000Z
44658,JALOPNK020241126ekbq000b7,2024-11-26T22:30:18.000Z
44659,NMNR000020241126ekbq0008d,2024-11-26T22:30:21.000Z


In [4]:
# Entries logged by the NodeJS listener that never appear in the Python log.
seen_by_python = ans_log_nodejs.isin(ans_log_python)
ans_log_nodejs.loc[~seen_by_python]

0     PRNDIS0020241126ekbq005eh
8     MTPW000020241126ekbq005pl
27    LBA0000020241126ekbq0139x
46    MTPW000020241126ekbq005pm
51    LBA0000020241126ekbq00wy6
67    CNNW000020241126ekbq0058x
dtype: object

In [5]:
# Entries logged by the Python listener that never appear in the NodeJS log.
seen_by_nodejs = ans_log_python.isin(ans_log_nodejs)
ans_log_python.loc[~seen_by_nodejs]

44658    JALOPNK020241126ekbq000b7
44659    NMNR000020241126ekbq0008d
44660    NMNR000020241126ekbq0008e
dtype: object

In [7]:
# Database rows for the entries found only in the NodeJS log.
# The NodeJS-only set is recomputed here instead of being read from the
# IPython output cache (`_4`), which is tied to execution counts and breaks
# as soon as cells are re-run in a different order.
ans_db_nodejs[ans_db_nodejs['an'].isin(
    ans_log_nodejs[~ans_log_nodejs.isin(ans_log_python)])]

Unnamed: 0,an,ingestion_datetime
0,PRNDIS0020241126ekbq005eh,2024-11-26T17:27:52.000Z
8,MTPW000020241126ekbq005pl,2024-11-26T17:27:40.000Z
27,LBA0000020241126ekbq0139x,2024-11-26T17:27:46.000Z
46,MTPW000020241126ekbq005pm,2024-11-26T17:27:40.000Z
51,LBA0000020241126ekbq00wy6,2024-11-26T14:50:34.000Z
67,CNNW000020241126ekbq0058x,2024-11-26T17:27:52.000Z


In [8]:
# Database rows for the entries found only in the Python log.
# The Python-only set is recomputed here instead of being read from the
# IPython output cache (`_5`), which is tied to execution counts and breaks
# as soon as cells are re-run in a different order.
ans_db_python[ans_db_python['an'].isin(
    ans_log_python[~ans_log_python.isin(ans_log_nodejs)])]

Unnamed: 0,an,ingestion_datetime
44658,JALOPNK020241126ekbq000b7,2024-11-26T22:30:18.000Z
44659,NMNR000020241126ekbq0008d,2024-11-26T22:30:21.000Z
44660,NMNR000020241126ekbq0008e,2024-11-26T22:30:21.000Z


In [18]:
# Parse the stored timestamp strings of the NodeJS database into datetimes.
ans_db_nodejs['ingestion_datetime'].pipe(pd.to_datetime)

0       2024-11-26 17:27:52+00:00
1       2024-11-26 17:27:53+00:00
2       2024-11-26 17:27:52+00:00
3       2024-11-26 17:30:15+00:00
4       2024-11-26 17:30:14+00:00
                   ...           
44663   2024-11-26 22:30:17+00:00
44664   2024-11-26 22:30:18+00:00
44665   2024-11-26 22:30:18+00:00
44666   2024-11-26 22:30:17+00:00
44667   2024-11-26 22:30:18+00:00
Name: ingestion_datetime, Length: 44668, dtype: datetime64[ns, UTC]