In [89]:
from pymongo import MongoClient
import os
from datetime import datetime, timedelta, timezone
import pandas as pd

DB_CONNECTION = os.getenv("DB_CONNECTION")
CLIENT = MongoClient(DB_CONNECTION)

In [90]:
start_hours = -1
end_hours = 0
start_date = datetime.now(timezone.utc) + timedelta(hours=start_hours)
end_data = datetime.now(timezone.utc) + timedelta(hours=end_hours)
duplicate_count = 3
result = CLIENT["pingslurp"]["all_podpings"].aggregate(
    [
        {
            "$match": {
                "$and": [
                    {"timestamp": {"$gt": start_date}},
                    {"timestamp": {"$lt": end_data}},
                ]
            }
        },
        {
            "$unwind": {
                "path": "$iris",
                "includeArrayIndex": "iri_pos",
                "preserveNullAndEmptyArrays": True,
            }
        },
        {"$group": {"_id": "$iris", "count": {"$sum": 1}}},
        {"$match": {"count": {"$gte": duplicate_count}}},
        {"$sort": {"count": -1}},
    ]
)

In [91]:
df = pd.DataFrame(result)
df.rename(columns={"_id": "iri"}, inplace=True)
df

Unnamed: 0,iri,count
0,https://media.rss.com/peacethebible/feed.xml,68
1,https://100mba.net/category/show/feed/,31
2,https://100mba.net/feed/podcast/,25
3,https://100mba.net/category/about-omar/feed/,19
4,http://newsradio1620.com/feed/podcast,12
...,...,...
64,https://feeds.buzzsprout.com/1373014.rss,3
65,https://feeds.buzzsprout.com/1470802.rss,3
66,https://feeds.buzzsprout.com/1742934.rss,3
67,https://feeds.buzzsprout.com/2093014.rss,3


In [92]:
df.set_index("count", inplace=True)

In [93]:
df['iri'].iloc[0]

'https://media.rss.com/peacethebible/feed.xml'

In [99]:
check_show = df['iri'].iloc[0]
filter={
    'iris': check_show,
      "$and": [
          {"timestamp": {"$gt": start_date}},
          {"timestamp": {"$lt": end_data}},
      ]    
}
sort=list({
    'timestamp': -1
}.items())
one_show = CLIENT['pingslurp']['all_podpings'].find(
  filter=filter,
  sort=sort
)
df_one_show = pd.DataFrame(one_show)


In [100]:
df_one_show.timestamp.diff(periods=-1).agg(func=['min','max','mean','median','std'])

min                0 days 00:00:18
max                0 days 00:02:42
mean     0 days 00:00:52.343283582
median             0 days 00:00:45
std      0 days 00:00:31.134387224
Name: timestamp, dtype: timedelta64[ns]

In [96]:
df_one_show.set_index('timestamp')

Unnamed: 0_level_0,_id,required_posting_auths,json_size,num_iris,id,trx_id,block_num,trx_num,version,medium,reason,iris,timestampNs,sessionId,op_id,stored_hosts,stored_meta
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-03-08 09:34:42,640856b40c60ff85f2061274,[podping.ccc],177,1,pp_podcast_update,67357fe47672b4e34ee45e650d41b34c216d1a5e,72930774,30,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 09:34:33.362,13217607545254245367,1,True,True
2023-03-08 09:33:54,640856820c60ff85f2061247,[podping.aaa],177,1,pp_podcast_update,81235c5724eab10a2a5978b62f1a776c51e35e4f,72930758,35,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 09:33:44.292,13366996297696889532,1,True,True
2023-03-08 09:33:21,640856620c60ff85f2061233,[podping.bbb],219,2,pp_podcast_update,91ec117c13dc1c2428b89874ecd76be51c99cb00,72930747,17,1.1,podcast,update,"[https://feeds.buzzsprout.com/2126307.rss, htt...",2023-03-08 09:33:13.343,9291692092280165503,1,True,True
2023-03-08 09:31:00,640855d40c60ff85f20611be,[podping.bbb],176,1,pp_podcast_update,09c340ced2d19051c4e67922711f9d75f3bb8aad,72930700,17,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 09:30:50.407,9291692092280165503,1,True,True
2023-03-08 09:30:09,640855a20c60ff85f2061191,[podping.bbb],215,2,pp_podcast_update,e6fd86792d941fdc5652f5ccfdb1c1eb2b6008ec,72930683,27,1.1,podcast,update,"[https://media.rss.com/peacethebible/feed.xml,...",2023-03-08 09:30:00.267,9291692092280165503,1,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-08 08:41:36,64084a420c60ff85f2060844,[podping.aaa],177,1,pp_podcast_update,202c1c994a887cfab688b6ea0fa749b30d0c6c42,72929715,50,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 08:41:29.248,13366996297696889532,1,True,True
2023-03-08 08:39:39,640849cd0c60ff85f20607fe,[podping.aaa],177,1,pp_podcast_update,7cdd01a9292bfa58cc93cac948908f809c524b2f,72929676,49,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 08:39:30.075,13366996297696889532,1,True,True
2023-03-08 08:37:33,6408494f0c60ff85f20607bb,[podping.aaa],177,1,pp_podcast_update,34b913ca4eb361b7900a1137c0672d3859a0b355,72929634,33,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 08:37:23.301,13366996297696889532,1,True,True
2023-03-08 08:37:03,640849300c60ff85f20607ac,[podping.ccc],177,1,pp_podcast_update,8c590627a5fe5a4fea9a4e863bd7475424d7fd88,72929624,39,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 08:36:55.256,13217607545254245367,1,True,True


In [97]:
pd.infer_freq(df_one_show.index)

'N'

In [98]:
df_one_show

Unnamed: 0,_id,required_posting_auths,json_size,num_iris,id,trx_id,block_num,timestamp,trx_num,version,medium,reason,iris,timestampNs,sessionId,op_id,stored_hosts,stored_meta
0,640856b40c60ff85f2061274,[podping.ccc],177,1,pp_podcast_update,67357fe47672b4e34ee45e650d41b34c216d1a5e,72930774,2023-03-08 09:34:42,30,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 09:34:33.362,13217607545254245367,1,True,True
1,640856820c60ff85f2061247,[podping.aaa],177,1,pp_podcast_update,81235c5724eab10a2a5978b62f1a776c51e35e4f,72930758,2023-03-08 09:33:54,35,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 09:33:44.292,13366996297696889532,1,True,True
2,640856620c60ff85f2061233,[podping.bbb],219,2,pp_podcast_update,91ec117c13dc1c2428b89874ecd76be51c99cb00,72930747,2023-03-08 09:33:21,17,1.1,podcast,update,"[https://feeds.buzzsprout.com/2126307.rss, htt...",2023-03-08 09:33:13.343,9291692092280165503,1,True,True
3,640855d40c60ff85f20611be,[podping.bbb],176,1,pp_podcast_update,09c340ced2d19051c4e67922711f9d75f3bb8aad,72930700,2023-03-08 09:31:00,17,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 09:30:50.407,9291692092280165503,1,True,True
4,640855a20c60ff85f2061191,[podping.bbb],215,2,pp_podcast_update,e6fd86792d941fdc5652f5ccfdb1c1eb2b6008ec,72930683,2023-03-08 09:30:09,27,1.1,podcast,update,"[https://media.rss.com/peacethebible/feed.xml,...",2023-03-08 09:30:00.267,9291692092280165503,1,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,64084a420c60ff85f2060844,[podping.aaa],177,1,pp_podcast_update,202c1c994a887cfab688b6ea0fa749b30d0c6c42,72929715,2023-03-08 08:41:36,50,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 08:41:29.248,13366996297696889532,1,True,True
64,640849cd0c60ff85f20607fe,[podping.aaa],177,1,pp_podcast_update,7cdd01a9292bfa58cc93cac948908f809c524b2f,72929676,2023-03-08 08:39:39,49,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 08:39:30.075,13366996297696889532,1,True,True
65,6408494f0c60ff85f20607bb,[podping.aaa],177,1,pp_podcast_update,34b913ca4eb361b7900a1137c0672d3859a0b355,72929634,2023-03-08 08:37:33,33,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 08:37:23.301,13366996297696889532,1,True,True
66,640849300c60ff85f20607ac,[podping.ccc],177,1,pp_podcast_update,8c590627a5fe5a4fea9a4e863bd7475424d7fd88,72929624,2023-03-08 08:37:03,39,1.1,podcast,update,[https://media.rss.com/peacethebible/feed.xml],2023-03-08 08:36:55.256,13217607545254245367,1,True,True
