In [41]:
import pandas as pd 
import numpy as np 
from datetime import date, datetime, timedelta

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

from bs4 import BeautifulSoup

import dash
import dash_bootstrap_components as dbc
from dash import html


import gdelt # for gdelt searchs

In [42]:
# Build the GDELT v2 client used for the search below.
gdv2 = gdelt.gdelt(version=2)

# Today's date rendered as 'YYYY Mon DD' (e.g. '2024 Oct 23'); this string is what Search receives.
current_date = date.today().strftime('%Y %b %d')
print(current_date)

# Query the GKG table for today only.
# To search a date range instead, pass a [start, end] pair such as
# date=['2024 Oct 08', current_date]. The translation=True option is deliberately left off.
current_gkg = gdv2.Search(
    date=[current_date],
    table='gkg',
    normcols=True,
    coverage=True,
)
print("The number of GKG records is", len(current_gkg))

2024 Oct 23
The number of GKG records is 122606


In [43]:
gkg = current_gkg.copy() # Work on a copy so later cells never modify the downloaded frame held in current_gkg

In [44]:
# Print the frame summary: index range, column names, non-null counts and dtypes
gkg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122606 entries, 0 to 122605
Data columns (total 27 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   gkgrecordid                 122606 non-null  object 
 1   date                        122606 non-null  int64  
 2   sourcecollectionidentifier  122606 non-null  int64  
 3   sourcecommonname            122606 non-null  object 
 4   documentidentifier          122606 non-null  object 
 5   counts                      13887 non-null   object 
 6   v2counts                    13887 non-null   object 
 7   themes                      110767 non-null  object 
 8   v2themes                    110767 non-null  object 
 9   locations                   91415 non-null   object 
 10  v2locations                 91242 non-null   object 
 11  persons                     94326 non-null   object 
 12  v2persons                   93207 non-null   object 
 13  organizations 

In [45]:
# Keep the GKG rows whose v2persons field mentions Eiichiro Oda, Shueisha or One Piece.
# The match is case-insensitive, and rows with a missing v2persons value are treated as non-matches.
op_pattern = 'Eiichiro Oda|Shueisha|One Piece'
mentions_op = gkg['v2persons'].str.contains(op_pattern, case=False, na=False)
gkg_op = gkg.loc[mentions_op].copy()

# GKG stores the timestamp as a YYYYMMDDHHMMSS number; convert it to a real datetime column.
gkg_op['date'] = pd.to_datetime(gkg_op['date'], format='%Y%m%d%H%M%S')
gkg_op

Unnamed: 0,gkgrecordid,date,sourcecollectionidentifier,sourcecommonname,documentidentifier,counts,v2counts,themes,v2themes,locations,...,gcam,sharingimage,relatedimages,socialimageembeds,socialvideoembeds,quotations,allnames,amounts,translationinfo,extras
6409,20241023004500-1014,2024-10-23 00:45:00,1,gamerant.com,https://gamerant.com/one-piece-why-loki-called...,,,TAX_FNCACT;TAX_FNCACT_PRINCE;TAX_FNCACT_FATHER...,"DELAY,867;USPEC_UNCERTAINTY1,867;CRISISLEX_C04...",,...,"wc:1304,c1.4:3,c12.1:170,c12.10:157,c12.12:49,...",https://static0.gamerantimages.com/wordpress/w...,https://static0.gamerantimages.com/wordpress/w...,,https://youtube.com/channel/UCkZjsmAQnXfS-_5lw...,,"Accursed Prince,43;Devil Fruit,139;Gura Gura,1...","2,children,1293;",,<PAGE_LINKS>https://gamerant.com/one-piece-cha...
12389,20241023014500-1130,2024-10-23 01:45:00,1,comicbook.com,https://comicbook.com/anime/news/one-piece-mar...,,,,,,...,"wc:632,c1.4:1,c12.1:44,c12.10:71,c12.12:23,c12...",https://comicbook.com/wp-content/uploads/sites...,,,https://youtube.com/user/ComicBookCom/playlists;,,"One Piece Fan Letter,21;One Piece,102;Egghead ...","7,Warlords system not long,1717;7,Warlords,2721;",,<PAGE_LINKS>https://comicbook.com/anime/news/a...
30350,20241023053000-599,2024-10-23 05:30:00,1,fandompost.com,https://www.fandompost.com/2024/10/22/6th-one-...,,,MEDIA_MSM;MANMADE_DISASTER_IMPLIED;PUBLIC_TRAN...,"TAX_ETHNICITY_JAPANESE,142;TAX_WORLDLANGUAGES_...",1#South Africa#SF#SF#-30#26#SF;1#Australia#AS#...,...,"wc:232,c12.1:11,c12.10:16,c12.12:2,c12.13:3,c1...",,,,,,"Eiichiro Oda,24;Fuji Television,62;One Piece,1...","500,yen for the Blu,334;500,yen for the DVD,358;",,<PAGE_LINKS>https://fandompost.vbulletin.net/f...
65543,20241023120000-107,2024-10-23 12:00:00,1,screenrant.com,https://screenrant.com/one-piece-new-chapter-1...,,,TAX_FNCACT;TAX_FNCACT_PRINCE;MANMADE_DISASTER_...,"TAX_FNCACT_PRINCE,600;TAX_FNCACT_PRINCE,2328;T...",1#Japan#JA#JA#36#138#JA,...,"wc:881,c1.4:3,c12.1:79,c12.10:88,c12.12:17,c12...",https://static1.srcdn.com/wordpress/wp-content...,,,https://youtube.com/user/screenrant?sub_confir...,,"One Piecechapter,17;Straw Hat,216;Sun God,294;...","2,issues of Shueisha Weekly,1126;2,consecutive...",,<PAGE_LINKS>https://screenrant.com/one-piece-1...
117995,20241023181500-194,2024-10-23 18:15:00,1,comicbook.com,https://comicbook.com/anime/news/one-piece-man...,,,TAX_WORLDLANGUAGES;TAX_WORLDLANGUAGES_MANGA;AR...,"TAX_FNCACT_FATHER,2214;MEDIA_MSM,1622;TAX_FNCA...",,...,"wc:596,c1.3:2,c12.1:42,c12.10:57,c12.12:15,c12...",https://comicbook.com/wp-content/uploads/sites...,,,https://youtube.com/user/ComicBookCom/playlists;,,"One Piece,10;Future Island,224;Straw Hat Pirat...","2,season television series on,1346;",,<PAGE_AUTHORS>Evan Valentine</PAGE_AUTHORS><PA...


In [46]:
gkg_op['documentidentifier'].value_counts() # Count how many records share each document URL (a count above 1 would reveal duplicates)

https://gamerant.com/one-piece-why-loki-called-accursed-prince-explained/                                  1
https://comicbook.com/anime/news/one-piece-marineford-remake-anime-fan-letter/                             1
https://www.fandompost.com/2024/10/22/6th-one-piece-blu-ray-anime-egghead-island-arc-tv-disc-scheduled/    1
https://screenrant.com/one-piece-new-chapter-1131-release-date-time/                                       1
https://comicbook.com/anime/news/one-piece-manga-loki-marvel-easter-egg/                                   1
Name: documentidentifier, dtype: int64

We see that every document in the filtered GKG records appears exactly once. Let us assume that `sourcecommonname` is the publisher of each unique document.

This means we can track the volume published by a source in time.

# Get Article Title Using Beautiful Soup

In [47]:
def fetch_article_title(url, timeout=10):
    """Download an article page and return the text of its headline.

    The headline is looked up in this order: an ``<h1>`` with class
    ``article-header-title``, an ``<h1>`` with class ``post-title``, and
    finally the first ``<h1>`` of any kind.

    Parameters
    ----------
    url : str
        Address of the article page.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests``.
        Without a timeout a single unresponsive host would block forever.

    Returns
    -------
    str
        The stripped headline text, ``"Title not found"`` when the page has
        no ``<h1>``, or ``"Error: <details>"`` when the request fails
        (connection problem, timeout, or HTTP error status).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    # Retry failed connection attempts twice, backing off between tries.
    retry = Retry(connect=2, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)

    # The context manager closes the session's pooled connections on exit, so
    # calling this function once per row does not accumulate open sockets.
    with requests.Session() as session:
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        try:
            response = session.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
        except requests.exceptions.RequestException as e:
            return f"Error: {e}"

    # Try the site-specific headline classes first.
    title_tag = None
    for css_class in ('article-header-title', 'post-title'):
        title_tag = soup.find('h1', class_=css_class)
        if title_tag:
            break

    # If still not found, look for any h1 tag.
    if not title_tag:
        title_tag = soup.find('h1')

    if title_tag:
        return title_tag.get_text(strip=True)
    return "Title not found"

# Look up the headline of every article URL (one HTTP request per row) and store it as 'title'
gkg_op['title'] = gkg_op['documentidentifier'].map(fetch_article_title)

In [48]:
# Tally the fetched titles; repeated "Error: ..." or "Title not found" values would show up here
gkg_op['title'].value_counts()

One Piece: Why Loki Is Called The Accursed Prince, Explained          1
One Piece Fan Letter Proves Marineford Is the Anime’s Best Arc        1
6th ‘One Piece’ Blu-ray Anime Egghead Island Arc TV Disc Scheduled    1
One Piece Chapter #1131 Release Date & Time                           1
One Piece Orchestrates the Perfect Marvel Nod With Loki’s Debut       1
Name: title, dtype: int64

# One Piece News Dashboard

In [49]:
import webbrowser
from webbrowser import BackgroundBrowser

# Path to the Microsoft Edge executable.
# Note: update this to the actual location of Edge on your machine.
edge_path = "C:/Program Files (x86)/Microsoft/Edge/Application/msedge.exe"


class Edge(BackgroundBrowser):
    """Browser controller for Microsoft Edge, launched from an explicit executable path."""

    def __init__(self, path):
        super().__init__(path)


# Register the controller under the name 'edge' so that webbrowser.get('edge') returns it.
# An Edge instance is registered (rather than a bare BackgroundBrowser) so the class
# defined above is what callers actually receive.
webbrowser.register('edge', None, Edge(edge_path))

In [50]:
# Create the Dash app
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Single source of truth for the port: used for the server and for the URL opened in the browser.
DASH_PORT = 50779
# Seconds to wait before opening the browser, giving the server time to start listening.
BROWSER_OPEN_DELAY_S = 1.0


def _article_card(row):
    """Build one grid tile: a link to the article wrapping its share image (if any) and its title."""
    link_children = []
    image_url = row['sharingimage']
    # Some GKG records carry no sharing image (the field is empty, presumably NaN in the
    # frame); skip the <img> for those instead of emitting an image with an invalid src.
    if isinstance(image_url, str) and image_url:
        link_children.append(html.Img(src=image_url, className="masonry-item"))
    link_children.append(html.Div(row['title'], className="image-title"))

    return html.Div(
        [
            html.A(
                link_children,
                href=row['documentidentifier'],
                target="_blank"
            )
        ],
        className="masonry-item-wrapper"
    )


# Create the image grid with titles: one tile per record in gkg_op
image_grid = html.Div(
    [_article_card(row) for _, row in gkg_op.iterrows()],
    className="masonry"
)

app.layout = dbc.Container(
    [
        html.H1("One Piece Image Grid"),
        image_grid
    ],
    fluid=True,
    style={"padding": "10px"}
)

if __name__ == '__main__':
    import threading
    import webbrowser

    url = f"http://127.0.0.1:{DASH_PORT}/"
    # run_server blocks, so the browser has to be launched from another thread.
    # Delaying the launch avoids requesting the page before the server accepts connections.
    browser_timer = threading.Timer(
        BROWSER_OPEN_DELAY_S, webbrowser.get('edge').open_new, args=(url,)
    )
    # Daemon thread: a pending browser launch never keeps the process alive if the server fails to start.
    browser_timer.daemon = True
    browser_timer.start()
    app.run_server(debug=True, port=DASH_PORT)