In [90]:
import gdelt # for gdelt searchs
from gkg_tools import * # for gkg searchs
# %run "../gkg_tools.py" # using magic command run to access the script from the parent directory


import nltk
from nltk.tokenize import word_tokenize, sent_tokenize


from transformers import pipeline, set_seed
import torch

# # GPU Timing (using GPU 1) else -1 for CPU
# device_id = 1 if torch.cuda.is_available() else -1 

# Revised GKG Data Query
- When using `gkg.get_gkg()` the input can be a stored gkg table for the parameter `data=dataframe_name`.
- This avoids the possibility of storing two active gkg tables.
- `coverage=true` only applies to queries from the gdelt database.

## Using a gdelt query

In [91]:
gkg = gkg_operator() # create a gkg operator


In [92]:
gkg.set_date(['2024 Oct 20', '2024 Oct 21']) # set the date range for the search
gkg.get_gkg(coverage=True)


In [117]:
# # persons = ['Eiichiro Oda', 'Shueisha', 'One Piece']
src_name = ['gamerant.com','screenrant.com']
rant = gkg.gkg_query[gkg.gkg_query['sourcecommonname'].isin(src_name)]
# rant.info()
# search rant themes for the regex pattern MANGA, count entries
manga = rant[rant['themes'].str.contains('MANGA') == True]
manga.info()
# OP = gkg.gkg_persons(persons)
# OP.reset_index(inplace=True)
# store OP in a csv file
# OP.to_csv('OP.csv', index=False)
# OP = pd.read_csv('OP.csv')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7 entries, 0 to 6
Data columns (total 28 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   index                       7 non-null      int64  
 1   gkgrecordid                 7 non-null      object 
 2   date                        7 non-null      int64  
 3   sourcecollectionidentifier  7 non-null      int64  
 4   sourcecommonname            7 non-null      object 
 5   documentidentifier          7 non-null      object 
 6   counts                      0 non-null      float64
 7   v2counts                    0 non-null      float64
 8   themes                      7 non-null      object 
 9   v2themes                    7 non-null      object 
 10  locations                   4 non-null      object 
 11  v2locations                 4 non-null      object 
 12  persons                     7 non-null      object 
 13  v2persons                   7 non-null 

## Using a saved table

In [109]:
OP = pd.read_csv('OP.csv') # We could alternatively use the subset of the queried data in the last section.
# gkg.get_gkg(data=OP) # stores in gkg.gkg_query as a dataframe
# there is an indexing mismatch somewhere, the data needs the index reset
# gkg.get_gkg(data=rant.iloc[:500]) # stores in gkg.gkg_query as a dataframe
gkg.get_gkg(data=manga)
gkg.gkg_query.head(2)

Unnamed: 0,index,gkgrecordid,date,sourcecollectionidentifier,sourcecommonname,documentidentifier,counts,v2counts,themes,v2themes,...,gcam,sharingimage,relatedimages,socialimageembeds,socialvideoembeds,quotations,allnames,amounts,translationinfo,extras
0,5370,20241020014500-92,20241020014500,1,gamerant.com,https://gamerant.com/one-piece-sun-god-loki-ni...,,,MANMADE_DISASTER_IMPLIED;ARMEDCONFLICT;UNGP_FO...,"WB_678_DIGITAL_GOVERNMENT,6929;WB_694_BROADCAS...",...,"wc:1216,c1.1:1,c1.3:1,c12.1:153,c12.10:154,c12...",https://static0.gamerantimages.com/wordpress/w...,https://static0.gamerantimages.com/wordpress/w...,,https://youtube.com/channel/UCkZjsmAQnXfS-_5lw...,,"Sun God,230;One Piece,613;Sun God Nika,985;Sun...","2,Sun Gods,146;2,Sun Gods,3619;2,legendary pir...",,<PAGE_LINKS>https://gamerant.com/one-piece-elb...
1,67774,20241020214500-270,20241020214500,1,gamerant.com,https://gamerant.com/one-piece-oda-announces-t...,,,TAX_WORLDLANGUAGES;TAX_WORLDLANGUAGES_MANGA;TA...,"UNGP_FORESTS_RIVERS_OCEANS,4031;UNGP_FORESTS_R...",...,"wc:1199,c12.1:102,c12.10:134,c12.12:41,c12.13:...",https://static0.gamerantimages.com/wordpress/w...,https://static0.gamerantimages.com/wordpress/w...,,https://youtube.com/channel/UCkZjsmAQnXfS-_5lw...,,"One Piece,59;Live Action,86;Elbaf Arc,176;One ...","2,projects during One Piece,26;10,Q A session,...",,<PAGE_LINKS>https://gamerant.com/one-piece-elb...


In [95]:
gkg.parse_urls() # parse urls in the gkg_query

In [118]:
# gkg.parse_gkg_field('amounts')
gkg.parse_gkg_field('persons')
# gkg.parse_gkg_field('v2persons')
# gkg.parse_gkg_field('themes')
# gkg.parse_gkg_field('v2tone')

Unnamed: 0,index,persons_0
0,0,eiichiro oda
1,1,eiichiro oda
2,2,eiichiro oda
3,2,king harald
4,3,eiichiro oda
5,4,queen oito
6,4,zhang lei
7,4,eiichiro oda
8,4,hunter x hunter
9,4,isaac netero


In [111]:
gkg.tokenize_field(col_idx=1) # when calling this function, the tokenized field is given by col_idx input parameter
gkg.field_tokens[:5] + gkg.field_tokens[-5:]

['Eiichiro Oda',
 'Hunter X Hunter',
 'Isaac Netero',
 'King Harald',
 'Queen Oito',
 'Hunter X Hunter',
 'Isaac Netero',
 'King Harald',
 'Queen Oito',
 'Zhang Lei']

In [112]:
gkg.vectorize_field()
# gkg.vectorized_df.columns = gkg.tokenize_field(col_idx=2) # resets columns to uncleaned field names
gkg.vectorized_df.head(2)

Unnamed: 0,eiichiro oda,hunter x hunter,isaac netero,king harald,queen oito,zhang lei
0,1.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0


In [113]:
gkg.parsed_urls[:5]

['one piece sun god loki nika luffy',
 'one piece oda announces two major projects during manga break',
 'one piece 1131 spoilers release date leaks',
 'one piece chapter 1130 oda finally introduces loki',
 'hunter x hunter 404 spoilers release date leaks']

In [114]:
gkgvf = gkg.vectorized_df.copy()
# rename rows with urls
gkgvf.index = gkg.parsed_urls
gkgvf.head(2)


Unnamed: 0,eiichiro oda,hunter x hunter,isaac netero,king harald,queen oito,zhang lei
one piece sun god loki nika luffy,1.0,0.0,0.0,0.0,0.0,0.0
one piece oda announces two major projects during manga break,1.0,0.0,0.0,0.0,0.0,0.0


In [115]:
# get percent of non-zero float values in the vectorized dataframe
non_zero = gkgvf.astype(bool).sum(axis=0)
non_zero = non_zero[non_zero > 0]
non_zero = non_zero / gkgvf.shape[0]*100
non_zero.sort_values(ascending=False, inplace=True)
non_zero[:10]

eiichiro oda       100.000000
hunter x hunter     14.285714
isaac netero        14.285714
king harald         14.285714
queen oito          14.285714
zhang lei           14.285714
dtype: float64

In [116]:
gkg.vectorized_df.sum(axis=0).sort_values(ascending=False).head(5)

hunter x hunter    11.0
eiichiro oda        7.0
zhang lei           3.0
queen oito          2.0
isaac netero        1.0
dtype: float64

In [103]:
gkg.get_all_soup(limit_output=5,verbose=True)
# gkg.get_all_soup(limit_output='all')

0
Title: One Piece: Sun God Loki Vs Sun God Nika, Explained
Length of Header List: 11
First Header: Game Rant
Last Header: Trending Now
Length of Paragraph List: 18
First Paragraph: This article contains spoilers from One Piece's Elbaf Arc.
Last Paragraph: One Piece is available to read via Viz Media. The series can be read by the fans officially and for free on the Shonen Jump and theManga Plusapp. The release date for the next chapter of One Piece, One Piece 1130, is set to be October 20, 2024.


1
Title: One Piece Just Demonstrated The True Beauty of Oda's Writing Through Its Most Unexpected Character
Length of Header List: 8
First Header: Screen Rant
Last Header: Trending Now
Length of Paragraph List: 14
First Paragraph: Despite occasional complaints about its pacing,One Pieceis often agreed to be one of the most well-written shōnen manga, not only in terms of its storytelling, but also its world-building, well-rounded characters, complex themes, and backstories. That said, one une

In [104]:
asd = gkg.all_soup_data
asd
# gkg.all_soup_data['Paragraphs'][0]

Unnamed: 0,URL,Parsed URL,Headers,Paragraphs
0,https://gamerant.com/one-piece-sun-god-loki-ni...,one piece sun god loki nika luffy,"[Game Rant, One Piece: Sun God Loki Vs Sun God...",[This article contains spoilers from One Piece...
1,https://screenrant.com/one-piece-unexpected-ch...,one piece unexpected character buggy beauty of...,"[Screen Rant, One Piece Just Demonstrated The ...",[Despite occasional complaints about its pacin...
2,https://screenrant.com/one-piece-1130-loki-vil...,one piece 1130 loki villain intimidating elbaph,"[Screen Rant, One Piece's New Villain Is the M...",[Warning: SPOILERS for One Piece chapter #1130...
3,https://comicbook.com/anime/news/one-piece-fan...,one piece fan letter anime watch,[One Piece: Fan Letter May Be the Anime’s Best...,[One Piece: Fan Letteris live and marks one of...
4,https://gamerant.com/one-piece-oda-announces-t...,one piece oda announces two major projects dur...,"[Game Rant, One Piece: Oda Announces Two Major...",[This article contains spoilers from One Piece...


In [105]:
asd['Paragraphs'][0][:3]

["This article contains spoilers from One Piece's Elbaf Arc.",
 'One Piecechapter 1130 proved to be absolutely stunning from start to finish and it is safe to say that this chapter shocked the fans to quite an extent withthe reveal of Loki so early into the arc. Truly, few were expecting Loki to be revealed in such fashion inOne Piecechapter 1130, and at the same time, even fewer individuals were expecting him to be as important as he has proven to be.',
 'Loki is perhaps the most powerful giant. His strength might be on par with a Yonko in One Piece.']

In [106]:
for item in asd['Paragraphs'][0]:
    idx = asd['Paragraphs'][0].index(item) + 1
    print(f'Paragraph {idx}: {item}')

Paragraph 1: This article contains spoilers from One Piece's Elbaf Arc.
Paragraph 2: One Piecechapter 1130 proved to be absolutely stunning from start to finish and it is safe to say that this chapter shocked the fans to quite an extent withthe reveal of Loki so early into the arc. Truly, few were expecting Loki to be revealed in such fashion inOne Piecechapter 1130, and at the same time, even fewer individuals were expecting him to be as important as he has proven to be.
Paragraph 3: Loki is perhaps the most powerful giant. His strength might be on par with a Yonko in One Piece.
Paragraph 4: In this chapter, fans even got what can be considered to be a buildup of sorts towards a fight between Luffy, who is the Sun God Nika, and Loki, who also believes himself to be the Sun God. It looks like there is going to be a clash of Sun Gods afterOne Piecechapter 1130 and it is certainly going to be one worth watching, provided Loki does break free from his chains.
Paragraph 5: Luffy and Loki m