In [1]:
import os
# Move the working directory three levels up so the relative locations used
# later in the notebook (the `scripts` package imports and the `data/...`
# paths built from os.getcwd()) resolve from there.
# NOTE(review): presumably this lands on the repository root — confirm.
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel moves up another three levels each time.
os.chdir("../../../")

import pandas as pd
import numpy as np

import re
import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize

# Gensim
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel 
import spacy

from scripts.python.nlp.utils import *
from scripts.python.nlp.lda import *

In [2]:
# Folder (under the current working directory) that holds the news CSV files.
# The trailing slash is kept because file names are appended to it directly.
target = os.getcwd() + "/data/text/solomon_islands/"
# os.listdir returns entries in arbitrary order; sort the matches so the files
# are loaded, and their rows stacked, in the same order on every run.
news_path = sorted(filename for filename in os.listdir(target) if "news" in filename)

In [3]:
# Read every news file, drop the saved index column, and tag the rows with the
# file-name prefix (the file name without "_news.csv") in a "from" column.
frames = []
for path in news_path:
    temp = pd.read_csv(target + path).drop("Unnamed: 0", axis=1)
    temp["from"] = path.replace("_news.csv", "")
    frames.append(temp)

# Concatenate once instead of growing the frame inside the loop, which re-copies
# every accumulated row on each iteration. pd.concat raises on an empty list,
# so fall back to an empty frame when no file matched.
news = pd.concat(frames, axis=0) if frames else pd.DataFrame()

In [4]:
# Discard rows whose `news` column is missing, then renumber the rows from 0.
news = news.dropna(subset=["news"]).reset_index(drop=True)

In [5]:
# Load spaCy's small English pipeline and copy its default stop-word
# collection into a plain (mutable) list.
en = spacy.load("en_core_web_sm")
spacy_stopwords = [*en.Defaults.stop_words]

In [6]:
# Merge NLTK's English stop words into the spaCy list, appending only the
# words that are not already present.
nltk_stopwords = stopwords.words("english")
# Test membership against a set rather than scanning the list once per NLTK
# word; the selected words and their order are unchanged.
known_stopwords = set(spacy_stopwords)
nltk_unique = [sw for sw in nltk_stopwords if sw not in known_stopwords]
spacy_stopwords.extend(nltk_unique)

In [7]:
# Extract the `news` column as a plain Python list and run it through the
# project helper sent_to_words, materialising its output as a list.
# NOTE(review): sent_to_words comes from a wildcard import; presumably it
# tokenizes each document — confirm against scripts.python.nlp.
data = news["news"].to_numpy().tolist()
data_words = [*sent_to_words(data)]

In [8]:
# spaCy pipeline passed to preprocess_text. All components are loaded; the
# alternative spacy.load('en_core_web_sm', disable=['parser', 'ner']) is not used.
nlp = spacy.load("en_core_web_sm")

# Phrase detection with identical settings for both passes: bigrams are
# learned from the token lists, trigrams from the bigram-transformed stream.
phrase_kwargs = {"min_count": 5, "threshold": 100}

bigram = gensim.models.Phrases(data_words, **phrase_kwargs)
bigram_mod = gensim.models.phrases.Phraser(bigram)

trigram = gensim.models.Phrases(bigram[data_words], **phrase_kwargs)
trigram_mod = gensim.models.phrases.Phraser(trigram)

# Project helper (wildcard import) that receives the tokens, the merged
# stop-word list, both phrase models and the spaCy pipeline.
texts_preprocessed = preprocess_text(
    data_words, spacy_stopwords, bigram_mod, trigram_mod, nlp
)

Stopwords has been done.


In [9]:
# Build the token -> integer id dictionary from the preprocessed documents,
# then encode every document as a bag-of-words vector.
texts = texts_preprocessed
id2word = corpora.Dictionary(texts)
corpus = list(map(id2word.doc2bow, texts))

In [11]:
from gensim.models.wrappers import LdaMallet

# Path to the MALLET launcher, resolved one directory above the working directory.
mallet_path = os.getcwd() + '/../mallet-2.0.8/bin/mallet'
# Fit a 20-topic model. MALLET's sampler is stochastic and the wrapper's
# default random_seed=0 means "seed from the system clock", so a fixed seed is
# passed to make the fitted topics reproducible across runs.
ldamallet = LdaMallet(mallet_path, corpus=corpus, num_topics=20, id2word=id2word,
                      random_seed=42)

Mallet LDA: 20 topics, 5 topic bits, 11111 topic mask
Data loaded.
max tokens: 2592
total tokens: 4283306
<10> LL/token: -10.3998
<20> LL/token: -9.25882
<30> LL/token: -8.68029
<40> LL/token: -8.48979

0	2.5	country economic business system increase economy percent sector service investment growth provide financial access population improve rate impact level opportunity 
1	2.5	police officer rsipf incident force report investigation vehicle arrest medium suspect man operation station commissioner public information law involve continue 
2	2.5	service health medical provide work support disaster hospital people supply care equipment receive facility staff clinic patient centre include province 
3	2.5	court case charge accuse year allege victim man high prosecution matter public face yesterday allegedly order hear trial time evidence 
4	2.5	government policy national budget development plan ministry year address ensure fund process province report review priority corruption issue consul

<150> LL/token: -8.25721
<160> LL/token: -8.25357
<170> LL/token: -8.25159
<180> LL/token: -8.25008
<190> LL/token: -8.24925

0	2.5	business economic increase year country service percent economy investment growth financial sector high cost access rate expect provide revenue opportunity 
1	2.5	police officer rsipf force report incident investigation vehicle operation arrest medium suspect public station law commissioner continue information involve death 
2	2.5	service health medical provide support hospital disaster care work people equipment supply receive team include clinic patient centre nurse staff 
3	2.5	court case charge accuse year man allege victim prosecution high matter allegedly face public yesterday hear trial order time lawyer 
4	2.5	government policy development national plan ensure budget system address ministry strategy process priority consultation review stakeholder important implementation reform corruption 
5	2.5	issue company land report claim log question concer

<300> LL/token: -8.239
<310> LL/token: -8.23845
<320> LL/token: -8.23922
<330> LL/token: -8.23946
<340> LL/token: -8.23862

0	2.5	business year economic increase country service percent economy investment growth financial sector high cost access expect rate opportunity revenue provide 
1	2.5	police officer rsipf force report incident vehicle investigation operation medium arrest suspect public station law commissioner continue involve information act 
2	2.5	service health medical provide support hospital work care people disaster equipment supply team include receive child clinic patient nurse staff 
3	2.5	court case charge accuse year man allege victim prosecution matter high allegedly face public yesterday hear trial time order lawyer 
4	2.5	government policy development plan national ensure budget system address ministry strategy stakeholder priority consultation process important review implement implementation reform 
5	2.5	issue land company report claim log medium question conce

<450> LL/token: -8.23836
<460> LL/token: -8.23823
<470> LL/token: -8.23719
<480> LL/token: -8.23735
<490> LL/token: -8.23762

0	2.5	business year economic increase country service percent economy investment growth financial sector high cost access rate expect opportunity revenue provide 
1	2.5	police officer rsipf force report investigation vehicle incident operation medium public suspect arrest station law commissioner continue information involve community 
2	2.5	service health medical provide support hospital care work include supply receive equipment people child team clinic patient staff nurse centre 
3	2.5	court case charge accuse year man allege victim prosecution matter high allegedly public face yesterday hear trial order count lawyer 
4	2.5	government policy development plan national ensure budget system address strategy ministry stakeholder priority consultation important process implementation implement review key 
5	2.5	issue land company report claim medium log concern qu

<600> LL/token: -8.23797
<610> LL/token: -8.23895
<620> LL/token: -8.23949
<630> LL/token: -8.23861
<640> LL/token: -8.23917

0	2.5	business economic year increase country percent economy service investment growth financial sector high cost rate access revenue opportunity expect provide 
1	2.5	police officer rsipf force report vehicle investigation operation incident medium public suspect arrest station commissioner law continue information involve act 
2	2.5	service health medical provide support hospital people work care child receive include equipment supply clinic patient staff team nurse centre 
3	2.5	court case charge accuse man year allege victim prosecution matter high allegedly face public yesterday hear trial time order lawyer 
4	2.5	government development policy plan national ensure budget system address strategy priority stakeholder ministry important sector key consultation implement implementation process 
5	2.5	issue land company report medium claim log concern question 

<750> LL/token: -8.23958
<760> LL/token: -8.24139
<770> LL/token: -8.241
<780> LL/token: -8.24088
<790> LL/token: -8.24017

0	2.5	business year increase economic country percent economy service investment growth financial sector cost high rate expect total revenue government access 
1	2.5	police officer rsipf force report vehicle investigation operation incident medium suspect public arrest station law commissioner continue involve information community 
2	2.5	service health medical provide support hospital work care people child include receive equipment supply clinic patient staff nurse team centre 
3	2.5	court case charge accuse man year allege victim prosecution matter high allegedly face public hear yesterday trial order lawyer count 
4	2.5	government development policy national plan ensure system address strategy budget stakeholder sector priority ministry important key consultation implementation process implement 
5	2.5	issue land company medium claim log concern question deal 

<900> LL/token: -8.23998
<910> LL/token: -8.24133
<920> LL/token: -8.24159
<930> LL/token: -8.24036
<940> LL/token: -8.2421

0	2.5	business year increase economic country budget percent economy investment service financial growth government cost sector high rate expect revenue total 
1	2.5	police officer rsipf force report vehicle operation investigation incident medium suspect public arrest station commissioner law continue involve information community 
2	2.5	service health medical provide support hospital work people care child equipment include supply receive clinic staff patient nurse centre team 
3	2.5	court case charge accuse man year allege victim prosecution matter high allegedly face public yesterday hear trial count order time 
4	2.5	government development policy national plan ensure system address sector strategy key priority stakeholder support implement consultation important implementation ministry management 
5	2.5	issue land company medium claim log concern report ques

In [13]:
from gensim.models.wrappers import LdaMallet

# Path to the MALLET launcher, resolved one directory above the working directory.
mallet_path = os.getcwd() + '/../mallet-2.0.8/bin/mallet'

# Sweep over candidate topic counts with the project helper, which returns the
# fitted models together with their coherence scores.
# NOTE(review): whether `limit` is inclusive depends on the helper — confirm.
sweep_args = dict(
    dictionary=id2word,
    corpus=corpus,
    texts=texts_preprocessed,
    mallet_path=mallet_path,
    start=2,
    limit=20,
    step=1,
)
model_list, coherence_values = compute_coherence_values(**sweep_args)

Mallet LDA: 2 topics, 1 topic bits, 1 topic mask
Data loaded.
max tokens: 2592
total tokens: 4283306
<10> LL/token: -8.39325
<20> LL/token: -8.26887
<30> LL/token: -8.0447
<40> LL/token: -7.9616

0	25	government country support work development project provincial national people include issue provide service year student island leader member community province 
1	25	police people year officer time public report case day week team yesterday community court follow child family man high good 

<50> LL/token: -7.92392
<60> LL/token: -7.90241
<70> LL/token: -7.88851
<80> LL/token: -7.87863
<90> LL/token: -7.87212

0	25	government country support work development project people community provincial national include provide service year island woman student issue leader important 
1	25	police people year public time officer report week case day yesterday team court follow medium member family month call election 

<100> LL/token: -7.86749
<110> LL/token: -7.8633
<120> LL/token: -7.86055
<130>

<960> LL/token: -7.83878
<970> LL/token: -7.83838
<980> LL/token: -7.83892
<990> LL/token: -7.839

0	25	government country work support people community development project island include year national provide service woman student school leader provincial important 
1	25	police people public year officer time report case week member yesterday issue day follow court team medium company add call 

<1000> LL/token: -7.83949

Total time: 1 minutes 59 seconds
Mallet LDA: 3 topics, 2 topic bits, 11 topic mask
Data loaded.
max tokens: 2592
total tokens: 4283306
<10> LL/token: -8.77387
<20> LL/token: -8.40353
<30> LL/token: -8.11343
<40> LL/token: -8.03293

0	16.66667	government country support development project work woman provide community include island national program service people important policy ensure plan training 
1	16.66667	police government public issue officer member provincial report year court case company medium parliament election statement people land yesterday man 
2	16.

<650> LL/token: -7.94207
<660> LL/token: -7.94179
<670> LL/token: -7.94065
<680> LL/token: -7.94066
<690> LL/token: -7.94133

0	16.66667	government country support development project work woman provide include national program service island ensure important community policy plan fund training 
1	16.66667	police public government officer member issue report provincial year case court medium company yesterday parliament election statement man people call 
2	16.66667	people year community student team school covid time health week day child work family good water start province area island 

<700> LL/token: -7.94076
<710> LL/token: -7.94106
<720> LL/token: -7.94041
<730> LL/token: -7.94009
<740> LL/token: -7.94064

0	16.66667	government country support development project work woman provide include national program service island ensure important community plan policy fund training 
1	16.66667	police public government officer member issue report provincial year case court medium company

<310> LL/token: -7.98428
<320> LL/token: -7.98274
<330> LL/token: -7.98168
<340> LL/token: -7.97924

0	12.5	country support community development project service provide people work include covid health island government ensure plan area water economic increase 
1	12.5	year work woman student team school good local visit training time country day child island event people start week education 
2	12.5	government issue provincial member people leader company parliament national election land province process statement add public state decision office meeting 
3	12.5	police officer case report public court people year man yesterday time charge operation rsipf accuse high follow week area force 

<350> LL/token: -7.97916
<360> LL/token: -7.97871
<370> LL/token: -7.97853
<380> LL/token: -7.97922
<390> LL/token: -7.97942

0	12.5	country support development project community service provide include work people covid health government island ensure plan area water increase economic 
1	12.5	yea

<910> LL/token: -7.98072
<920> LL/token: -7.98171
<930> LL/token: -7.98153
<940> LL/token: -7.98141

0	12.5	country support development project community provide service work include government covid health island ensure plan increase area economic improve water 
1	12.5	year woman work people student team school good country visit local training time child day island event start today education 
2	12.5	government issue provincial member people leader company parliament election land national statement process add province state public decision office year 
3	12.5	police officer case report public court people man year yesterday operation charge time rsipf accuse week area high follow force 

<950> LL/token: -7.98211
<960> LL/token: -7.98183
<970> LL/token: -7.98065
<980> LL/token: -7.98133
<990> LL/token: -7.98123

0	12.5	country support development project community provide service work include government covid health island plan ensure area increase economic improve water 
1	12.5	yea

<410> LL/token: -8.03515
<420> LL/token: -8.03491
<430> LL/token: -8.03475
<440> LL/token: -8.03564

0	10	police officer year court public case man yesterday report time charge rsipf accuse high force allege incident follow find matter 
1	10	people covid health community service country province water week area medical work continue report travel number affect western island include 
2	10	country government development project support provide include work policy business economic island plan improve fund ensure develop service sector region 
3	10	government issue provincial member people company parliament election land leader statement public process national add decision state province office constituency 
4	10	woman year work student people school community country team visit training good child program event education support day time today 

<450> LL/token: -8.03487
<460> LL/token: -8.03457
<470> LL/token: -8.03491
<480> LL/token: -8.03416
<490> LL/token: -8.034

0	10	police offic

<910> LL/token: -8.03199
<920> LL/token: -8.03102
<930> LL/token: -8.0311
<940> LL/token: -8.03095

0	10	police officer public year court case man report yesterday charge rsipf accuse time high force order matter allege medium incident 
1	10	people community covid health service country water province week island area medical travel continue include western work affect number food 
2	10	country support development project government provide work include policy business economic plan service island improve ensure fund develop sector region 
3	10	government issue provincial member people company parliament leader election land statement national process province decision add state public office constituency 
4	10	year woman student work school people team community good training visit country child event time education program today day young 

<950> LL/token: -8.03014
<960> LL/token: -8.0312
<970> LL/token: -8.03156
<980> LL/token: -8.03178
<990> LL/token: -8.03137

0	10	police officer 

<310> LL/token: -8.08036
<320> LL/token: -8.07978
<330> LL/token: -8.0795
<340> LL/token: -8.07966

0	8.33333	work community support woman student school service program provide child project training people year education include health youth staff complete 
1	8.33333	country development government project support island policy economic include business sector provide increase region develop resource infrastructure ensure national regional 
2	8.33333	year country team people time visit day good island event today play place hold local start week open lead host 
3	8.33333	police officer public court case year man report charge rsipf accuse yesterday force matter money high allege incident order time 
4	8.33333	government provincial issue member people leader parliament election national statement decision process public state medium province add meeting constituency executive 
5	8.33333	covid people company land water health area operation medical province report travel road week marke

<710> LL/token: -8.0723
<720> LL/token: -8.07211
<730> LL/token: -8.07376
<740> LL/token: -8.07361

0	8.33333	work community woman support student school service program child people year training provide education complete important include youth staff family 
1	8.33333	country government development project support business policy include economic provide island sector develop region resource plan fund infrastructure ensure national 
2	8.33333	country team year people time visit good day island today event hold place play local start open lead host group 
3	8.33333	police officer public court case year man report charge rsipf accuse yesterday force money order matter allege incident arrest time 
4	8.33333	government issue provincial member people leader parliament election land national statement process public decision state province add medium constituency executive 
5	8.33333	people covid company health water area medical report province operation country market week travel road f

<100> LL/token: -8.1329
<110> LL/token: -8.12444
<120> LL/token: -8.11917
<130> LL/token: -8.11507
<140> LL/token: -8.11125

0	7.14286	year team visit time island good week event start day today open country local place hold play tourism official end 
1	7.14286	people community woman work country support leader training program member family important child speak youth young life good continue nation 
2	7.14286	student school covid health service country week people medical province case provide continue receive staff include response number situation child 
3	7.14286	government development project country support provide policy economic include improve develop sector fund plan region ensure rural national island management 
4	7.14286	government provincial member issue parliament election public statement decision process national state office constituency executive meeting add question interest political 
5	7.14286	police officer court public man case charge year rsipf accuse report y

<450> LL/token: -8.0907
<460> LL/token: -8.09237
<470> LL/token: -8.09061
<480> LL/token: -8.09087
<490> LL/token: -8.09144

0	7.14286	year team visit time island event country good day week today start open place play hold local end official host 
1	7.14286	people community work woman support training country program leader family important member child speak youth young good life continue encourage 
2	7.14286	student school covid health service country week province people medical case receive staff provide continue response number include situation child 
3	7.14286	government development project country support provide policy include economic improve fund develop sector plan service national ensure region rural infrastructure 
4	7.14286	government provincial issue member parliament election public statement decision process state national office constituency add executive leader province meeting political 
5	7.14286	police officer court public man case year charge rsipf report accus

<800> LL/token: -8.09405
<810> LL/token: -8.0939
<820> LL/token: -8.09421
<830> LL/token: -8.09487
<840> LL/token: -8.09407

0	7.14286	year team visit time island good event country day week today start place open play hold end lead local host 
1	7.14286	people community work woman support training program leader country important family child member speak good young youth life continue add 
2	7.14286	student school covid health service country week province people medical case receive continue staff number provide response year situation include 
3	7.14286	government development project country support provide policy economic include improve fund develop service sector plan national region ensure infrastructure island 
4	7.14286	government provincial issue member parliament election statement public decision process national state office constituency executive add leader province political meeting 
5	7.14286	police officer court public man case year charge rsipf report accuse force ye

<150> LL/token: -8.14816
<160> LL/token: -8.14652
<170> LL/token: -8.14461
<180> LL/token: -8.14415
<190> LL/token: -8.14242

0	6.25	government development country support policy economic national include sector project provide develop ensure region plan partner regional address important work 
1	6.25	provincial province week people government operation service western work travel yesterday road confirm add national follow month guadalcanal expect premier 
2	6.25	police officer court man case year charge public rsipf accuse force report allege incident arrest victim yesterday investigation matter vehicle 
3	6.25	government issue parliament member land public statement decision state medium process pay claim leader add question concern interest company law 
4	6.25	woman people country covid health service child case medical family community continue include response provide support worker work number hospital 
5	6.25	team election people year event member good today day leader hold time


0	6.25	government country development support policy economic national include sector ensure develop region island provide partner address regional important key security 
1	6.25	provincial province people week government operation western work travel road service yesterday confirm follow add month national expect guadalcanal area 
2	6.25	police officer court man case year public charge rsipf accuse force report allege incident arrest victim investigation yesterday matter order 
3	6.25	government issue member parliament public land statement decision medium process state pay leader add question claim concern interest call group 
4	6.25	woman people country covid health child community family service case medical continue include response number provide supply public worker hospital 
5	6.25	team people election year event time good day today hold visit member play leader lead country group host church game 
6	6.25	business company water local island year market increase food area high 

<800> LL/token: -8.1249
<810> LL/token: -8.12478
<820> LL/token: -8.12529
<830> LL/token: -8.12489
<840> LL/token: -8.12629

0	6.25	government country development support policy economic national include sector island region ensure develop address partner regional important provide key security 
1	6.25	provincial province people week government western work travel yesterday road area service confirm operation national add follow guadalcanal month expect 
2	6.25	police officer court man case public year charge rsipf accuse force allege report incident arrest victim matter investigation order yesterday 
3	6.25	government issue member parliament public statement land decision process medium state leader pay question add claim concern interest political group 
4	6.25	woman people country covid health child community family service case medical continue include response number provide public hospital report ensure 
5	6.25	team election people year event time day good today visit hold play m

<100> LL/token: -8.17416
<110> LL/token: -8.16696
<120> LL/token: -8.15998
<130> LL/token: -8.15553
<140> LL/token: -8.15271

0	5.55556	year student school visit people country child education speak time family study church today day teacher life work leader event 
1	5.55556	team training work good week time group start play youth year today event hold lead host end open day place 
2	5.55556	covid service health country case medical provide include number week response continue public risk hospital total receive border ensure report 
3	5.55556	police officer court man case public charge rsipf accuse year allege incident arrest force victim matter yesterday investigation report vehicle 
4	5.55556	member parliament election issue public leader medium decision statement government constituency office people provincial process question report executive claim premier 
5	5.55556	business company land local market year log pay operation money increase industry high cost product ship activity 

<400> LL/token: -8.11518
<410> LL/token: -8.11552
<420> LL/token: -8.1143
<430> LL/token: -8.11459
<440> LL/token: -8.1136

0	5.55556	year student school people child visit country education work family young speak study life church teacher day learn today event 
1	5.55556	team training week good time work start year group today play youth hold day yesterday event host lead end open 
2	5.55556	covid health service country case medical provide include week response continue number public hospital ensure risk receive border people confirm 
3	5.55556	police officer court man case charge public rsipf accuse year allege incident force arrest victim matter investigation vehicle yesterday order 
4	5.55556	member parliament election public issue medium statement decision constituency report office claim people process question call act law group bill 
5	5.55556	business company land local year market increase pay tourism cost log high percent money industry product operation food export sell 


<700> LL/token: -8.11514
<710> LL/token: -8.11489
<720> LL/token: -8.1147
<730> LL/token: -8.11494
<740> LL/token: -8.11586

0	5.55556	student year school people child country education work family visit young speak life study church teacher learn day event ceremony 
1	5.55556	team training week good time start year today work group day play yesterday hold youth open end event lead host 
2	5.55556	covid health service country case medical provide include week response continue number public people ensure hospital receive border risk confirm 
3	5.55556	police officer court man case charge rsipf public accuse year allege incident force arrest victim investigation matter yesterday vehicle report 
4	5.55556	member parliament election public medium issue statement decision people constituency report process office question claim call law act group bill 
5	5.55556	business company land year local market increase pay cost tourism money high log percent industry product operation country expor

<1000> LL/token: -8.11718

Total time: 1 minutes 50 seconds
Mallet LDA: 10 topics, 4 topic bits, 1111 topic mask
Data loaded.
max tokens: 2592
total tokens: 4283306
<10> LL/token: -9.83627
<20> LL/token: -8.92531
<30> LL/token: -8.50329
<40> LL/token: -8.36224

0	5	support work community program training government project provide assistance visit security include service australian important continue country build acknowledge deliver 
1	5	covid health country service case medical week people province travel hospital include number continue border risk response emergency test vaccine 
2	5	government land year budget fund add log expect cost ministry percent site project issue current company report province local increase 
3	5	people woman community area water family live road market village affect work island life home call problem day province honiara 
4	5	police officer court man case charge accuse year public allege incident arrest victim matter investigation yesterday vehicle pros

<250> LL/token: -8.15346
<260> LL/token: -8.1506
<270> LL/token: -8.14986
<280> LL/token: -8.14809
<290> LL/token: -8.14686

0	5	support work community country people government visit continue assistance provide program security island include australian official service acknowledge important rsipf 
1	5	covid health country service case medical week include number hospital travel border province people confirm continue ship response emergency test 
2	5	government project year company fund land development infrastructure budget ministry cost log increase percent include expect service funding plan financial 
3	5	people community area water live road market island family village affect food disaster report problem home boat province violence call 
4	5	police officer court man case charge year accuse public allege incident arrest victim yesterday investigation vehicle matter high prosecution appeal 
5	5	student school year work training child education program local week good staff learn 

<500> LL/token: -8.13659
<510> LL/token: -8.13626
<520> LL/token: -8.13677
<530> LL/token: -8.13666
<540> LL/token: -8.13747

0	5	support country work government people community visit continue island assistance security provide australian include official acknowledge important rsipf cooperation member 
1	5	covid health country service case medical week people include travel hospital number confirm province continue team risk response emergency test 
2	5	project government year company fund land development infrastructure service business cost ministry budget increase log percent financial funding expect benefit 
3	5	people community area water market live island road food family village affect disaster report problem home boat find time violence 
4	5	police officer court man case charge accuse year public allege incident arrest victim yesterday investigation vehicle high matter prosecution appeal 
5	5	student school year work training child program education youth week young staff lea

<750> LL/token: -8.13181
<760> LL/token: -8.13066
<770> LL/token: -8.13055
<780> LL/token: -8.13127
<790> LL/token: -8.13122

0	5	country support government people work visit continue island assistance security australian official include provide leader acknowledge nation community important meet 
1	5	covid health country service case medical week people include number travel hospital response province continue team confirm emergency test vaccine 
2	5	project government business company year fund land service infrastructure development cost increase ministry log budget percent financial funding local benefit 
3	5	people community area water island road live food market village affect disaster problem boat local find report family time sell 
4	5	police officer court man case charge accuse year public allege incident arrest victim yesterday investigation rsipf vehicle matter high prosecution 
5	5	student work school year child program training community education youth week young staff l

<1000> LL/token: -8.13399

Total time: 1 minutes 46 seconds
Mallet LDA: 11 topics, 4 topic bits, 1111 topic mask
Data loaded.
max tokens: 2592
total tokens: 4283306
<10> LL/token: -9.91868
<20> LL/token: -9.05861
<30> LL/token: -8.61531
<40> LL/token: -8.44342

0	4.54545	community support country people child provide continue assistance health include government province work ensure response service effort covid disaster australian 
1	4.54545	project student school year government fund education work development complete infrastructure road building study build teacher provincial plan facility national 
2	4.54545	water covid health medical case travel supply hospital food report day risk ship boat situation test number people western home 
3	4.54545	people issue country add market concern good security public area call address law state problem place talk continue sell deal 
4	4.54545	team event young year time day good play youth today host game island group woman hold start lead plac

<250> LL/token: -8.16291
<260> LL/token: -8.16111
<270> LL/token: -8.15955
<280> LL/token: -8.15847
<290> LL/token: -8.15842

0	4.54545	community support people province continue provide child covid assistance government health include ensure response work service operation rsipf team family 
1	4.54545	project student school year government development fund work education infrastructure complete build rural road building area plan study facility teacher 
2	4.54545	water health covid medical case travel food supply report week hospital day area ship number home boat risk test passenger 
3	4.54545	people country issue add public call leader good concern address time state continue law thing place situation urge nation speak 
4	4.54545	team event year young time day play youth island good today game woman host start group culture place hold competition 
5	4.54545	country development woman policy support region island develop regional include partner address key partnership national resour

<500> LL/token: -8.15263
<510> LL/token: -8.15224
<520> LL/token: -8.15133
<530> LL/token: -8.15207
<540> LL/token: -8.15129

0	4.54545	community support people provide continue province child covid health assistance include work ensure response operation team family rsipf government service 
1	4.54545	project student school government year fund development education work infrastructure complete rural build road area building plan study facility teacher 
2	4.54545	water medical health case covid travel report week day hospital area supply number home ship boat test confirm passenger risk 
3	4.54545	people country issue add leader public call medium concern good state address law time continue nation thing situation urge citizen 
4	4.54545	team event year time young day youth play woman good today island game host place start group culture hold competition 
5	4.54545	country development woman policy support region island develop regional include partner key address economic partnership 

<750> LL/token: -8.15021
<760> LL/token: -8.15066
<770> LL/token: -8.15141
<780> LL/token: -8.15155
<790> LL/token: -8.15165

0	4.54545	community support people continue provide child health covid include province assistance response operation ensure work government rsipf team service family 
1	4.54545	project student school government year fund development work education infrastructure complete build rural service road building plan area study facility 
2	4.54545	water medical case covid travel health report week day hospital area supply number people home ship boat test confirm risk 
3	4.54545	people country issue leader add public medium call concern good state law address nation time raise respect thing situation citizen 
4	4.54545	team event year time young woman day youth play good today island game host start place culture group hold family 
5	4.54545	country development woman policy support region develop island regional include economic address key partner partnership resource

<1000> LL/token: -8.15317

Total time: 1 minutes 53 seconds
Mallet LDA: 12 topics, 4 topic bits, 1111 topic mask
Data loaded.
max tokens: 2592
total tokens: 4283306
<10> LL/token: -10.00033
<20> LL/token: -9.15071
<30> LL/token: -8.57715
<40> LL/token: -8.39918

0	4.16667	government provincial leader issue people parliament member national decision executive province political premier bill interest current group state policy statement 
1	4.16667	country island increase water resource economic develop impact system management region level food global world economy high sustainable include environment 
2	4.16667	police officer covid community rsipf continue case force operation medium public response report people country border information conduct test vaccine 
3	4.16667	project government development fund support work service provide infrastructure plan rural improve ministry year budget tourism sector financial include funding 
4	4.16667	member people election day process constituency

<200> LL/token: -8.1778
<210> LL/token: -8.17709
<220> LL/token: -8.1755
<230> LL/token: -8.17414
<240> LL/token: -8.17307

0	4.16667	government provincial people province national issue leader parliament decision political add premier executive policy bill current opposition group statement law 
1	4.16667	country economic increase development policy develop sector resource impact percent level economy island global key management region system address include 
2	4.16667	covid country rsipf case continue operation response public travel week province border include health ensure officer community disaster force western 
3	4.16667	project government support development service work fund provide plan rural infrastructure improve year ministry build road program building include complete 
4	4.16667	member year election week yesterday day officer today chief general number constituency office month follow process hold late expect candidate 
5	4.16667	country support visit island leader wor

<450> LL/token: -8.16542
<460> LL/token: -8.16541
<470> LL/token: -8.16437
<480> LL/token: -8.16563
<490> LL/token: -8.16541

0	4.16667	government provincial people province national issue leader parliament decision political add premier executive current bill group policy opposition address law 
1	4.16667	country development economic increase policy sector develop resource impact percent economy level key global island include region system growth strategy 
2	4.16667	covid country case operation travel continue response health week public include border province ensure number boat team test vaccine follow 
3	4.16667	project support government service work development fund provide rural infrastructure plan year improve ministry build road include building deliver program 
4	4.16667	member year election week yesterday officer day today chief office general month expect number follow constituency process hold parliament late 
5	4.16667	country visit island support leader security people 

<700> LL/token: -8.15749
<710> LL/token: -8.15836
<720> LL/token: -8.15816
<730> LL/token: -8.15832
<740> LL/token: -8.1588

0	4.16667	government provincial people province national issue parliament leader decision add premier current political executive bill group opposition policy statement address 
1	4.16667	country development economic increase sector policy develop resource percent key economy level impact global region system address include growth challenge 
2	4.16667	covid country case health travel continue operation response week include number border public province ensure boat test vaccine ship emergency 
3	4.16667	project support service government work development fund provide rural infrastructure plan year build improve ministry road building include facility deliver 
4	4.16667	member year election week yesterday officer day month chief today general follow office number constituency process expect candidate hold vote 
5	4.16667	country visit island support leader securi

<950> LL/token: -8.15957
<960> LL/token: -8.15935
<970> LL/token: -8.15804
<980> LL/token: -8.15941
<990> LL/token: -8.15896

0	4.16667	government provincial people province national issue leader parliament add decision premier current political executive bill group opposition address guadalcanal policy 
1	4.16667	country development economic sector policy increase develop key percent economy resource level impact global system address region challenge growth investment 
2	4.16667	covid country case health travel continue operation week response number border include public ensure province ship boat follow test vaccine 
3	4.16667	project support service work government development fund provide rural infrastructure plan year build ministry improve building road include facility deliver 
4	4.16667	member year election week yesterday officer day chief office general today number process constituency follow expect month confirm candidate parliament 
5	4.16667	country visit island support l

<150> LL/token: -8.20178
<160> LL/token: -8.19668
<170> LL/token: -8.19131
<180> LL/token: -8.18712
<190> LL/token: -8.18408

0	3.84615	court man case charge year accuse police allege incident arrest victim matter investigation prosecution yesterday high public suspect face allegedly 
1	3.84615	government parliament issue decision public statement member medium state political question process group bill concern opposition claim law interest act 
2	3.84615	project student school year work fund education development complete infrastructure building build study staff road constituency facility teacher provide plan 
3	3.84615	government provincial people province national leader member issue meeting western executive premier guadalcanal support add work continue church address hold 
4	3.84615	woman community program support training work child service provide youth young include programme launch improve train important activity access participant 
5	3.84615	country development support pol

<350> LL/token: -8.17032
<360> LL/token: -8.16983
<370> LL/token: -8.16971
<380> LL/token: -8.16959
<390> LL/token: -8.16919

0	3.84615	court man case year charge accuse allege victim matter yesterday arrest high prosecution public suspect face time allegedly police hear 
1	3.84615	parliament public decision issue member statement government medium election state political question process concern group bill opposition act law claim 
2	3.84615	project student school year fund work development education complete infrastructure building constituency rural build study support staff road teacher provide 
3	3.84615	government provincial people province national leader issue member meeting executive western add premier guadalcanal continue address support work meet hold 
4	3.84615	community woman program work support training child provide youth service young include programme important opportunity launch train role activity participant 
5	3.84615	country development policy economic support 

<550> LL/token: -8.17017
<560> LL/token: -8.17094
<570> LL/token: -8.17099
<580> LL/token: -8.16967
<590> LL/token: -8.17161

0	3.84615	court man case year charge accuse allege victim matter high yesterday prosecution public time face allegedly hear arrest order represent 
1	3.84615	parliament election decision member public medium statement process political question bill issue state group opposition act law candidate vote claim 
2	3.84615	project student school year fund work development education complete infrastructure support building rural build study road teacher staff provide facility 
3	3.84615	government provincial province people national leader issue member meeting western executive add premier guadalcanal address support meet continue work hold 
4	3.84615	community woman work program support training child provide youth young opportunity service programme important include train activity learn australian participant 
5	3.84615	country development policy economic support se

<750> LL/token: -8.17173
<760> LL/token: -8.17174
<770> LL/token: -8.17209
<780> LL/token: -8.17267
<790> LL/token: -8.17359

0	3.84615	court man case year charge accuse allege victim matter yesterday high public prosecution time face allegedly order hear trial arrest 
1	3.84615	parliament election member decision medium statement public process political bill question group law act opposition state candidate vote general party 
2	3.84615	project student school year work fund development education complete support rural infrastructure building provide build study service road teacher improve 
3	3.84615	government provincial province people issue national leader meeting member executive add western premier guadalcanal address current work plan meet policy 
4	3.84615	community woman work program support training child youth provide young opportunity important include programme service train activity role encourage learn 
5	3.84615	country development economic support sector policy region

<950> LL/token: -8.17019
<960> LL/token: -8.17005
<970> LL/token: -8.17158
<980> LL/token: -8.17064
<990> LL/token: -8.17056

0	3.84615	court case man year charge accuse allege matter victim high yesterday public prosecution face time order allegedly hear trial count 
1	3.84615	parliament election member decision medium public statement political process bill question law group opposition act state candidate vote general call 
2	3.84615	project student school year work fund development education complete rural infrastructure support building build service study road provide teacher improve 
3	3.84615	government provincial province issue people national leader meeting member executive western add premier address guadalcanal current budget meet plan work 
4	3.84615	community woman work program support training child youth opportunity provide young important programme include activity train encourage role learn service 
5	3.84615	country development support economic sector policy region d

<150> LL/token: -8.19035
<160> LL/token: -8.186
<170> LL/token: -8.18353
<180> LL/token: -8.18078
<190> LL/token: -8.17833

0	3.57143	team water event time good play host game start week year hold place competition final player match open today lead 
1	3.57143	country development economic support region island develop regional partner include sector key policy global strengthen cooperation resource strategy level address 
2	3.57143	people country visit year island nation church world today day speak leader time live good culture peace islander violence life 
3	3.57143	government provincial province national land issue policy process budget premier executive propose ministry system guadalcanal consultation add current plan sign 
4	3.57143	court case charge year accuse allege man matter high public prosecution order yesterday victim allegedly money face date time hear 
5	3.57143	police officer medium rsipf operation force information law community report continue security public investig

<350> LL/token: -8.15769
<360> LL/token: -8.15793
<370> LL/token: -8.15818
<380> LL/token: -8.15727
<390> LL/token: -8.15666

0	3.57143	team good event time play game host start year place week final today hold competition player match group goal open 
1	3.57143	country development economic support region island develop regional sector partner include key policy global resource cooperation strengthen strategy level address 
2	3.57143	people country visit year island leader nation church today good speak work peace islander culture time day ceremony traditional continue 
3	3.57143	government provincial province national land policy budget issue process executive premier western ministry current propose development guadalcanal plan consultation system 
4	3.57143	court case year charge accuse man allege victim matter high public yesterday prosecution order face allegedly time money hear date 
5	3.57143	police officer medium rsipf operation force law report community information continue p

<550> LL/token: -8.14992
<560> LL/token: -8.1502
<570> LL/token: -8.15068
<580> LL/token: -8.14992
<590> LL/token: -8.15095

0	3.57143	team good event time play year game start host week place final competition lead player today match hold open goal 
1	3.57143	country development economic region island support develop regional include partner sector key resource strengthen policy cooperation global strategy level challenge 
2	3.57143	people country visit year leader island nation today church speak good peace culture ceremony islander work time serve continue traditional 
3	3.57143	government provincial province national land policy budget issue premier executive process ministry western development propose current add plan guadalcanal meeting 
4	3.57143	court case man charge year accuse allege victim high matter prosecution public yesterday face time allegedly order hear money trial 
5	3.57143	police officer medium rsipf operation force report information continue community public law

<750> LL/token: -8.15626
<760> LL/token: -8.15585
<770> LL/token: -8.1554
<780> LL/token: -8.15657
<790> LL/token: -8.15644

0	3.57143	team good time event play game year start host place final week competition group player match stage lead today open 
1	3.57143	country development region support economic island develop regional include partner sector policy key resource global strengthen strategy address level challenge 
2	3.57143	people country visit year leader island nation today church good speak peace culture continue work ceremony time islander serve event 
3	3.57143	government provincial province national land policy budget issue premier western executive plan ministry people process current development guadalcanal propose add 
4	3.57143	court case man year charge accuse allege victim high matter yesterday prosecution public face time order allegedly hear money trial 
5	3.57143	police officer rsipf operation force continue public report medium vehicle community law investigatio

<950> LL/token: -8.15847
<960> LL/token: -8.15942
<970> LL/token: -8.16005
<980> LL/token: -8.15971
<990> LL/token: -8.15932

0	3.57143	team good time event play game start year place host final week competition hold player match today stage lead goal 
1	3.57143	country development region economic support island develop regional sector include partner policy key resource strengthen global level address strategy ensure 
2	3.57143	people country visit year leader island nation today church speak good peace work culture ceremony event islander continue serve time 
3	3.57143	government provincial province national land policy budget issue people executive premier western add plan ministry current guadalcanal development propose process 
4	3.57143	court case man year charge accuse allege victim high matter public prosecution yesterday face time allegedly hear order money trial 
5	3.57143	police officer rsipf operation force public continue vehicle report medium investigation law incident co

<150> LL/token: -8.22321
<160> LL/token: -8.22035
<170> LL/token: -8.21689
<180> LL/token: -8.21238
<190> LL/token: -8.20916

0	3.33333	police officer man rsipf accuse charge allege incident arrest victim investigation vehicle court prosecution force case report suspect allegedly year 
1	3.33333	project service development fund government support work provide improve infrastructure rural access build budget road facility funding ministry percent building 
2	3.33333	government issue statement decision report state medium concern question interest add call group claim opposition raise deal recent cabinet source 
3	3.33333	water community market area food management disaster product report environment fishery affect local agriculture village resource supply assessment farmer provide 
4	3.33333	public court money pay order high case law payment legal office receive give month application account time regulation document rule 
5	3.33333	company land operation issue business area people log 

<350> LL/token: -8.18693
<360> LL/token: -8.18638
<370> LL/token: -8.18525
<380> LL/token: -8.18499
<390> LL/token: -8.18563

0	3.33333	police officer man rsipf accuse charge allege incident arrest victim investigation force vehicle report prosecution suspect year court case allegedly 
1	3.33333	project service development government fund support work provide infrastructure improve rural access build budget road ministry building facility funding plan 
2	3.33333	government issue statement report medium decision state concern call question add interest group raise opposition claim deal leader public situation 
3	3.33333	water community market area food island disaster village product fishery affect agriculture local supply management environment report farmer clean live 
4	3.33333	public court money order pay case high law office payment legal receive give month application corruption matter time document account 
5	3.33333	company land operation issue business people log area local sit

<550> LL/token: -8.18184
<560> LL/token: -8.18205
<570> LL/token: -8.1825
<580> LL/token: -8.18218
<590> LL/token: -8.18122

0	3.33333	police officer man rsipf accuse allege incident arrest victim force investigation vehicle charge report suspect allegedly year station count medium 
1	3.33333	project service development government fund support work provide infrastructure improve rural build ministry road budget access plan building facility funding 
2	3.33333	government issue report statement decision medium state concern call question add interest leader public raise group opposition claim deal situation 
3	3.33333	water area market community food island disaster people product village fishery affect agriculture local supply live farmer clean small environment 
4	3.33333	public court case money order pay high office payment law matter legal receive charge give month application year prosecution time 
5	3.33333	company land operation issue log people local area ship business operate ad

<750> LL/token: -8.17557
<760> LL/token: -8.17552
<770> LL/token: -8.17549
<780> LL/token: -8.17634
<790> LL/token: -8.17665

0	3.33333	police officer man rsipf force report incident arrest victim investigation vehicle suspect station medium commissioner operation public morning boat area 
1	3.33333	project service government development fund support work provide infrastructure rural improve ministry build budget plan building road access facility funding 
2	3.33333	government issue statement report decision medium public state concern call question add interest raise opposition claim leader deal group action 
3	3.33333	water area market food island community people disaster village affect local supply live small agriculture farmer clean environment fishery produce 
4	3.33333	court case public accuse charge year money high matter order prosecution payment allege pay face time yesterday hear application month 
5	3.33333	company land issue operation log ship operate add business local la

<950> LL/token: -8.18126
<960> LL/token: -8.18133
<970> LL/token: -8.18219
<980> LL/token: -8.18251
<990> LL/token: -8.18322

0	3.33333	police officer rsipf man report force incident arrest victim investigation vehicle suspect station operation medium public commissioner boat continue morning 
1	3.33333	project service development fund government work support provide infrastructure rural improve ministry build building budget plan road access facility area 
2	3.33333	government issue public statement medium report decision state concern question call interest add raise opposition claim leader deal action group 
3	3.33333	water area people island market food community disaster village affect live supply local farmer clean small fishery find environment damage 
4	3.33333	court case charge year accuse public money high allege matter prosecution order yesterday face time payment hear month represent trial 
5	3.33333	company land issue operation log ship operate landowner add business pay l

<150> LL/token: -8.2396
<160> LL/token: -8.23471
<170> LL/token: -8.23057
<180> LL/token: -8.22649
<190> LL/token: -8.22314

0	3.125	development country policy economic region develop regional sector resource key economy challenge address strategy global level sustainable growth achieve partner 
1	3.125	report yesterday week people call leave early day accord late morning boat follow find night time province area close body 
2	3.125	court man case charge accuse year police allege arrest victim investigation matter incident prosecution public suspect high allegedly face hear 
3	3.125	government public decision parliament statement issue budget question bill state opposition add concern political corruption raise medium current power cabinet 
4	3.125	project market development rural infrastructure fund year increase percent business improve work local road build access product benefit cost agriculture 
5	3.125	woman training community program people work youth young opportunity train par

<350> LL/token: -8.19651
<360> LL/token: -8.19582
<370> LL/token: -8.1952
<380> LL/token: -8.1952
<390> LL/token: -8.19557

0	3.125	development country policy economic develop sector regional region resource tourism economy key challenge global strategy address level growth sustainable achieve 
1	3.125	report people yesterday family call time leave late home happen morning week find accord boat early night day body close 
2	3.125	court case man charge accuse year police allege arrest victim investigation vehicle incident matter public prosecution suspect high allegedly face 
3	3.125	government public parliament decision statement issue budget state bill question opposition add political concern group medium leader current corruption raise 
4	3.125	project market rural year infrastructure road development increase business fund local percent improve cost build work access product building benefit 
5	3.125	woman community training people program work youth young opportunity train group f

<550> LL/token: -8.1951
<560> LL/token: -8.19407
<570> LL/token: -8.19589
<580> LL/token: -8.19621
<590> LL/token: -8.19628

0	3.125	development country policy economic develop sector regional resource economy region challenge key level growth strategy global sustainable address investment opportunity 
1	3.125	people report family call time home leave yesterday late happen morning find boat night day body village live early life 
2	3.125	court case man charge accuse year allege victim matter arrest prosecution police public high suspect incident face allegedly yesterday hear 
3	3.125	government public parliament decision issue statement bill state question opposition concern add medium political group leader corruption budget raise power 
4	3.125	project market year rural infrastructure fund increase road development work build percent improve access cost building local product facility complete 
5	3.125	woman community training people program work youth young opportunity child train r

<750> LL/token: -8.19548
<760> LL/token: -8.19436
<770> LL/token: -8.19517
<780> LL/token: -8.19429
<790> LL/token: -8.19387

0	3.125	country development policy economic develop sector economy resource key region challenge system address growth global level strategy sustainable investment regional 
1	3.125	people family home time call leave report late village happen morning find yesterday boat day life body live night early 
2	3.125	court case charge accuse year man allege victim matter high public prosecution face yesterday allegedly hear order trial arrest time 
3	3.125	government public issue decision parliament statement medium state question bill add concern group opposition political leader call raise corruption power 
4	3.125	project market infrastructure work year rural build road increase local access building percent improve facility cost product complete area construction 
5	3.125	woman community training program people work youth young opportunity important train role chil

<950> LL/token: -8.19668
<960> LL/token: -8.19549
<970> LL/token: -8.19494
<980> LL/token: -8.19532
<990> LL/token: -8.19462

0	3.125	country development policy economic develop sector economy resource key system region challenge growth global strategy level address sustainable investment achieve 
1	3.125	people family time home call leave village late morning night find happen live boat day life body yesterday man stop 
2	3.125	court case charge year accuse man allege victim high matter public prosecution yesterday face allegedly order hear trial count represent 
3	3.125	government public issue decision parliament statement medium state bill concern question group opposition political add leader raise report call corruption 
4	3.125	project market work infrastructure road build year local building rural improve access increase percent facility complete product cost area construction 
5	3.125	woman community training people work program youth young opportunity child support train role 

<100> LL/token: -8.26088
<110> LL/token: -8.25288
<120> LL/token: -8.24586
<130> LL/token: -8.23918
<140> LL/token: -8.2343

0	2.94118	woman officer police rsipf security force work operation continue community support ramsi mission commissioner assistance include family violence law ensure 
1	2.94118	country development policy region plan regional island partner develop national address support include key ensure strategy strengthen stakeholder cooperation priority 
2	2.94118	court case charge accuse year public allege high matter prosecution order face yesterday time man hear victim allegedly trial represent 
3	2.94118	covid health country medical case service hospital people emergency test vaccine response care risk include week clinic patient nurse measure 
4	2.94118	issue medium public statement concern state decision report country question call raise add information action law interest deal recent fact 
5	2.94118	company land pay issue log money business payment process sign cla

<250> LL/token: -8.2165
<260> LL/token: -8.21428
<270> LL/token: -8.21492
<280> LL/token: -8.21405
<290> LL/token: -8.21419

0	2.94118	woman police officer community rsipf force security work operation continue ramsi commissioner law include mission violence ensure support member order 
1	2.94118	country development policy region regional island develop partner address plan national include support strategy key stakeholder cooperation ensure international strengthen 
2	2.94118	court case charge accuse year public allege high prosecution matter face order man yesterday time hear victim allegedly trial represent 
3	2.94118	covid health country medical case people service hospital emergency test vaccine risk response care include week clinic patient nurse measure 
4	2.94118	issue medium public statement concern state decision call report question add raise information action interest opposition law deal country power 
5	2.94118	company land pay issue money log business payment process sig

<400> LL/token: -8.21168
<410> LL/token: -8.21058
<420> LL/token: -8.21085
<430> LL/token: -8.21211
<440> LL/token: -8.21117

0	2.94118	woman police officer community rsipf force work security operation continue law ramsi commissioner ensure violence member mission public include activity 
1	2.94118	country development policy region island regional address develop partner include plan strategy national key stakeholder cooperation support strengthen international global 
2	2.94118	court case charge accuse year public high allege matter prosecution order face man yesterday hear victim time allegedly trial represent 
3	2.94118	covid health country medical case people service hospital emergency test vaccine response risk care clinic patient include week nurse positive 
4	2.94118	issue medium statement public concern decision state call people question report add raise information opposition action interest country deal law 
5	2.94118	company land issue pay business log money payment proces

<550> LL/token: -8.20986
<560> LL/token: -8.21058
<570> LL/token: -8.20987
<580> LL/token: -8.21071
<590> LL/token: -8.2096

0	2.94118	woman police officer community rsipf force work operation security continue ramsi commissioner law violence member ensure order mission include activity 
1	2.94118	country development region policy island regional address develop include partner strategy key cooperation national plan stakeholder international global support strengthen 
2	2.94118	court case charge accuse year public allege high matter prosecution face order man yesterday hear time victim money trial allegedly 
3	2.94118	covid health country medical case people service hospital emergency test vaccine response care risk clinic patient include nurse week measure 
4	2.94118	issue medium public people statement concern decision call state question add report raise opposition country action information law interest deal 
5	2.94118	company land issue pay business log process payment money sign 

<700> LL/token: -8.21154
<710> LL/token: -8.21203
<720> LL/token: -8.21137
<730> LL/token: -8.21164
<740> LL/token: -8.21186

0	2.94118	woman police officer community rsipf force work operation security continue commissioner law ramsi violence member ensure activity public order mission 
1	2.94118	country development region island policy regional address develop include partner strategy cooperation key support international global strengthen national meeting challenge 
2	2.94118	court case charge accuse year public allege high prosecution matter face order man yesterday time hear victim allegedly trial money 
3	2.94118	covid health country medical case people service hospital emergency test vaccine response care risk clinic patient week include nurse measure 
4	2.94118	issue people medium public concern statement decision call state question add raise opposition report country interest action information deal law 
5	2.94118	company land issue business pay log process sign payment lando

<850> LL/token: -8.2138
<860> LL/token: -8.21355
<870> LL/token: -8.2137
<880> LL/token: -8.21344
<890> LL/token: -8.21324

0	2.94118	woman police officer community rsipf force work operation security continue law ramsi commissioner member violence public ensure order include mission 
1	2.94118	country development region island regional policy address include develop partner strategy meeting support cooperation global international challenge key strengthen issue 
2	2.94118	court case charge accuse year public allege high matter prosecution face man order yesterday time hear victim money allegedly trial 
3	2.94118	covid health country medical case people service hospital emergency test vaccine response care risk clinic patient nurse include week positive 
4	2.94118	issue medium people public concern statement decision call state question leader add raise report opposition information action interest country deal 
5	2.94118	company land issue business pay log process payment sign claim l

<1000> LL/token: -8.21438

Total time: 2 minutes 7 seconds
Mallet LDA: 18 topics, 5 topic bits, 11111 topic mask
Data loaded.
max tokens: 2592
total tokens: 4283306
<10> LL/token: -10.3119
<20> LL/token: -9.30486
<30> LL/token: -8.72759
<40> LL/token: -8.50534

0	2.77778	country increase year island percent tourism industry economy high international fishery product export cost economic total rate investment world growth 
1	2.77778	government land issue parliament decision member executive minister group meeting bill statement opposition claim interest landowner sign agreement confidence cabinet 
2	2.77778	development support policy government economic develop sector partner provide improve regional key include priority strategy strengthen implementation sustainable implement plan 
3	2.77778	water food island area people disaster supply affect community live clean system damage impact source small access resident experience problem 
4	2.77778	man year family time yesterday late victim 

<150> LL/token: -8.19878
<160> LL/token: -8.19693
<170> LL/token: -8.19449
<180> LL/token: -8.19056
<190> LL/token: -8.18784

0	2.77778	country increase business year percent tourism economy economic industry investment product sector financial high growth market cost fishery international export 
1	2.77778	government land decision parliament issue group bill minister opposition power member political interest landowner sign agreement claim cabinet leader confidence 
2	2.77778	development policy country develop regional economic key include system address region ensure resource management partner sector support strategy plan strengthen 
3	2.77778	water people island area food community disaster affect supply live environment clean damage village small resident sea impact world problem 
4	2.77778	man family victim time leave home year late night day people call find girl village violence life body happen morning 
5	2.77778	woman community training program work youth opportunity programm

<300> LL/token: -8.17887
<310> LL/token: -8.17937
<320> LL/token: -8.17995
<330> LL/token: -8.17924
<340> LL/token: -8.17939

0	2.77778	business country increase year market percent tourism economic economy industry sector investment product growth financial high cost fishery benefit export 
1	2.77778	government land parliament decision issue group bill minister opposition member meeting political sign leader agreement current interest cabinet propose confidence 
2	2.77778	development policy country develop regional region include key address system resource economic management plan strategy partner sector ensure national sustainable 
3	2.77778	water people area island food community disaster affect supply live environment clean damage village small world local fish impact resident 
4	2.77778	family man time people leave home victim day night village late life girl call body find happen morning year death 
5	2.77778	woman community training program work youth young opportunity programm

<450> LL/token: -8.17757
<460> LL/token: -8.17707
<470> LL/token: -8.17709
<480> LL/token: -8.17739
<490> LL/token: -8.17679

0	2.77778	business country increase market year tourism percent economic economy industry sector investment product growth financial local high agriculture cost farmer 
1	2.77778	government land parliament decision issue group meeting member bill minister opposition political sign leader cabinet process interest agreement payment propose 
2	2.77778	development country policy develop regional region economic include address key resource plan management system strategy national sector ensure sustainable partner 
3	2.77778	water area island people food community disaster affect supply live environment clean damage small report problem fish world local village 
4	2.77778	family people time man leave home village late day night find life body call morning happen year live death yesterday 
5	2.77778	woman community training program work youth young opportunity people 

<600> LL/token: -8.17896
<610> LL/token: -8.18036
<620> LL/token: -8.17879
<630> LL/token: -8.1799
<640> LL/token: -8.18052

0	2.77778	business country increase market year tourism economic percent economy sector industry investment product growth local financial high agriculture benefit farmer 
1	2.77778	government land parliament decision issue group meeting member bill opposition minister political leader process cabinet payment sign interest propose confidence 
2	2.77778	development country policy develop region regional economic resource key include plan address system management strategy sustainable national sector ensure level 
3	2.77778	water area island people food community disaster affect supply environment live clean damage report world small local problem fish village 
4	2.77778	family people time home man leave late day village life night find body call happen morning year live death child 
5	2.77778	woman community training program work youth people young opportunity tra

<750> LL/token: -8.18332
<760> LL/token: -8.1838
<770> LL/token: -8.18315
<780> LL/token: -8.18404
<790> LL/token: -8.18475

0	2.77778	business country increase market year tourism percent economy economic industry sector investment product local growth financial agriculture high sell cost 
1	2.77778	government land parliament decision issue group meeting member bill minister opposition leader political cabinet process payment propose current position motion 
2	2.77778	development country policy develop region regional economic resource address key include system management plan strategy sector sustainable national partner level 
3	2.77778	water island area people community food disaster affect supply environment live clean damage world report small fish impact local source 
4	2.77778	family people time home man leave village day late night life year find body call happen morning live child death 
5	2.77778	woman community training program work youth child young people opportunity trai

<900> LL/token: -8.18313
<910> LL/token: -8.18263
<920> LL/token: -8.18361
<930> LL/token: -8.18401
<940> LL/token: -8.1844

0	2.77778	business market country increase year tourism percent economy economic industry local sector product investment financial growth high agriculture sell farmer 
1	2.77778	government land parliament decision issue member group meeting bill minister opposition leader political process cabinet propose current motion payment position 
2	2.77778	development country policy develop region economic regional key resource include address sector system strategy management plan sustainable ensure national level 
3	2.77778	water area island people community food disaster affect supply live environment report clean damage world small fish impact fishing sea 
4	2.77778	family people time home man leave day village late life night year find call body happen morning live child death 
5	2.77778	woman community training program work youth child people young opportunity trai

<50> LL/token: -8.39191
<60> LL/token: -8.33668
<70> LL/token: -8.30485
<80> LL/token: -8.28305
<90> LL/token: -8.26885

0	2.63158	country island visit support government tourism meet region regional meeting assistance include international security cooperation official australian sign agreement partnership 
1	2.63158	project government development fund support work infrastructure rural plan build building funding area ministry complete improve provide construction road facility 
2	2.63158	woman community training program support youth young work programme participant train awareness violence important role launch workshop knowledge activity include 
3	2.63158	court case charge accuse man allege arrest victim year matter prosecution incident police suspect public high allegedly face trial hear 
4	2.63158	work year study local job opportunity good award serve islander country hard offer time challenge staff experience continue receive add 
5	2.63158	covid country case public record test

<200> LL/token: -8.21976
<210> LL/token: -8.21762
<220> LL/token: -8.21724
<230> LL/token: -8.21525
<240> LL/token: -8.21417

0	2.63158	country visit island support meeting meet region government tourism official regional include security cooperation sign assistance international agreement partnership discuss 
1	2.63158	project government development fund support work infrastructure rural plan build building funding complete ministry area road improve construction provide facility 
2	2.63158	woman community training program support youth young programme work participant train family awareness violence activity child include role knowledge important 
3	2.63158	court case charge accuse man year allege arrest victim matter prosecution high public allegedly face yesterday trial hear lawyer evidence 
4	2.63158	work business year local opportunity service good job worker country islander australian offer award serve time continue provide hard add 
5	2.63158	covid country case border record n

<350> LL/token: -8.21301
<360> LL/token: -8.21157
<370> LL/token: -8.21089
<380> LL/token: -8.21152
<390> LL/token: -8.21204

0	2.63158	country visit island meeting support meet region official tourism regional government include security cooperation leader sign international agreement state discuss 
1	2.63158	project government development fund support infrastructure work rural plan build building funding complete ministry area road improve construction provide include 
2	2.63158	woman community training program support youth young programme child participant train work family activity awareness violence role knowledge include workshop 
3	2.63158	court case charge accuse year allege man victim matter public high prosecution allegedly face yesterday trial order hear evidence lawyer 
4	2.63158	work business year service local opportunity good country worker australian job serve islander continue time award provide offer add support 
5	2.63158	covid country case border number record test

<500> LL/token: -8.21179
<510> LL/token: -8.213
<520> LL/token: -8.21265
<530> LL/token: -8.2126
<540> LL/token: -8.21218

0	2.63158	country visit island meeting support meet official region government leader regional include security tourism cooperation sign agreement assistance discuss international 
1	2.63158	project government development fund support infrastructure work rural build plan building funding complete ministry area road construction year provide improve 
2	2.63158	woman community training program support youth young child work programme train participant activity family awareness violence important include knowledge workshop 
3	2.63158	court case charge accuse year allege man victim high matter public prosecution allegedly yesterday face order hear trial evidence lawyer 
4	2.63158	work business year service local opportunity good country australian job worker provide time continue serve islander award offer support add 
5	2.63158	covid country case border test public va

<650> LL/token: -8.21467
<660> LL/token: -8.21522
<670> LL/token: -8.2149
<680> LL/token: -8.21547
<690> LL/token: -8.21576

0	2.63158	country visit island meeting meet leader official support region government regional include security cooperation sign discuss agreement assistance state delegation 
1	2.63158	project government development fund support infrastructure work rural build plan building funding complete area ministry year road budget improve construction 
2	2.63158	woman community training program support youth young child programme work participant train activity family awareness violence knowledge important include skill 
3	2.63158	court case charge accuse year allege man victim high public matter prosecution allegedly face yesterday order hear trial represent lawyer 
4	2.63158	work business service year local opportunity country australian good provide worker job continue islander support time add award serve hard 
5	2.63158	covid country case border test public vaccine n

<800> LL/token: -8.21731
<810> LL/token: -8.2177
<820> LL/token: -8.21612
<830> LL/token: -8.21505
<840> LL/token: -8.21688

0	2.63158	country visit island meeting leader meet official support government region include regional security cooperation sign state discuss issue agreement assistance 
1	2.63158	government project development fund support work infrastructure rural build budget year building plan funding complete ministry road area construction provide 
2	2.63158	woman community training program support youth young child work programme participant train activity awareness family violence knowledge skill important people 
3	2.63158	court case charge accuse year allege man victim public matter high prosecution allegedly face yesterday hear order trial evidence lawyer 
4	2.63158	work business service local year country opportunity tourism australian good provide job continue worker support islander time add serve hard 
5	2.63158	covid country case border public test vaccine number

<950> LL/token: -8.2152
<960> LL/token: -8.21571
<970> LL/token: -8.21584
<980> LL/token: -8.2147
<990> LL/token: -8.21498

0	2.63158	country visit island meeting leader official support meet government region regional security cooperation include sign issue state agreement discuss assistance 
1	2.63158	government project development fund support work infrastructure rural budget build plan year building funding ministry complete road area construction improve 
2	2.63158	woman community training program support youth work young child programme train participant important awareness skill family activity violence knowledge role 
3	2.63158	court case charge accuse year allege man public victim high prosecution matter allegedly face yesterday hear order trial evidence represent 
4	2.63158	work business service local country opportunity tourism year provide australian worker job good islander continue time add experience support offer 
5	2.63158	covid country case border test vaccine public 

In [14]:
# Select the candidate model at the position of the largest entry in
# `coherence_values`, then show its topics.
best_position = np.argmax(coherence_values)
best_model = model_list[best_position]
best_model.print_topics()

[(0,
  '0.030*"business" + 0.023*"country" + 0.022*"market" + 0.022*"increase" + 0.015*"year" + 0.015*"tourism" + 0.014*"percent" + 0.013*"economy" + 0.012*"economic" + 0.012*"industry"'),
 (1,
  '0.052*"government" + 0.030*"land" + 0.024*"parliament" + 0.019*"decision" + 0.018*"member" + 0.018*"issue" + 0.015*"group" + 0.015*"meeting" + 0.013*"bill" + 0.012*"minister"'),
 (2,
  '0.032*"development" + 0.026*"country" + 0.021*"policy" + 0.014*"develop" + 0.013*"region" + 0.012*"regional" + 0.012*"economic" + 0.011*"resource" + 0.010*"key" + 0.010*"plan"'),
 (3,
  '0.031*"water" + 0.022*"island" + 0.022*"area" + 0.018*"people" + 0.017*"food" + 0.017*"community" + 0.014*"disaster" + 0.011*"affect" + 0.009*"supply" + 0.008*"environment"'),
 (4,
  '0.026*"family" + 0.026*"people" + 0.019*"time" + 0.017*"home" + 0.015*"leave" + 0.015*"man" + 0.013*"day" + 0.013*"village" + 0.012*"late" + 0.012*"life"'),
 (5,
  '0.055*"woman" + 0.044*"community" + 0.032*"training" + 0.024*"program" + 0.020*"w

In [17]:
# Build one small frame per document and concatenate them in a single call.
# Growing `topics_df` with pd.concat inside the loop re-copies everything
# accumulated so far on every iteration, which is quadratic in the number of
# documents (the recorded run of the loop version ended in KeyboardInterrupt).
# presumably each item yielded by load_document_topics() is a list of
# (topic_id, weight) pairs — TODO confirm against the LdaMallet wrapper.
doc_topic_frames = [
    pd.DataFrame(doc_topics) for doc_topics in best_model.load_document_topics()
]

# Same layout as before: frames are placed side by side (axis=1), so every
# document contributes its own pair of columns labelled 0 and 1.
# An empty iterator yields an empty frame, as the loop version did.
topics_df = pd.concat(doc_topic_frames, axis=1) if doc_topic_frames else pd.DataFrame()

KeyboardInterrupt: 

In [33]:
# Show the installed gensim version.
# NOTE(review): other cells in this notebook call `gensim.models.wrappers`
# (LdaMallet); that subpackage is presumably unavailable in gensim 4.x —
# confirm which gensim version the Mallet cells were actually run with.
gensim.__version__

'4.3.2'

In [32]:
# Location of the MALLET launcher script, built relative to the working directory.
# NOTE(review): the LdaMallet import cell near the top of the notebook uses
# "/../mallet-2.0.8/bin/mallet" instead — confirm which location is correct.
train_mlt_path = os.getcwd() + "/../mallet/mallet-2.0.8/bin/mallet"

# NOTE(review): this call fails with the AttributeError recorded below the cell:
# `gensim.models.wrappers` has no module-level `load`. The argument is the path
# of the MALLET executable rather than of a saved model, so it is unclear
# whether the intent was to load a persisted model or to train a new one —
# resolve or delete this cell before relying on `model`.
model = gensim.models.wrappers.load(train_mlt_path)

AttributeError: module 'gensim.models.wrappers' has no attribute 'load'

In [16]:
import pyLDAvis
import pyLDAvis.gensim_models as gensimvis

# Destination of the stand-alone HTML export of the topic visualisation.
ldaviz_path = os.getcwd() + "/data/text/solomon_islands/st_ldavis.html"

# Convert the Mallet-wrapped model with gensim's malletmodel2ldamodel helper,
# then prepare the pyLDAvis data from it, the corpus and the dictionary.
mallet_lda_model = gensim.models.wrappers.ldamallet.malletmodel2ldamodel(best_model)
vis_data = gensimvis.prepare(mallet_lda_model, corpus, id2word)

# Write the HTML file, then render the same visualisation inline.
pyLDAvis.save_html(vis_data, ldaviz_path)
pyLDAvis.display(vis_data)

  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)


In [18]:
# Topic ids whose documents are kept for the sentiment analysis.
# NOTE(review): the recorded preview of `busecon_news` lists court and crime
# stories, so verify that these ids really are the business/economy topics of
# the currently selected `best_model`.
BUSECON_TOPIC_IDS = [0, 13]

# Drop every column labelled 0 and transpose, leaving one row per document and
# one column per topic position; the index is renumbered 0..n-1.
topic_weights = topics_df.drop(0, axis=1).T.reset_index(drop=True)

# Dominant topic = column position of the largest weight in each row. A single
# vectorised argmax replaces the per-row Python loop over iterrows().
topics_df_new = topic_weights.assign(
    dominant_topic=topic_weights.to_numpy().argmax(axis=1)
)

# Row positions of the documents whose dominant topic is one of the selected ids.
inclu_indexs = topics_df_new.index[
    topics_df_new["dominant_topic"].isin(BUSECON_TOPIC_IDS)
].to_list()

In [19]:
# Pull the selected articles out of `news` by position and renumber them from 0.
# assumes the row order of `news` matches the document order behind
# `inclu_indexs` — TODO confirm nothing re-sorted `news` in between.
selected_articles = news.iloc[inclu_indexs]
busecon_news = selected_articles.reset_index(drop=True)
busecon_news

Unnamed: 0,url,title,date,news
0,https://www.solomonstarnews.com/taro-airport-u...,"Taro airport upgrade steps up, flights to be s...",2023-08-04,\nALL Solomon Airlines flights to Taro Airport...
1,https://www.solomonstarnews.com/hcc-chc-submit...,"HCC, CHC submit multipurpose hall proposal to ...",2023-08-03,\nTHE Honiara City Council (HCC) and Central H...
2,https://www.solomonstarnews.com/in-court-with-...,In court with Assumpta,2023-08-03,\nHenderson murder trial moved to Friday\nTHE ...
3,https://www.solomonstarnews.com/bus-stabber-aw...,Bus stabber awaits sentence,2023-08-03,\nTHE young man who allegedly stole another ma...
4,https://www.solomonstarnews.com/health-scandal...,HEALTH SCANDAL AWAITS TRIAL,2023-07-11,\nFIVE people charged in connection to the $10...
...,...,...,...,...
2004,https://www.solomonstarnews.com/murder-case-ad...,Murder case adjourned,2014-04-10,AN accused charged with murder following a kil...
2005,https://www.solomonstarnews.com/where-are-the-...,Where are the solicitors?,2014-04-10,THE Public Solicitor’s Office (PSO) has been r...
2006,https://www.solomonstarnews.com/accused-jailed...,Accused jailed for life,2014-04-11,A YOUNG man in his 20’s has been convicted of ...
2007,https://www.solomonstarnews.com/accused-asks-c...,Accused asks court to grant him bail,2014-04-11,A MAN who was remanded for simple larceny has ...


In [30]:
# Score every selected article with NLTK's VADER analyzer.
#
# Changes from the previous version of this cell:
# * the loop variable is no longer called `news`, which used to overwrite the
#   `news` DataFrame with the last article's text and break any re-run of the
#   cell that builds `busecon_news`;
# * the analyzer is created once instead of once per article;
# * the scores are collected in a list and turned into a frame in one step
#   rather than growing `ss_df` with pd.concat on every iteration;
# * the commented-out earlier draft (which referenced undefined names) is gone.
sid = SentimentIntensityAnalyzer()

# Newlines are flattened to spaces and surrounding whitespace is stripped
# before scoring, exactly as before.
article_scores = [
    sid.polarity_scores(article.replace("\n", " ").strip())
    for article in busecon_news.news
]

# One row per article, in the order of `busecon_news`. The columns are the keys
# of VADER's score dict; naming them explicitly keeps `ss_df["compound"]`
# available even when no article was selected.
ss_df = pd.DataFrame(article_scores, columns=["neg", "neu", "pos", "compound"])

In [31]:
# Attach the compound sentiment score to each article. The scores are passed as
# a plain list so they are matched by position, not by index label.
compound_scores = ss_df["compound"].to_list()
busecon_news["sentiment"] = compound_scores

# Parse the publication date strings into datetime values.
busecon_news["date"] = pd.to_datetime(busecon_news["date"])

In [53]:
# Average article sentiment per calendar month, keyed by the first day of the
# month so it can be merged with a price series dated on month starts.
#
# Grouping on "MS" (month start) labels each bin with its first day directly.
# The previous version grouped on the lowercase "m" alias (deprecated in recent
# pandas releases) and then shifted the month-end labels back to month starts
# with offset arithmetic; the resulting dates are the same.
sent_by_month = (
    busecon_news
    .set_index("date")
    .groupby(pd.Grouper(freq="MS"))[["sentiment"]]
    .mean()  # months with no articles remain in the output as NaN
    .reset_index()
)

In [54]:
# Load the price/inflation series and attach the monthly mean sentiment to it.
# NOTE(review): the recorded preview of this file shows Papua New Guinea rows,
# while the news corpus is read from the solomon_islands folder — confirm that
# pairing these two sources is intended.
png_infl_path = os.getcwd() + "/data/prices/outputs/2017/RTFP_country_2017_2022-08-22.csv"

# Dates in the file are parsed day-first.
png_infl = pd.read_csv(png_infl_path).assign(
    date=lambda frame: pd.to_datetime(frame["date"], dayfirst=True)
)

# Left join: every price row is kept; months without sentiment get NaN.
png_infl_sent = pd.merge(png_infl, sent_by_month, how="left", on="date")
png_infl_sent.head(5)

Unnamed: 0,Open,High,Low,Close,Inflation,country,ISO3,date,sentiment
0,0.997417,1.035504,0.960535,0.997725,,Papua New Guinea,PNG,2017-01-01,0.617313
1,0.998328,1.037044,0.960832,1.037044,,Papua New Guinea,PNG,2017-02-01,0.861838
2,1.029174,1.066052,0.992296,1.012038,,Papua New Guinea,PNG,2017-03-01,0.697121
3,1.019978,1.059257,0.9807,1.028941,,Papua New Guinea,PNG,2017-04-01,0.889195
4,1.018736,1.061383,0.976089,1.0093,,Papua New Guinea,PNG,2017-05-01,0.717284


In [90]:
# Relative change of monthly sentiment against the value 12 rows earlier:
#     (s[t] - s[t-12]) / s[t-12]
# The first 12 entries have no earlier value and are NaN.
#
# This replaces a Python loop that used `np.NaN` (an alias removed in
# NumPy 2.0) and label-based `series[i]` lookups that only work while the
# frame carries a default 0..n-1 index; `shift` is purely positional.
LAG_MONTHS = 12

monthly_sentiment = png_infl_sent["sentiment"]
sentiment_lagged = monthly_sentiment.shift(LAG_MONTHS)

# Kept as a plain list, one value per row of `png_infl_sent`.
# NOTE(review): sentiment can be negative or close to zero, which makes a
# ratio-based change unstable — confirm this is the intended measure.
sent_change = ((monthly_sentiment - sentiment_lagged) / sentiment_lagged).tolist()

In [93]:
# Store the 12-row relative sentiment change (computed in the previous cell)
# as a new column; the list is matched to the rows by position.
png_infl_sent["sent_change"] = sent_change

In [113]:
# First differences (row-to-row change) of the closing price index and of the
# monthly sentiment, written back onto the same frame; the first row is NaN.
for source_column, diff_column in (("Close", "infl_diff"), ("sentiment", "sent_diff")):
    png_infl_sent[diff_column] = png_infl_sent[source_column].diff()

In [112]:
from scipy import signal

# Cross-correlate the sentiment change with inflation and report the lag at
# which the raw cross-correlation is largest.
#
# NOTE(review): this cell previously read both series from a variable `c` that
# no visible cell defines, so it failed on a fresh kernel. The input is now
# built explicitly, assuming `c` was the rows of `png_infl_sent` where both
# series are present — confirm that this matches the original intent.
# Rows with NaN are dropped because a single NaN would propagate into every
# value of the cross-correlation.
xcorr_input = png_infl_sent[["sent_change", "Inflation"]].dropna()

# NOTE(review): the series are neither demeaned nor standardised, so the peak
# reflects their levels as well as their co-movement.
correlation = signal.correlate(
    xcorr_input["sent_change"], xcorr_input["Inflation"], mode="full"
)
lags = signal.correlation_lags(
    len(xcorr_input["sent_change"]), len(xcorr_input["Inflation"]), mode="full"
)
lag = lags[np.argmax(correlation)]
lag

27

In [118]:
import statsmodels.formula.api as smf

# Regress the month-to-month change in the price index on the month-to-month
# change in sentiment. Standard errors use the HAC covariance estimator with
# up to 12 lags.
hac_settings = {'maxlags': 12}
mod = smf.ols(formula="infl_diff~sent_diff", data=png_infl_sent)
res = mod.fit(cov_type='HAC', cov_kwds=hac_settings)
res.summary()

0,1,2,3
Dep. Variable:,infl_diff,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,1.028
Date:,"Mon, 21 Aug 2023",Prob (F-statistic):,0.314
Time:,17:56:57,Log-Likelihood:,108.42
No. Observations:,67,AIC:,-212.8
Df Residuals:,65,BIC:,-208.4
Df Model:,1,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0048,0.003,1.422,0.155,-0.002,0.011
sent_diff,-0.0225,0.022,-1.014,0.311,-0.066,0.021

0,1,2,3
Omnibus:,0.098,Durbin-Watson:,2.491
Prob(Omnibus):,0.952,Jarque-Bera (JB):,0.062
Skew:,-0.062,Prob(JB):,0.969
Kurtosis:,2.917,Cond. No.,3.69
