# ProdLDA

In [1]:
from octis.models.LDA import LDA
from octis.dataset.dataset import Dataset
from octis.evaluation_metrics.diversity_metrics import TopicDiversity
from octis.evaluation_metrics.coherence_metrics import Coherence

In [3]:
dataset_save = r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\dataset'

In [4]:
dataset = Dataset()
dataset.load_custom_dataset_from_folder(dataset_save)

In [7]:
dataset.get_corpus()

[['qui',
  'possiamo',
  'ammirare',
  'resti',
  'porta',
  'accesso',
  'antico',
  'maniero',
  'costruito',
  'epoca',
  'longobarda'],
 ['posizione',
  'dominante',
  'valle',
  'risacco',
  'val',
  'rasina',
  'rispettivamente',
  'affluente',
  'chiascio',
  'torrente',
  'proveniente',
  'gualdo',
  'tadino',
  'casacastalda',
  'abitato',
  'originato',
  'castello',
  'fondato',
  'secolo',
  'longobardo',
  'discendente',
  'totila',
  'ernero',
  'castaldo',
  'nome',
  'casa',
  'castaldo'],
 ['collegiata',
  'santa',
  'maria',
  'assunta',
  'risale',
  'stile',
  'aspetto',
  'attuale',
  'seconda',
  'meta'],
 ['unica',
  'navata',
  'ampia',
  'luminosa',
  'due',
  'nicchioni',
  'due',
  'cappelle',
  'soffitto',
  'altissimo',
  'travatura',
  'capriata',
  'dedicata',
  'patrona',
  'santa',
  'marina',
  'monaca'],
 ['contesto',
  'urbano',
  'datazione',
  'secolo',
  'chiesa',
  'san',
  'francesco',
  'trova',
  'san',
  'gemini',
  'piazza',
  'san',
  'fran

## Train a Model

In [9]:
from octis.models.pytorchavitm.AVITM import AVITM
from skopt.space.space import Real
model = AVITM(model_type='prodLDA')
model.partitioning(False)

In [10]:
model_output = model.train_model(dataset)   # train the model

  from .autonotebook import tqdm as notebook_tqdm


Epoch: [1/100]	Samples: [573/57300]	Train Loss: 131.86495657449825	Time: 0:00:00.318002
Epoch: [2/100]	Samples: [1146/57300]	Train Loss: 130.70423279341188	Time: 0:00:00.133164
Epoch: [3/100]	Samples: [1719/57300]	Train Loss: 130.83600567190226	Time: 0:00:00.142005
Epoch: [4/100]	Samples: [2292/57300]	Train Loss: 130.30852216950262	Time: 0:00:00.135992
Epoch: [5/100]	Samples: [2865/57300]	Train Loss: 129.94285831151834	Time: 0:00:00.145003
Epoch: [6/100]	Samples: [3438/57300]	Train Loss: 129.70835974994546	Time: 0:00:00.128032
Epoch: [7/100]	Samples: [4011/57300]	Train Loss: 129.2185488928883	Time: 0:00:00.128985
Epoch: [8/100]	Samples: [4584/57300]	Train Loss: 128.7808094390816	Time: 0:00:00.135557
Epoch: [9/100]	Samples: [5157/57300]	Train Loss: 128.23751516824825	Time: 0:00:00.137000
Epoch: [10/100]	Samples: [5730/57300]	Train Loss: 127.79446341895724	Time: 0:00:00.125513
Epoch: [11/100]	Samples: [6303/57300]	Train Loss: 127.18818342332024	Time: 0:00:00.139003
Epoch: [12/100]	Sample

In [11]:
npmi = Coherence(texts=dataset.get_corpus(), topk=10, measure='c_npmi')
topic_diversity = TopicDiversity(topk=10)

In [12]:
# Retrieve metrics score
topic_diversity_score = topic_diversity.score(model_output)
print("Topic diversity: "+str(topic_diversity_score))

npmi_score = npmi.score(model_output)
print("Coherence: "+str(npmi_score))

Topic diversity: 0.97
Coherence: -0.23193542950394858


In [14]:
from octis.models.CTM import CTM
from octis.dataset.dataset import Dataset
from octis.optimization.optimizer import Optimizer
from skopt.space.space import Real, Categorical, Integer
from octis.evaluation_metrics.coherence_metrics import Coherence
from octis.evaluation_metrics.diversity_metrics import TopicDiversity

In [15]:
search_space = {"num_layers": Categorical({1, 2, 3}),
                "num_neurons": Categorical({100, 200, 300}),
                "activation": Categorical({'sigmoid', 'relu', 'softplus'}),
                "dropout": Real(0.0, 0.95),
                "num_topics": Integer(low=5,high=15)}

optimization_runs=30
model_runs=1

## Ottimizzazione Topic Diversity

In [32]:
save_path_TD = r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_TD'
save_path_npmi = r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_npmi'

In [18]:
optimizer=Optimizer()
optimization_result = optimizer.optimize(
    model, dataset, topic_diversity, search_space,
    number_of_call=optimization_runs,
    model_runs=model_runs, save_models=False,
    extra_metrics=None, # to keep track of other metrics
    save_path= save_path_TD)

Current call:  0
Epoch: [1/100]	Samples: [573/57300]	Train Loss: 132.02923722458553	Time: 0:00:00.144507
Epoch: [2/100]	Samples: [1146/57300]	Train Loss: 132.07824089223385	Time: 0:00:00.149148
Epoch: [3/100]	Samples: [1719/57300]	Train Loss: 131.76875494246292	Time: 0:00:00.152523
Epoch: [4/100]	Samples: [2292/57300]	Train Loss: 130.9397513770724	Time: 0:00:00.159190
Epoch: [5/100]	Samples: [2865/57300]	Train Loss: 131.776997777596	Time: 0:00:00.123011
Epoch: [6/100]	Samples: [3438/57300]	Train Loss: 130.68746932264398	Time: 0:00:00.126746
Epoch: [7/100]	Samples: [4011/57300]	Train Loss: 130.04545105939135	Time: 0:00:00.181997
Epoch: [8/100]	Samples: [4584/57300]	Train Loss: 129.4325430505563	Time: 0:00:00.212005
Epoch: [9/100]	Samples: [5157/57300]	Train Loss: 129.880205776887	Time: 0:00:00.251452
Epoch: [10/100]	Samples: [5730/57300]	Train Loss: 129.54601859047776	Time: 0:00:00.346617
Epoch: [11/100]	Samples: [6303/57300]	Train Loss: 129.16247409467715	Time: 0:00:00.264495
Epoch: [1

In [19]:
optimization_result.save_to_csv(r"C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_TD\results.csv")

In [21]:
import pandas as pd

df = pd.read_csv(r"C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_TD\results.csv")

# Trova l'indice del valore massimo di "mean" nella colonna "topic diversity"
indice_max_mean = df['Mean(model_runs)'].idxmax()

# Estrai i valori delle colonne corrispondenti ai parametri richiesti per l'indice trovato
parametri_max_mean = df.loc[indice_max_mean, ['Mean(model_runs)','num_iteration', 'activation', 'num_layers', 'num_neurons', 'num_topics']]

print("Parametri associati al valore massimo di mean nella colonna topic diversity:")
print(parametri_max_mean)

Parametri associati al valore massimo di mean nella colonna topic diversity:
Mean(model_runs)         1.0
num_iteration             12
activation          softplus
num_layers                 2
num_neurons              300
num_topics                 8
Name: 12, dtype: object


Creo il modello inserendo i valori dei parametri che ho ottenuto tramite l'ottimizzazione:

In [22]:
model = AVITM(model_type='prodLDA', activation='softplus',num_topics = 8, num_neurons = 300, num_layers=2)
model.partitioning(False)

In [23]:
output_TD = model.train_model(dataset)

Epoch: [1/100]	Samples: [573/57300]	Train Loss: 130.60300348358422	Time: 0:00:00.270175
Epoch: [2/100]	Samples: [1146/57300]	Train Loss: 129.835606866274	Time: 0:00:00.221669
Epoch: [3/100]	Samples: [1719/57300]	Train Loss: 129.01041411022032	Time: 0:00:00.237377
Epoch: [4/100]	Samples: [2292/57300]	Train Loss: 128.56853150905323	Time: 0:00:00.259178
Epoch: [5/100]	Samples: [2865/57300]	Train Loss: 128.49541458742365	Time: 0:00:00.361720
Epoch: [6/100]	Samples: [3438/57300]	Train Loss: 128.17615278686736	Time: 0:00:00.353977
Epoch: [7/100]	Samples: [4011/57300]	Train Loss: 127.24041076979712	Time: 0:00:00.412746
Epoch: [8/100]	Samples: [4584/57300]	Train Loss: 127.29181749700044	Time: 0:00:00.353266
Epoch: [9/100]	Samples: [5157/57300]	Train Loss: 126.4396423020288	Time: 0:00:00.253919
Epoch: [10/100]	Samples: [5730/57300]	Train Loss: 125.53677277623255	Time: 0:00:00.249437
Epoch: [11/100]	Samples: [6303/57300]	Train Loss: 124.48675675583551	Time: 0:00:00.234027
Epoch: [12/100]	Samples

In [24]:
for t in output_TD['topics']:
  print(" ".join(t))

pia collocato sotterranei ail find idraulica ingegneria vannucci opere the
san trova francesco piazza centro storico chiesa michele ancora lorenzo
storia domina citta scrigno storie denominazione prodotto origine torri spoletino
secolo xiv risalente romana edificata resti struttura xii sec xvi
via colle carsulae nota assisi santo flaminia parco ala sommita
medievale piccolo mondo scoperta lago natura adagiato caratteristico sottostante doc
santa assunta maria parrocchiale chiesa interno venne poco affreschi esempio
valle palazzo comune comunale situato tevere area localita antiquarium territorio


In [25]:
topics = pd.DataFrame(output_TD['topics'])
topics.to_excel(r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_TD\topics.xlsx', index=False)  

In [26]:
vocabolario = []
with open(r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\dataset\vocabulary.txt', 'r') as file:
    for word in file:
        vocabolario.append(word)

In [27]:
# Carica la matrice topic-word in un DataFrame pandas
topic_word_matrix = pd.DataFrame(output_TD['topic-word-matrix'])

# Imposta i nomi delle colonne come parole

# Ottieni i primi k termini da ciascuna riga come nomi di colonna
topic_word_matrix.columns = [vocabolario[i] for i in range(topic_word_matrix.shape[1])]

# Stampa il DataFrame
print(topic_word_matrix)

topic_word_matrix.to_excel(r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_TD\topic_word_matrix.xlsx', index=False) 

   abbadesse\n  abbandonato\n  abbastanza\n  abbazia\n  abbaziale\n   
0    -0.026093      -0.124558      0.005824  -0.124934     0.034755  \
1    -0.115687       0.037233     -0.073479   0.047550    -0.140429   
2    -0.029589       0.031188      0.127159  -0.134255     0.039829   
3    -0.090179      -0.231959     -0.060749   0.054376    -0.151687   
4     0.070450      -0.071514     -0.061662  -0.029781    -0.152562   
5     0.013269       0.059270      0.016363  -0.106958     0.095099   
6    -0.162645      -0.085813     -0.069295  -0.036984    -0.047083   
7    -0.036056      -0.038917     -0.044274   0.074219    -0.000217   

   abbazie\n  abitati\n  abitativo\n  abitato\n  abitazione\n  ...    with\n   
0  -0.161496  -0.004504     0.011173  -0.110068     -0.009985  ...  0.005624  \
1   0.000086  -0.076347    -0.099536   0.073355     -0.209877  ... -0.102753   
2  -0.000262   0.013646    -0.071359  -0.107009     -0.041286  ...  0.014122   
3  -0.127355  -0.206483     0.033158  -0

In [28]:
import pandas as pd

corpus_processed = pd.read_csv(r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\dataset\corpus.tsv', sep='\t', header=None)
corpus_processed
new_df = pd.DataFrame(corpus_processed[0])

In [29]:
num = 1
for topic in output_TD['topic-document-matrix']:
  for i in range(0,len(topic)):
    new_df.loc[i,num] = topic[i]
  num = num+1
new_df

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,qui possiamo ammirare resti porta accesso anti...,0.359493,0.093163,0.067093,0.098419,0.077834,0.135504,0.071000,0.097494
1,posizione dominante valle risacco val rasina r...,0.028920,0.026942,0.053369,0.392989,0.038080,0.040220,0.013605,0.405875
2,collegiata santa maria assunta risale stile as...,0.065434,0.046711,0.041081,0.152473,0.057138,0.052703,0.537642,0.046818
3,unica navata ampia luminosa due nicchioni due ...,0.040049,0.045211,0.052179,0.017634,0.026298,0.094379,0.655286,0.068964
4,contesto urbano datazione secolo chiesa san fr...,0.012909,0.773145,0.025775,0.053879,0.027757,0.014603,0.071059,0.020873
...,...,...,...,...,...,...,...,...,...
568,santuario madonna assunta cielo xii secolo sti...,0.091510,0.060723,0.076481,0.417081,0.043196,0.076715,0.167971,0.066322
569,antico castello medievale assedi miracoli reli...,0.097651,0.049919,0.393356,0.096686,0.127703,0.114641,0.034626,0.085418
570,splendido castello pupaggi comune sellano luog...,0.059648,0.060035,0.136986,0.059653,0.400636,0.118194,0.076941,0.087908
571,esistono due tipologie prosciutto norcia primo...,0.043511,0.043287,0.716577,0.058148,0.028619,0.017501,0.042003,0.050354


In [30]:
# la salvo
new_df.to_excel(r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_TD\topic-document-matrix.xlsx', index=False)  

## Ottimizzazione Topic Coherence

In [33]:
optimizer=Optimizer()
optimization_result = optimizer.optimize(
    model, dataset, npmi, search_space,
    number_of_call=optimization_runs,
    model_runs=model_runs, save_models=False,
    extra_metrics=None, # to keep track of other metrics
    save_path= save_path_npmi)

Current call:  0
Epoch: [1/100]	Samples: [573/57300]	Train Loss: 132.22707378926702	Time: 0:00:00.138013
Epoch: [2/100]	Samples: [1146/57300]	Train Loss: 131.77745708578752	Time: 0:00:00.141642
Epoch: [3/100]	Samples: [1719/57300]	Train Loss: 130.82189323598385	Time: 0:00:00.122696
Epoch: [4/100]	Samples: [2292/57300]	Train Loss: 131.2626203234075	Time: 0:00:00.126626
Epoch: [5/100]	Samples: [2865/57300]	Train Loss: 130.94853532667975	Time: 0:00:00.147154
Epoch: [6/100]	Samples: [3438/57300]	Train Loss: 130.72738738001746	Time: 0:00:00.133643
Epoch: [7/100]	Samples: [4011/57300]	Train Loss: 130.54534965368674	Time: 0:00:00.137137
Epoch: [8/100]	Samples: [4584/57300]	Train Loss: 130.1945992514725	Time: 0:00:00.146669
Epoch: [9/100]	Samples: [5157/57300]	Train Loss: 129.6921978962151	Time: 0:00:00.145785
Epoch: [10/100]	Samples: [5730/57300]	Train Loss: 129.4759787780868	Time: 0:00:00.135579
Epoch: [11/100]	Samples: [6303/57300]	Train Loss: 130.07490132117147	Time: 0:00:00.128541
Epoch: 

In [34]:
optimization_result.save_to_csv(r"C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_npmi\results.csv")

In [36]:
import pandas as pd

df = pd.read_csv(r"C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_npmi\results.csv")

# Trova l'indice del valore massimo di "mean" nella colonna "topic diversity"
indice_max_mean = df['Mean(model_runs)'].idxmax()

# Estrai i valori delle colonne corrispondenti ai parametri richiesti per l'indice trovato
parametri_max_mean = df.loc[indice_max_mean, ['Mean(model_runs)','num_iteration', 'activation', 'num_layers', 'num_neurons', 'num_topics']]

print("Parametri associati al valore massimo di mean nella colonna topic coherence:")
print(parametri_max_mean)

Parametri associati al valore massimo di mean nella colonna topic coherence:
Mean(model_runs)   -0.080664
num_iteration             23
activation           sigmoid
num_layers                 2
num_neurons              300
num_topics                15
Name: 23, dtype: object


In [37]:
model = AVITM(model_type='prodLDA', activation='sigmoid',num_topics = 15, num_neurons = 300, num_layers=2)
model.partitioning(False)

In [38]:
output_npmi = model.train_model(dataset)

Epoch: [1/100]	Samples: [573/57300]	Train Loss: 135.5257561968259	Time: 0:00:00.217475
Epoch: [2/100]	Samples: [1146/57300]	Train Loss: 135.10235840695898	Time: 0:00:00.213053
Epoch: [3/100]	Samples: [1719/57300]	Train Loss: 134.79233389916013	Time: 0:00:00.231572
Epoch: [4/100]	Samples: [2292/57300]	Train Loss: 134.49025738301702	Time: 0:00:00.209054
Epoch: [5/100]	Samples: [2865/57300]	Train Loss: 134.2525871236911	Time: 0:00:00.216065
Epoch: [6/100]	Samples: [3438/57300]	Train Loss: 134.2799342822862	Time: 0:00:00.283743
Epoch: [7/100]	Samples: [4011/57300]	Train Loss: 133.68381530868237	Time: 0:00:00.362047
Epoch: [8/100]	Samples: [4584/57300]	Train Loss: 133.4240999604603	Time: 0:00:00.390079
Epoch: [9/100]	Samples: [5157/57300]	Train Loss: 133.12332467550175	Time: 0:00:00.432688
Epoch: [10/100]	Samples: [5730/57300]	Train Loss: 132.80272074061955	Time: 0:00:00.331653
Epoch: [11/100]	Samples: [6303/57300]	Train Loss: 132.0282529927465	Time: 0:00:00.234566
Epoch: [12/100]	Samples: 

In [39]:
for t in output_npmi['topics']:
  print(" ".join(t))

erge gualdo rocca molta fiume vegetazione scrigno palazzi probabilita scoperta
trasimeno frazione piccola villa lago deruta martana bacino insieme santuario
esempio castello costruita edificio unica architettura citta sorta piedi muro
area cultura famiglia importante acquasparta sale sicuramente pianta fabbrica penna
umbro tevere valle secondo necropoli fino perugino territorio confine sperandio
medievale borgo immerso piccolo arte ricco trevi contemporanea boschi generazione
caratteristiche rappresenta qualita ricca crocefisso anni luogo stato bosco fornaci
maria trova piazza palazzo interno perugia santa corso torgiano direzione
san francesco chiesa storico piazza centro patrono umbertide dedicata bernardino
maria via chiesa lungo santa assunta centro principale maggiore trova
xiv secolo facciata edificata convento xvi francescano sec risale parte
prima romana risale meta duomo mosaici casa affreschi stesso forma
sorge nord maria colle mura allainterno fuori abbazia rispetto bella
co

In [40]:
topics = pd.DataFrame(output_npmi['topics'])
topics.to_excel(r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_npmi\topics.xlsx', index=False) 

In [41]:
# Carica la matrice topic-word in un DataFrame pandas
topic_word_matrix = pd.DataFrame(output_npmi['topic-word-matrix'])

# Imposta i nomi delle colonne come parole

# Ottieni i primi k termini da ciascuna riga come nomi di colonna
topic_word_matrix.columns = [vocabolario[i] for i in range(topic_word_matrix.shape[1])]

# Stampa il DataFrame
print(topic_word_matrix)

topic_word_matrix.to_excel(r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_npmi\topic_word_matrix.xlsx', index=False) 

    abbadesse\n  abbandonato\n  abbastanza\n  abbazia\n  abbaziale\n   
0      0.003425      -0.024928     -0.033940  -0.128576    -0.226417  \
1     -0.076418      -0.019694     -0.072477   0.014551    -0.061548   
2     -0.027574      -0.080815     -0.084221  -0.123979    -0.024360   
3     -0.029755      -0.011120     -0.004791  -0.063177    -0.067469   
4     -0.090196      -0.078837     -0.050488  -0.089735    -0.070932   
5     -0.017808      -0.050526     -0.032213  -0.106317     0.031545   
6     -0.098527       0.101125     -0.081950  -0.027372     0.038221   
7     -0.098757      -0.069252     -0.088543   0.128010    -0.016425   
8     -0.058379      -0.173086     -0.120585  -0.096524    -0.146347   
9     -0.058646      -0.001901     -0.123043  -0.012424    -0.219626   
10    -0.235056      -0.103794     -0.153431   0.009980    -0.086495   
11    -0.146932      -0.187363     -0.086135   0.007184    -0.022914   
12     0.069998      -0.051499     -0.094972   0.148601    -0.03

In [42]:
import pandas as pd

corpus_processed = pd.read_csv(r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\dataset\corpus.tsv', sep='\t', header=None)
corpus_processed
new_df = pd.DataFrame(corpus_processed[0])

In [43]:
num = 1
for topic in output_npmi['topic-document-matrix']:
  for i in range(0,len(topic)):
    new_df.loc[i,num] = topic[i]
  num = num+1
new_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,qui possiamo ammirare resti porta accesso anti...,0.126240,0.027669,0.212060,0.024637,0.017494,0.036054,0.020139,0.021239,0.036659,0.019544,0.051226,0.029687,0.060542,0.241512,0.075297
1,posizione dominante valle risacco val rasina r...,0.007328,0.007278,0.245599,0.014284,0.057294,0.010677,0.012992,0.019949,0.013208,0.016165,0.006785,0.024680,0.548477,0.006402,0.008882
2,collegiata santa maria assunta risale stile as...,0.018270,0.024717,0.088641,0.028127,0.029446,0.040719,0.039858,0.042866,0.033456,0.046042,0.109590,0.346695,0.058011,0.054069,0.039494
3,unica navata ampia luminosa due nicchioni due ...,0.019545,0.023334,0.068011,0.015489,0.166819,0.016810,0.040455,0.062932,0.046940,0.022510,0.035775,0.170482,0.271383,0.027570,0.011944
4,contesto urbano datazione secolo chiesa san fr...,0.008362,0.020955,0.029277,0.008823,0.009474,0.009554,0.019914,0.010521,0.702304,0.042060,0.083068,0.012732,0.018910,0.009410,0.014636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
568,santuario madonna assunta cielo xii secolo sti...,0.013831,0.034128,0.046819,0.030328,0.086913,0.022173,0.042749,0.019822,0.007886,0.028597,0.034214,0.564906,0.021560,0.026982,0.019093
569,antico castello medievale assedi miracoli reli...,0.124220,0.032751,0.022843,0.023836,0.097413,0.400576,0.021494,0.049005,0.029871,0.014854,0.024461,0.030319,0.050254,0.043582,0.034522
570,splendido castello pupaggi comune sellano luog...,0.020514,0.151488,0.415302,0.023780,0.064639,0.038046,0.044903,0.058620,0.020103,0.020184,0.024967,0.039185,0.026936,0.030601,0.020733
571,esistono due tipologie prosciutto norcia primo...,0.008400,0.008958,0.004823,0.004612,0.889078,0.015278,0.005942,0.007803,0.006374,0.005056,0.002076,0.017830,0.009372,0.007591,0.006807


In [44]:
new_df.to_excel(r'C:\Users\franc\Desktop\PROJECTS\PROJECTS\RASTA\risultati\ProdLDA\result_npmi\topic-document-matrix.xlsx', index=False)  