In [9]:
import os
import glob
import pathlib
import pandas as pd
import numpy as np
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt
import string
import nltk
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [10]:
# defining function that contains punctuation removal
def remove_punctuation(text):
    no_punct = "".join([c for c in text if c not in string.punctuation])
    return no_punct

## Extracting files


In [11]:
def extract_files(folder_path):
    csv_files = glob.glob(os.path.join(folder_path, "*.csv"))
    dataframes = []

    for file in csv_files:
        df = pd.read_csv(file)
        dataframes.append(df)
    return dataframes


extracted = extract_files("Topic_modelling")

In [12]:
alldata = pd.DataFrame()
for listitems in extracted:
    alldata = pd.concat([alldata, listitems])

## Feature extraction

In [13]:
alldata.head()

Unnamed: 0,headlines,description,content,url,category
0,Nirmala Sitharaman to equal Morarji Desai’s re...,With the presentation of the interim budget on...,"Sitharaman, the first full-time woman finance ...",https://indianexpress.com/article/business/bud...,business
1,"‘Will densify network, want to be at least no....","'In terms of market share, we aim to double it...",The merger of Tata group’s budget airlines Air...,https://indianexpress.com/article/business/avi...,business
2,Air India group to induct an aircraft every si...,Air India currently has 117 operational aircra...,The Air India group plans to induct one aircra...,https://indianexpress.com/article/business/avi...,business
3,Red Sea woes: Exporters seek increased credit ...,Rising attacks forced shippers to consider the...,Indian exporters have asked the central govern...,https://indianexpress.com/article/business/red...,business
4,Air India group to induct a plane every 6 days...,"Apart from fleet expansion, 2024 will also see...",The Air India group plans to induct one aircra...,https://indianexpress.com/article/business/avi...,business


Remove punctuation, numbers and stopwords from the text. Then, apply stemming to the words.

In [14]:
alldata["remove_puntuations"] = alldata["headlines"].apply(lambda x: remove_punctuation(x))
alldata["to_lower"] = alldata["remove_puntuations"].apply(lambda x: x.lower())

### BerTopic Modeling


In [32]:
topic_model_df = pd.DataFrame()
topic_model_df["headlines"] = alldata["to_lower"]

In [21]:
# Load api key
import os
from dotenv import load_dotenv

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

print(api_key)

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


In [50]:
import openai
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired
from sentence_transformers import SentenceTransformer
from hdbscan import HDBSCAN
from umap import UMAP


client = openai.OpenAI(api_key=api_key)

# Define the models
representation_model = KeyBERTInspired()
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, 
                 metric='cosine', random_state=42)
hdbscan_model = HDBSCAN(min_cluster_size=150, metric='euclidean',
                        cluster_selection_method='eom', 
                        prediction_data=True)

# NOTE: A higher min_cluster_size will lead to a more conservative topic extraction

In [42]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer_model = CountVectorizer(stop_words="english", ngram_range=(1, 2))

In [51]:
# Train the model
model = BERTopic(
    embedding_model=embedding_model,
    umap_model=umap_model,
    hdbscan_model=hdbscan_model,
    vectorizer_model=vectorizer_model,
    representation_model=representation_model,

    top_n_words=10,
    verbose=True
)

topics, probs = model.fit_transform(topic_model_df["headlines"])

2024-10-08 19:16:42,089 - BERTopic - Embedding - Transforming documents to embeddings.


Batches:   0%|          | 0/313 [00:00<?, ?it/s]

2024-10-08 19:18:24,246 - BERTopic - Embedding - Completed ✓
2024-10-08 19:18:24,246 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-10-08 19:19:49,093 - BERTopic - Dimensionality - Completed ✓
2024-10-08 19:19:49,097 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-10-08 19:19:50,976 - BERTopic - Cluster - Completed ✓
2024-10-08 19:19:50,992 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-10-08 19:19:54,975 - BERTopic - Representation - Completed ✓


In [1]:
model.get_topic_info().head(10)

NameError: name 'model' is not defined

In [22]:
model.get_topic_info().head(20)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,810,-1_india_rate_collection_day,"[india, rate, collection, day, dollar, rupee, ...",[salaar box office collection day 3 early repo...
1,0,1976,0_2023_student_exam_2024,"[2023, student, exam, 2024, university, ug, ii...",[neet pg 2023 counselling registration round 2...
2,1,1972,1_khan_film_say_kapoor,"[khan, film, say, kapoor, shah, rukh, animal, ...",[dunki box office collection day 1 early repor...
3,2,1657,2_ai_apple_google_new,"[ai, apple, google, new, tech, launch, feature...","[use google new gemini ai bard pixel 8 pro, te..."
4,3,1320,3_india_rbi_crore_bank,"[india, rbi, crore, bank, profit, rise, growth...",[federal bank net profit june quarter rise 29 ...
5,4,1230,4_india_test_australia_ind,"[india, test, australia, ind, world, cup, au, ...",[india woman v england woman live streaming t2...
6,5,392,5_ronaldo_messi_united_cristiano,"[ronaldo, messi, united, cristiano, manchester...",[saudi pro league al nassr v al wehda watch cr...
7,6,288,6_sensex_nifty_stock_pt,"[sensex, nifty, stock, pt, market, hdfc, high,...","[nifty record high sensex jump 300 point, stoc..."
8,7,261,7_isro_moon_space_mission,"[isro, moon, space, mission, nasa, chandrayaan...",[moon isro turn sun india 1st solar mission lo...
9,8,94,8_australian_open_djokovic_novak,"[australian, open, djokovic, novak, ukraine, s...",[djokovic pound paul reach 10th australian ope...


In [23]:
model.reduce_topics(alldata["unlist"], nr_topics=5)


2024-07-28 21:36:10,615 - BERTopic - Topic reduction - Reducing number of topics
2024-07-28 21:36:10,937 - BERTopic - Topic reduction - Reduced number of topics from 10 to 5


<bertopic._bertopic.BERTopic at 0x293043462d0>

In [24]:
model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,810,-1_india_2023_day_collection,"[india, 2023, day, collection, rate, box, doll...",[salaar box office collection day 3 early repo...
1,0,3948,0_2023_khan_say_film,"[2023, khan, say, film, student, exam, 2024, u...",[animal box office collection day 15 early rep...
2,1,3265,1_ai_india_new_apple,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...
3,2,1716,2_india_test_cup_world,"[india, test, cup, world, australia, ind, watc...",[india unbeaten run woman u19 t20 world cup en...
4,3,261,3_isro_moon_space_mission,"[isro, moon, space, mission, nasa, chandrayaan...",[australia launch lunar rover nasa artemis mis...


In [25]:
topic_labels = {0: "Education", 1: "Business", 2: "Sports", 3: "Technology", 4: "Entertainment"}
model.set_topic_labels(topic_labels)

In [26]:
model.get_topic_info().head(10)

Unnamed: 0,Topic,Count,Name,CustomName,Representation,Representative_Docs
0,-1,810,-1_india_2023_day_collection,-1_india_2023_day_collection,"[india, 2023, day, collection, rate, box, doll...",[salaar box office collection day 3 early repo...
1,0,3948,0_2023_khan_say_film,Education,"[2023, khan, say, film, student, exam, 2024, u...",[animal box office collection day 15 early rep...
2,1,3265,1_ai_india_new_apple,Business,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...
3,2,1716,2_india_test_cup_world,Sports,"[india, test, cup, world, australia, ind, watc...",[india unbeaten run woman u19 t20 world cup en...
4,3,261,3_isro_moon_space_mission,Technology,"[isro, moon, space, mission, nasa, chandrayaan...",[australia launch lunar rover nasa artemis mis...


In [27]:
new_df = pd.DataFrame((model.get_document_info(alldata["unlist"])))
new_df.head()

Unnamed: 0,Document,Topic,Name,CustomName,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
0,nirmala sitharaman equal morarji desai record ...,-1,-1_india_2023_day_collection,-1_india_2023_day_collection,"[india, 2023, day, collection, rate, box, doll...",[salaar box office collection day 3 early repo...,india - 2023 - day - collection - rate - box -...,0.835612,False
1,densify network want least 2 city pair air in...,-1,-1_india_2023_day_collection,-1_india_2023_day_collection,"[india, 2023, day, collection, rate, box, doll...",[salaar box office collection day 3 early repo...,india - 2023 - day - collection - rate - box -...,0.529844,False
2,air india group induct aircraft every six day ...,1,1_ai_india_new_apple,Business,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...,ai - india - new - apple - google - bank - rbi...,0.591328,False
3,red sea woe exporter seek increased credit fre...,1,1_ai_india_new_apple,Business,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...,ai - india - new - apple - google - bank - rbi...,0.662667,False
4,air india group induct plane every 6 day throu...,1,1_ai_india_new_apple,Business,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...,ai - india - new - apple - google - bank - rbi...,0.594366,False


In [33]:
select_df = new_df[['CustomName', 'Probability', 'Document']]
transfromed_df = select_df.pivot_table(index='Document', columns='CustomName', values='Probability', aggfunc='mean').reset_index()

In [50]:
transfromed_df.rename(columns={"Document": "unlist"}, inplace=True) 
transfromed_df_merged = pd.merge(transfromed_df, alldata, on=["unlist"], how='inner')

In [52]:
transfromed_df_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   unlist                        10000 non-null  object 
 1   -1_india_2023_day_collection  810 non-null    float64
 2   Business                      3265 non-null   float64
 3   Education                     3948 non-null   float64
 4   Sports                        1716 non-null   float64
 5   Technology                    261 non-null    float64
 6   headlines                     10000 non-null  object 
 7   description                   10000 non-null  object 
 8   content                       10000 non-null  object 
 9   url                           10000 non-null  object 
 10  category                      10000 non-null  object 
 11  remove_puntuations            10000 non-null  object 
 12  to_lower                      10000 non-null  object 
 13  to

In [51]:
# customize topic labels 
topic_labels = model.generate_topic_labels(nr_words=5, topic_prefix=False,
                                            word_length=5, separator=" ")
model.set_topic_labels(topic_labels)
freq = model.get_topic_info()
freq.head(10)

Unnamed: 0,Topic,Count,Name,CustomName,Representation,Representative_Docs
0,-1,1451,-1_india_rs_us_crore,india rs us crore oil,"[india, rs, us, crore, oil, dollar, rupee, gov...",[Rupee slips 2 paise to 83.25 against US dolla...
1,0,2010,0_khan_film_says_kapoor,khan film says kapoo shah,"[khan, film, says, kapoor, shah, rukh, animal,...",[Dunki box office collection Day 2 early repor...
2,1,1945,1_vs_india_cup_test,vs india cup test austr,"[vs, india, cup, test, australia, world, ind, ...","[India vs England Live Streaming, 1st Test: Wh..."
3,2,1858,2_2023_iit_2024_ug,2023 iit 2024 ug check,"[2023, iit, 2024, ug, check, students, univers...",[DU Admissions 2023: Check top ranking college...
4,3,1057,3_apple_google_samsung_pro,apple googl samsu pro ai,"[apple, google, samsung, pro, ai, galaxy, new,...",[Google Chrome is getting 3 new AI features: H...
5,4,339,4_profit_rs_sebi_crore,profi rs sebi crore net,"[profit, rs, sebi, crore, net, adani, shares, ...","[Wipro Q1 net profit rises 12% to Rs 2,870 cro..."
6,5,328,5_ai_openai_altman_ceo,ai opena altma ceo sam,"[ai, openai, altman, ceo, sam, tech, microsoft...",[Google in talks to invest in AI startup Chara...
7,6,230,6_moon_space_chandrayaan_nasa,moon space chand nasa missi,"[moon, space, chandrayaan, nasa, mission, isro...",[Chandrayaan-3 lander Vikram is now a landmark...
8,7,227,7_rbi_bank_inflation_banks,rbi bank infla banks rate,"[rbi, bank, inflation, banks, rate, governor, ...",[Rate cuts not even under discussion: RBI Gove...
9,8,182,8_sensex_nifty_pts_19,sense nifty pts 19 stock,"[sensex, nifty, pts, 19, stocks, markets, gain...","[Sensex slumps 440 pts, Nifty closes at 19,699..."


In [57]:
model.merge_topics(alldata["to_lower"], topics_to_merge=[[3, 5], [4, 6, 7]])
model.get_topic_info().head(10)

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,854,-1_dollar_rupee_2023_us,"[dollar, rupee, 2023, us, new, paise, india, 2...",[rupee falls 9 paise to settle at 8327 against...
1,0,1986,0_khan_film_says_kapoor,"[khan, film, says, kapoor, shah, rukh, animal,...",[dunki box office collection day 15 shah rukh ...
2,1,1916,1_vs_india_cup_test,"[vs, india, cup, test, world, australia, ind, ...",[aus vs sa live streaming women’s t20 world cu...
3,2,1893,2_2023_2024_ug_check,"[2023, 2024, ug, check, iit, university, stude...",[du admissions 2023 check top ranking colleges...
4,3,1841,3_rs_india_rbi_sensex,"[rs, india, rbi, sensex, crore, bank, nifty, p...",[sensex nifty jump over 1 pc on rally in globa...
5,4,1510,4_ai_apple_google_new,"[ai, apple, google, new, samsung, pro, tech, g...",[tech news today oneplus ai music festival 202...


In [58]:
#custome topic labels
topic_labels = {0: "Entertainment", 1: "Sports", 2: "Education", 3: "Business", 4: "Technology"}
model.set_topic_labels(topic_labels)
model.get_topic_info().head(6)

Unnamed: 0,Topic,Count,Name,CustomName,Representation,Representative_Docs
0,-1,854,-1_dollar_rupee_2023_us,-1_dollar_rupee_2023_us,"[dollar, rupee, 2023, us, new, paise, india, 2...",[rupee falls 9 paise to settle at 8327 against...
1,0,1986,0_khan_film_says_kapoor,Entertainment,"[khan, film, says, kapoor, shah, rukh, animal,...",[dunki box office collection day 15 shah rukh ...
2,1,1916,1_vs_india_cup_test,Sports,"[vs, india, cup, test, world, australia, ind, ...",[aus vs sa live streaming women’s t20 world cu...
3,2,1893,2_2023_2024_ug_check,Education,"[2023, 2024, ug, check, iit, university, stude...",[du admissions 2023 check top ranking colleges...
4,3,1841,3_rs_india_rbi_sensex,Business,"[rs, india, rbi, sensex, crore, bank, nifty, p...",[sensex nifty jump over 1 pc on rally in globa...
5,4,1510,4_ai_apple_google_new,Technology,"[ai, apple, google, new, samsung, pro, tech, g...",[tech news today oneplus ai music festival 202...


In [35]:
model.visualize_barchart()

In [61]:
model.visualize_distribution(probs[4])


In [67]:
model.reduce_topics(alldata["to_lower"], nr_topics=4)


2024-07-28 13:18:31,843 - BERTopic - Topic reduction - Reducing number of topics


2024-07-28 13:18:32,497 - BERTopic - Topic reduction - Reduced number of topics from 5 to 4


<bertopic._bertopic.BERTopic at 0x150630e7850>

In [61]:
new_df_topics = model.get_document_info(alldata["to_lower"])
new_df_topics = new_df_topics[new_df_topics["Topic"] != -1]

In [62]:
new_df_topics.head()

Unnamed: 0,Document,Topic,Name,CustomName,Representation,Representative_Docs,Top_n_words,Probability,Representative_document
2,air india group to induct an aircraft every si...,1,1_ai_india_new_apple,Business,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...,ai - india - new - apple - google - bank - rbi...,0.591328,False
3,red sea woes exporters seek increased credit a...,1,1_ai_india_new_apple,Business,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...,ai - india - new - apple - google - bank - rbi...,0.662667,False
4,air india group to induct a plane every 6 days...,1,1_ai_india_new_apple,Business,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...,ai - india - new - apple - google - bank - rbi...,0.594366,False
5,q3 earnings results jsw steel pnb acc report m...,1,1_ai_india_new_apple,Business,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...,ai - india - new - apple - google - bank - rbi...,0.967997,False
6,blackstone’s fourthquarter earnings rise 4 as ...,1,1_ai_india_new_apple,Business,"[ai, india, new, apple, google, bank, rbi, sen...",[google working new ai assistant pixel 9 repor...,ai - india - new - apple - google - bank - rbi...,0.469965,False


In [70]:
df_longer = new_df_topics.pivot_table(index=['Document', 'CustomName'], columns='Name', values='Probability', aggfunc='mean').reset_index()

In [72]:
df_longer.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9178 entries, 0 to 9177
Data columns (total 6 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Document                   9178 non-null   object 
 1   CustomName                 9178 non-null   object 
 2   0_2023_khan_say_film       3937 non-null   float64
 3   1_ai_india_new_apple       3265 non-null   float64
 4   2_india_test_cup_world     1715 non-null   float64
 5   3_isro_moon_space_mission  261 non-null    float64
dtypes: float64(4), object(2)
memory usage: 430.3+ KB


In [73]:
num_attributes = df_longer.select_dtypes(include="float64") 

In [88]:
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="median")),
    ('std_scaler', StandardScaler()),
])

num_attribs = ['0_2023_khan_say_film', '1_ai_india_new_apple', 
                '2_india_test_cup_world', '3_isro_moon_space_mission']

In [89]:
column_transformer = ColumnTransformer([
    ("num", num_pipeline, num_attribs),
])

df_prepared = column_transformer.fit_transform(df_longer)

In [90]:
df_prepared

array([[ 0.03486436,  2.59066604,  0.08395283, -0.02345788],
       [ 0.67449767, -0.09774444,  0.08395283, -0.02345788],
       [ 0.03486436, -0.09774444,  0.08395283, -4.06485282],
       ...,
       [ 0.03486436, -0.09774444,  1.85824603, -0.02345788],
       [ 0.03486436, -0.09774444,  1.30898985, -0.02345788],
       [-2.1314458 , -0.09774444,  0.08395283, -0.02345788]])

In [95]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(df_prepared, df_longer["CustomName"], test_size = 0.3, random_state=42)

In [99]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier

multi_clf = OneVsRestClassifier(estimator=RandomForestClassifier(n_estimators=500, max_depth=10, random_state=42))

In [100]:
mod_fit = multi_clf.fit(x_train, y_train)
#df_longer["CustomName"].value_counts()

In [104]:
df_longer["CustomName"].value_counts()

CustomName
Education     3937
Business      3265
Sports        1715
Technology     261
Name: count, dtype: int64

In [103]:
from sklearn.metrics import accuracy_score
modpreds = mod_fit.predict(x_test)
accscore = accuracy_score(y_test, modpreds)

from sklearn.metrics import classification_report
cm_ovr = classification_report(y_test, modpreds)
print(cm_ovr)
print(accscore)

              precision    recall  f1-score   support

    Business       1.00      1.00      1.00       999
   Education       1.00      1.00      1.00      1177
      Sports       1.00      1.00      1.00       498
  Technology       1.00      0.99      0.99        80

    accuracy                           1.00      2754
   macro avg       1.00      1.00      1.00      2754
weighted avg       1.00      1.00      1.00      2754

0.9992737835875091


In [6]:
# pre-defined topics
topic_labels = ["Education", "Business", "Sports", "Technology", "Entertainment"]

In [10]:
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired

topic_model = BERTopic(
    embedding_model="thenlper/gte-small",
    min_topic_size=15,
    zeroshot_topic_list = topic_labels,
    zeroshot_min_similarity=0.85,
    representation_model=KeyBERTInspired()
)

topics, probs = topic_model.fit_transform(alldata["to_lower"])

In [11]:
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,3739,-1_kohli_cricket_india_delhi,"[kohli, cricket, india, delhi, indian, out, wa...",[‘such a nostalgic feeling’ virat kohli excite...
1,0,445,Technology,"[messi, ronaldo, fifa, mbappe, psg, neymar, ue...",[lionel messi to be available for champions le...
2,1,287,1_moon_lunar_isro_nasa,"[moon, lunar, isro, nasa, spacecraft, chandray...",[moon mission done isro aims for the sun with ...
3,2,194,2_sensex_stocks_nifty_rally,"[sensex, stocks, nifty, rally, stock, shares, ...",[sensex gains 149 pts nifty closes above 19630...
4,3,140,3_iphone_apple_iphones_ios,"[iphone, apple, iphones, ios, macbook, pro, ma...",[apple might fix the iphone 15 pro pro max hea...
...,...,...,...,...,...
124,123,17,123_hoaxes_credibility_disinformation_credible,"[hoaxes, credibility, disinformation, credible...",[facts about fiction from hashtags to hoaxes u...
125,124,17,124_died_actor_dies_dead,"[died, actor, dies, dead, starred, cancer, die...",[lee sunkyun death parasite actor underwent 19...
126,125,16,125_iisc_iitmadras_iit_,"[iisc, iitmadras, iit, , , , , , , ]",[iit jam 2024 registration deadline extended t...
127,126,15,126_iit_iitjodhpur_campuses_iitgandhinagar,"[iit, iitjodhpur, campuses, iitgandhinagar, ca...",[iit bombay university of chicago join hands t...


In [28]:
similar_topics, similarity = topic_model.find_topics("Education", top_n = 5)
topic_model.get_topic(similar_topics[0])

[('educators', 0.868628),
 ('education', 0.86747456),
 ('teachers', 0.8667326),
 ('teacher', 0.8649107),
 ('celebrated', 0.8604661),
 ('school', 0.8512543),
 ('unesco', 0.8446436),
 ('celebrate', 0.8444351),
 ('schools', 0.84291387),
 ('international', 0.83929527)]

In [29]:
topic_model.get_topic(0)

[('messi', 0.9273595),
 ('ronaldo', 0.926033),
 ('fifa', 0.8910235),
 ('mbappe', 0.87924504),
 ('psg', 0.87605125),
 ('neymar', 0.8741586),
 ('uefa', 0.87190783),
 ('cristiano', 0.86707425),
 ('juventus', 0.86587703),
 ('guardiola', 0.86341494)]