<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Libraries" data-toc-modified-id="Libraries-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Libraries</a></span></li><li><span><a href="#Data-Preprocessing" data-toc-modified-id="Data-Preprocessing-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Data Preprocessing</a></span></li><li><span><a href="#Extracting-tweets-from-SQL-database" data-toc-modified-id="Extracting-tweets-from-SQL-database-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Extracting tweets from SQL database</a></span><ul class="toc-item"><li><ul class="toc-item"><li><span><a href="#Word2vec" data-toc-modified-id="Word2vec-3.0.1"><span class="toc-item-num">3.0.1&nbsp;&nbsp;</span>Word2vec</a></span></li></ul></li></ul></li><li><span><a href="#Topic-Modeling" data-toc-modified-id="Topic-Modeling-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Topic Modeling</a></span></li></ul></div>

# Libraries 

    The following Libraries were used in the development of this project:

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import spacy
import nltk
import pyodbc
from nltk.tokenize import RegexpTokenizer, WhitespaceTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import string
from string import punctuation
import collections
from collections import Counter
import json 

import nltk 


'''Natural Language Processing libraries'''
import nltk 
import gensim
import regex as re
import spacy
from spacy.lang.en import English
from nltk.tokenize import word_tokenize 
from nltk.corpus import stopwords
from gensim.models import Word2Vec
import gensim.downloader as api
import re, string, unicodedata
from nltk.stem import LancasterStemmer, WordNetLemmatizer
from nltk import sent_tokenize, word_tokenize
from gensim.models import FastText
from gensim.models import Word2Vec


import warnings
warnings.filterwarnings("ignore")

# Gensim
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel


# Plotting tools

import matplotlib.pyplot as plt

nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')




[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\afabi/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to C:\Users\afabi/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\afabi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\afabi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\afabi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

# Data Preprocessing

The data preprocessing consist of different stages:

In [22]:
class Document:
    """ Retrieve the narratives from the DataFrame and respectively
        store and pre-process it. 
        
        :param df: DataFrame including the reports and the predictor variable. 
        
        
        :ivar data: Stores the DataFrame.
        :ivar text: Stores the narratives as string.
        :ivar corpus: Stores the pre-processed text.
    """
    
    
    def __init__(self, df):
        self.data = df
        self.text = df["text"].astype(str)
        self.textPreProcessing()
        
        
    def remove_non_ascii(self, words):
        """Remove non-ASCII characters from list of tokenized words
        
        :param words:  List of words to be transformed when removing non_ascii characters.
        
        :return new_words: List of words after the transformation of removed non_ascii characters.
        
        """
        new_words = []
        for word in words:
            new_word = unicodedata.normalize('NFKD', word).encode('ascii', 'ignore').decode('utf-8', 'ignore')
            new_words.append(new_word)
        return new_words


    def remove_punctuation(self, words):
        """Remove punctuation from list of tokenized words
        
        :param words:  List of words that will get remove their punctuations, if any. 
        
        :return new_words: List of transformed words.
        
        
        """
        new_words = []
        for word in words:
            new_word = re.sub(r'[^\w\s]', '', word)
            if new_word != '':
                new_words.append(new_word)
        return new_words


    def stem_words(self, words):
        """Stem words in list of tokenized words
        
        :param words:  List of words to be processed. 
        
        :return new_words: List of the received words respective stems.
        
        
        """
        
        stemmer = LancasterStemmer()
        stems = []
        for word in words:
            stem = stemmer.stem(word)
            stems.append(stem)
        return stems
    
    def lemmatize_verbs(self, words):
        """Lemmatize verbs in list of tokenized words
        
        :param words:  List of words to be processed. 
        
        :return new_words: List of the received words respective lemmas.
        
        """
        
        lemmatizer = WordNetLemmatizer()
        lemmas = []
        for word in words:
            lemma = lemmatizer.lemmatize(word, pos='v')
            lemmas.append(lemma)
        return lemmas
    
    
    
    def remove_stopwords(self, words):
        """Remove common words that have no meaning or importance in the sentence.

        :param words:  List of words to be processed and get stop words removed.. 

        :return new_words: List of words with the stop words already removed."""
            
        
        stop_words = set(stopwords.words('spanish')) 
        stop_words1 = set(stopwords.words('english')) 


        
        for word in stop_words:
            if word in words:
                words.remove(word)
                
        for word in stop_words1:
            if word in words:
                words.remove(word)
                
        return words


    
    def normalize(self, words):
        words = self.remove_non_ascii(words)
        words = self.remove_stopwords(words)
        words = self.remove_punctuation(words)
        words = self.lemmatize_verbs(words)
        return words
    
    
    def textPreProcessing(self):
        """Pre-process the text, normalize and clean it.
        The function stores the cleaned text in the self.data
        attribute. """
        p.set_options(p.OPT.URL,p.OPT.MENTION,p.OPT.HASHTAG,p.OPT.RESERVED,p.OPT.SMILEY,p.OPT.NUMBER)


        clean_text = []

        for narrative in self.text:
            sentence = p.clean(narrative)
            #sentence = re.sub('RT @[\w_]+:', '', sentence)

            #sentence = re.sub(r'https?:\/\/.*[\r\n]*', '', sentence)
            sentence = word_tokenize(sentence)
            sentence = self.normalize(sentence)
                
                
            clean_text.append(sentence)
            
            
                    
        print(len(self.text), len(clean_text))
        self.data["clean_text"] = clean_text

In [5]:
def concatTweets(table):
    tweets = ''
    c = 0
    for row in df.tweets:
        if(c == 0):
            c+= 1
            tweets = pd.DataFrame(json.loads(row))
        else:
            tweets = pd.concat([tweets, pd.DataFrame(json.loads(row))], axis=0, ignore_index=True)
    
    
    return tweets 

# Extracting tweets from SQL database

In [23]:
server = 'sqldatamining.database.windows.net'
database = 'SNA'
username = 'UserAdmin'
password = 'Machomen123'   
driver= '{ODBC Driver 13 for SQL Server}'

cnxn = pyodbc.connect('DRIVER={ODBC Driver 13 for SQL Server};SERVER=sqldatamining.database.windows.net,1433', user='UserAdmin' , password='Machomen123', database='SNA')

In [8]:
cursor = cnxn.cursor()

In [9]:
rows = cursor.execute("SELECT * from dbo.Users1 ").fetchmany(40)

In [14]:
df = pd.DataFrame((tuple(t) for t in rows), columns=['id', 
 'author',
 'created_at',
 'location',
 'description',
 'verified',
 'followers',
 'following',
 'favourites_count',
 'statuses_count',
 'lang',
 'tweets',
 'following_json',
 'followers_json']) 


In [16]:
df.head(40)

Unnamed: 0,id,author,created_at,location,description,verified,followers,following,favourites_count,statuses_count,lang,tweets,following_json,followers_json
0,2284857094,themoonisironic,2014-01-16 01:10:33,‘97 // they/them // ????????,"where there is hope, there are trials // art: ...",False,101,516,121166,36377,,"[{""id"": 1356030533780824069, ""created_at"": ""20...","[{""id"": 1387079251476815875, ""author"": ""rikuno...","[{""id"": 1384297303129825284, ""author"": ""bts123..."
1,834324620,bbelita23,2012-09-19 23:21:48,"spooky town, PR",just put it out into the universe,False,670,443,33428,92917,,"[{""id"": 1356026926134140928, ""created_at"": ""20...","[{""id"": 2513536418, ""author"": ""Kvn_Astacio"", ""...","[{""id"": 1289669319526420482, ""author"": ""keviin..."
2,175521394,Stormfocus18,2010-08-06 21:03:08,Puerto Rico_Island,"Loving #MewGulf and #KristSingto, amante de la...",False,199,187,6497,38945,,[],"[{""id"": 1387677517822054400, ""author"": ""tinest...","[{""id"": 1387677517822054400, ""author"": ""tinest..."
3,1685491040,CoraimaINegron,2013-08-20 10:48:59,Puerto Rico,"“El karma te lo devolverá todo, excepto a la m...",False,64,203,1336,1565,,"[{""id"": 1283388114661244934, ""created_at"": ""20...","[{""id"": 299932350, ""author"": ""DMcIntyreWWE"", ""...","[{""id"": 713749428935462916, ""author"": ""DavidRo..."
4,1388202983964520449,Personn34091581,2021-04-30 18:46:05,Costa brava,,False,0,2,9,7,,[],"[{""id"": 560803492, ""author"": ""UrbanLePharaon"",...",[]
5,1685491040,CoraimaINegron,2013-08-20 10:48:59,Puerto Rico,"“El karma te lo devolverá todo, excepto a la m...",False,64,203,1336,1565,,"[{""id"": 1283388114661244934, ""created_at"": ""20...","[{""id"": 299932350, ""author"": ""DMcIntyreWWE"", ""...","[{""id"": 713749428935462916, ""author"": ""DavidRo..."
6,1388202983964520449,Personn34091581,2021-04-30 18:46:05,Costa brava,,False,0,2,9,7,,[],"[{""id"": 560803492, ""author"": ""UrbanLePharaon"",...",[]
7,908687401123553280,rsantanafonseca,2017-09-15 13:42:13,"COPU, UPRRP","19 (+3) | ?????? | Pop culture enthusiast, soc...",False,891,620,121676,124488,,[],"[{""id"": 755882683, ""author"": ""_vidalysrms"", ""c...","[{""id"": 1344047685716815872, ""author"": ""LeQuee..."
8,1346688657365889024,SJCiudadCapital,2021-01-06 05:23:04,,Cuenta Oficial de la Ciudad Capital de Puerto ...,False,2915,183,291,1157,,"[{""id"": 1355954464436473856, ""created_at"": ""20...","[{""id"": 1309256724830969857, ""author"": ""estefa...","[{""id"": 1271152649304518657, ""author"": ""JCruz_..."
9,521836439,Angelicv_G,2012-03-12 01:12:16,Puerto Rico,UT • yo no sé que poner aquí??,False,962,519,115665,130883,,"[{""id"": 1356041673227313154, ""created_at"": ""20...","[{""id"": 1192074658994180096, ""author"": ""Nsnili...","[{""id"": 1383464766283280389, ""author"": ""geegee..."


In [17]:
index = 0
for tweets in df.tweets:
    print(len(tweets))
    if(len(tweets) == 2):
        df.drop([index], inplace=True, axis=0)
    
    index+=1
    
df.reset_index(inplace=True)

index = 0
temp_user = ""
for user in df.author:
    if(user != temp_user):
        print("a")
        temp_user = user 
    else:
        df.drop([index], inplace=True, axis=0)
    
    
    index+=1

435940
181519
2
34422
2
34422
2
2
68260
331733
2
2
826569
2
476
617388
60177
2
725156
763868
731587
494057
2
2
12645
498511
398221
2
542318
325
325
325
702299
685193
2
2
2
140497
261657
685293
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a


In [18]:
df.head(10)

Unnamed: 0,index,id,author,created_at,location,description,verified,followers,following,favourites_count,statuses_count,lang,tweets,following_json,followers_json
0,0,2284857094,themoonisironic,2014-01-16 01:10:33,‘97 // they/them // ????????,"where there is hope, there are trials // art: ...",False,101,516,121166,36377,,"[{""id"": 1356030533780824069, ""created_at"": ""20...","[{""id"": 1387079251476815875, ""author"": ""rikuno...","[{""id"": 1384297303129825284, ""author"": ""bts123..."
1,1,834324620,bbelita23,2012-09-19 23:21:48,"spooky town, PR",just put it out into the universe,False,670,443,33428,92917,,"[{""id"": 1356026926134140928, ""created_at"": ""20...","[{""id"": 2513536418, ""author"": ""Kvn_Astacio"", ""...","[{""id"": 1289669319526420482, ""author"": ""keviin..."
2,3,1685491040,CoraimaINegron,2013-08-20 10:48:59,Puerto Rico,"“El karma te lo devolverá todo, excepto a la m...",False,64,203,1336,1565,,"[{""id"": 1283388114661244934, ""created_at"": ""20...","[{""id"": 299932350, ""author"": ""DMcIntyreWWE"", ""...","[{""id"": 713749428935462916, ""author"": ""DavidRo..."
4,8,1346688657365889024,SJCiudadCapital,2021-01-06 05:23:04,,Cuenta Oficial de la Ciudad Capital de Puerto ...,False,2915,183,291,1157,,"[{""id"": 1355954464436473856, ""created_at"": ""20...","[{""id"": 1309256724830969857, ""author"": ""estefa...","[{""id"": 1271152649304518657, ""author"": ""JCruz_..."
5,9,521836439,Angelicv_G,2012-03-12 01:12:16,Puerto Rico,UT • yo no sé que poner aquí??,False,962,519,115665,130883,,"[{""id"": 1356041673227313154, ""created_at"": ""20...","[{""id"": 1192074658994180096, ""author"": ""Nsnili...","[{""id"": 1383464766283280389, ""author"": ""geegee..."
6,12,1539635634,__lfc7,2013-06-22 22:47:49,,UPRRP | ????| Metas claras ??,False,1194,758,9114,47354,,"[{""id"": 1355634630569975814, ""created_at"": ""20...","[{""id"": 56610247, ""author"": ""contentoso"", ""cre...","[{""id"": 1286771388594954240, ""author"": ""AlyMon..."
7,14,1228334049221128193,Enrique94494039,2020-02-14 15:04:19,,,False,1,57,229,177,,"[{""id"": 1329582757916315648, ""created_at"": ""20...","[{""id"": 1083699592699543552, ""author"": ""SpaceP...","[{""id"": 795646211982323716, ""author"": ""derlisd..."
8,15,1646405720,karywasabi13,2013-08-04 23:50:38,"Río Grande, Puerto Rico",801 | ig:karytza13 | Patria Nueva,False,5216,512,34401,52523,,"[{""id"": 1356032271418085376, ""created_at"": ""20...","[{""id"": 1367836316440858625, ""author"": ""Franju...","[{""id"": 1367836316440858625, ""author"": ""Franju..."
9,16,1274303456325464064,ArekCell,2020-06-20 11:30:13,"Kota Madiun, Jawa Timur",Pejuang rupiah,False,74,930,242,262,,"[{""id"": 1348920054830616582, ""created_at"": ""20...","[{""id"": 777839694419668994, ""author"": ""youngme...","[{""id"": 1361315160586035202, ""author"": ""akukow..."
10,18,1083733167335456768,_sarielys_,2019-01-11 14:31:36,Puerto Rico,Si el corazón se aburre de querer para qué sirve,False,95,102,16430,10190,,"[{""id"": 1355685064361848836, ""created_at"": ""20...","[{""id"": 1326406342266019843, ""author"": ""EmmaGa...","[{""id"": 1287603523, ""author"": ""quoteoneself"", ..."


In [19]:
x = concatTweets(df)




### Word2vec

In [356]:
model = Word2Vec(documento.data["clean_text"], min_count=0, workers=20, window=2,  alpha=0.02, hs=1)



In [503]:
my_dict = dict({})
for idx, key in enumerate(model.wv.key_to_index):
    my_dict[key] = model.wv[key]

In [504]:
vectores = []
for a in documento.data.clean_text:
    t  = []
    for word in a:
        try:
            t.append(my_dict[word])
        except:
            continue
    
    vectores.append(t)
        
documento.data["vectorized_text"] = vectores
documento.data.tail()

Unnamed: 0,text,clean_text,vectorized_text
113,": El ajusta la magnitud a , en el límite de pr...","[El, ajusta, magnitud, limite, producir, tsunami]","[[-0.017272485, 0.008652826, 0.007515971, 0.00..."
114,: “ Van a seguir ocurriendo eventos fuertes. H...,"[Van, seguir, ocurriendo, eventos, fuertes, Ha...","[[-0.0027897793, -0.0016228552, -0.007687815, ..."
115,: Casa colapsada en Yauco.,"[Casa, colapsada, Yauco]","[[0.008535036, 0.013252284, 0.00035941493, 0.0..."
116,: “Las réplicas podrían ser de magnitud mayor ...,"[Las, replicas, podrian, ser, magnitud, mayor,...","[[0.0029013832, -0.0027108788, -0.0015446608, ..."
117,": PRELIMINAR -01-06 :58:03 No hay Aviso, Adver...","[PRELIMINAR, 06, 5803, No, Aviso, Advertencia,...","[[-0.0127012255, 0.0040803654, 0.009027934, -0..."


In [505]:
df.tail()

Unnamed: 0,id,author,created_at,location,description,verified,followers,following,favourites_count,statuses_count,lang,tweets,following_json,followers_json
5,1685491040,CoraimaINegron,2013-08-20 10:48:59,Puerto Rico,"“El karma te lo devolverá todo, excepto a la m...",False,64,203,1336,1565,,"[{""id"": 1283388114661244934, ""created_at"": ""20...","[{""id"": 299932350, ""author"": ""DMcIntyreWWE"", ""...","[{""id"": 713749428935462916, ""author"": ""DavidRo..."
6,1388202983964520449,Personn34091581,2021-04-30 18:46:05,Costa brava,,False,0,2,9,7,,[],"[{""id"": 560803492, ""author"": ""UrbanLePharaon"",...",[]
7,908687401123553280,rsantanafonseca,2017-09-15 13:42:13,"COPU, UPRRP","19 (+3) | ?????? | Pop culture enthusiast, soc...",False,891,620,121676,124488,,[],"[{""id"": 755882683, ""author"": ""_vidalysrms"", ""c...","[{""id"": 1344047685716815872, ""author"": ""LeQuee..."
8,1346688657365889024,SJCiudadCapital,2021-01-06 05:23:04,,Cuenta Oficial de la Ciudad Capital de Puerto ...,False,2915,183,291,1157,,"[{""id"": 1355954464436473856, ""created_at"": ""20...","[{""id"": 1309256724830969857, ""author"": ""estefa...","[{""id"": 1271152649304518657, ""author"": ""JCruz_..."
9,521836439,Angelicv_G,2012-03-12 01:12:16,Puerto Rico,UT • yo no sé que poner aquí??,False,962,519,115665,130883,,"[{""id"": 1356041673227313154, ""created_at"": ""20...","[{""id"": 1192074658994180096, ""author"": ""Nsnili...","[{""id"": 1383464766283280389, ""author"": ""geegee..."


# Topic Modeling 

In [158]:
def asignTopics(users, tweetss):
    topics = userSubSets(tweetss)
    topic0 = []
    topic1 = []
    topic2 = []
    topic3 = []
    topic4 = []
    
    for topic in topics:
        topic0.append(topic[0])
        topic1.append(topic[1])
        topic2.append(topic[2])
        topic3.append(topic[3])
        topic4.append(topic[4])
        
        
    users["Topic0"] = np.asarray(topic0)
    users["Topic1"] = np.asarray(topic1)
    users["Topic2"] = np.asarray(topic2)
    users["Topic3"] = np.asarray(topic3)
    users["Topic4"] = np.asarray(topic4)
    
    
    return users






def userSubSets(tweets_df):
    user = ''
    topics = []
    for instance in tweets_df.user:
        if(user != instance):
            print(instance)
            user = instance
            actual_user =  tweets_df['user']== user
            user_subset = tweets_df[actual_user]
            topics.append(topicModeling(user_subset))
        else:
            continue
            
            
    return topics
            

            
            
def topicModeling(user_subset):
    tweets = Document(pd.DataFrame(user_subset))
    id2word = corpora.Dictionary(tweets.data.clean_text)

    # Create Corpus
    texts = tweets.data.clean_text

    # Term Document Frequency
    corpus = [id2word.doc2bow(text) for text in texts]
    
    lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                           id2word=id2word,
                                           num_topics=5, 
                                           random_state=100,
                                           update_every=1,
                                           chunksize=100,
                                           passes=10,
                                           alpha='auto',
                                           per_word_topics=True)
    
    a = lda_model.get_topics()
    vector = np.vectorize(np.float)
    x = vector(a)
    
    print(lda_model.print_topics())
    
    
    return a

    
    


    
    
            
        
        
    

  and should_run_async(code)


In [159]:
users = asignTopics(df, x)



  and should_run_async(code)


themoonisironic
1538 1538


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.035*"u" + 0.023*"like" + 0.022*"stream" + 0.020*"srr" + 0.017*"s" + 0.016*"i" + 0.013*"love" + 0.013*"today" + 0.012*"play" + 0.010*"hear"'), (1, '0.025*"like" + 0.021*"fkn" + 0.020*"make" + 0.019*"i" + 0.016*"soraru" + 0.014*"be" + 0.014*"even" + 0.014*"look" + 0.012*"sound" + 0.011*"s"'), (2, '0.021*"S" + 0.017*"IS" + 0.017*"HE" + 0.017*"I" + 0.015*"good" + 0.015*"A" + 0.014*"THE" + 0.013*"WHAT" + 0.013*"IT" + 0.013*"T"'), (3, '0.020*"the" + 0.018*"to" + 0.016*"one" + 0.015*"say" + 0.015*"know" + 0.012*"sing" + 0.012*"like" + 0.011*"i" + 0.010*"show" + 0.009*"im"'), (4, '0.029*"i" + 0.021*"get" + 0.021*"want" + 0.016*"man" + 0.015*"go" + 0.014*"sakata" + 0.013*"watch" + 0.012*"chainsaw" + 0.010*"shxt" + 0.010*"every"')]
bbelita23
719 719


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.024*"No" + 0.019*"get" + 0.018*"si" + 0.018*"mas" + 0.017*"ano" + 0.013*"menos" + 0.012*"cbn" + 0.012*"mi" + 0.012*"flow" + 0.012*"objetivo"'), (1, '0.033*"que" + 0.024*"claim" + 0.019*"you" + 0.015*"por" + 0.014*"amo" + 0.013*"lo" + 0.013*"talk" + 0.012*"Asi" + 0.012*"pudo" + 0.010*"nadie"'), (2, '0.032*"love" + 0.016*"fuck" + 0.012*"bad" + 0.011*"sientas" + 0.011*"go" + 0.011*"away" + 0.010*"Year" + 0.010*"New" + 0.010*"Eve" + 0.010*"bunny"'), (3, '0.021*"recibir" + 0.018*"solo" + 0.017*"like" + 0.015*"Yo" + 0.012*"ser" + 0.012*"necesita" + 0.011*"Dios" + 0.011*"q" + 0.011*"l" + 0.011*"dejastes"'), (4, '0.047*"I" + 0.022*"los" + 0.022*"mood" + 0.013*"Me" + 0.012*"mio" + 0.012*"better" + 0.011*"lmao" + 0.011*"idgaf" + 0.011*"Frank" + 0.011*"Ocean"')]
CoraimaINegron
118 118


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.035*"mayor" + 0.031*"Bayamon" + 0.020*"ser" + 0.017*"Temblor" + 0.016*"edad" + 0.016*"hombre" + 0.016*"Rescataron" + 0.016*"Baja" + 0.016*"moderado" + 0.016*"Cabo"'), (1, '0.018*"Mas" + 0.018*"base" + 0.018*"estadounidenses" + 0.018*"Jose" + 0.018*"Molinelli" + 0.012*"La" + 0.010*"isla" + 0.010*"un" + 0.010*"hoy" + 0.010*"magnitud"'), (2, '0.020*"tan" + 0.018*"triste" + 0.018*"Esto" + 0.011*"de" + 0.009*"la" + 0.007*"magnitud" + 0.007*"vez" + 0.005*"No" + 0.005*"temblores" + 0.005*"las"'), (3, '0.033*"tsunami" + 0.031*"No" + 0.029*"Aviso" + 0.025*"PRELIMINAR" + 0.025*"Puerto" + 0.025*"Rico" + 0.022*"Islas" + 0.022*"Advertencia" + 0.022*"Vigilancia" + 0.020*"Virgenes"'), (4, '0.030*"de" + 0.021*"luego" + 0.019*"causados" + 0.019*"refugiados" + 0.019*"terremotos" + 0.019*"Mas" + 0.019*"danos" + 0.015*"los" + 0.010*"Es" + 0.008*"NO"')]
SJCiudadCapital
216 216


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.061*"alcalde" + 0.059*"de" + 0.043*"Juan" + 0.042*"San" + 0.020*"electo" + 0.020*"nuevo" + 0.016*"El" + 0.015*"capital" + 0.014*"Hoy" + 0.013*"ciudad"'), (1, '0.054*"de" + 0.021*"San" + 0.016*"la" + 0.015*"Covid19" + 0.013*"el" + 0.011*"Sebastian" + 0.010*"Recuperando" + 0.010*"Fiestas" + 0.010*"Calle" + 0.009*"manana"'), (2, '0.045*"Hoy" + 0.039*"en" + 0.037*"mucha" + 0.036*"comenzamos" + 0.019*"hace" + 0.019*"gente" + 0.018*"Tengo" + 0.018*"buena" + 0.018*"Dios" + 0.018*"etapa"'), (3, '0.042*"alcalde" + 0.040*"electo" + 0.036*"de" + 0.033*"actos" + 0.033*"juramentacion" + 0.029*"En" + 0.023*"participar" + 0.022*"encuentra" + 0.022*"teatro" + 0.022*"Tapia"'), (4, '0.039*"de" + 0.016*"publico" + 0.015*"limpieza" + 0.014*"ciudad" + 0.014*"Es" + 0.014*"un" + 0.014*"amurallada" + 0.014*"probado" + 0.014*"trabajador" + 0.014*"servidor"')]
Angelicv_G
1182 1182


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.017*"El" + 0.009*"love" + 0.009*"get" + 0.007*"la" + 0.006*"estan" + 0.006*"van" + 0.005*"amp" + 0.005*"stop" + 0.005*"THE" + 0.005*"be"'), (1, '0.019*"I" + 0.010*"This" + 0.009*"de" + 0.008*"A" + 0.008*"say" + 0.007*"solo" + 0.007*"q" + 0.006*"Hoy" + 0.006*"tambien" + 0.006*"it"'), (2, '0.018*"La" + 0.015*"la" + 0.010*"el" + 0.008*"Puerto" + 0.007*"Me" + 0.007*"No" + 0.007*"dia" + 0.007*"Rico" + 0.007*"see" + 0.006*"bien"'), (3, '0.011*"the" + 0.010*"gt" + 0.009*"I" + 0.009*"pa" + 0.008*"one" + 0.008*"mas" + 0.008*"to" + 0.007*"Yo" + 0.007*"make" + 0.006*"Los"'), (4, '0.030*"de" + 0.025*"que" + 0.015*"si" + 0.010*"en" + 0.009*"Que" + 0.007*"gente" + 0.007*"hace" + 0.007*"tu" + 0.007*"ano" + 0.007*"Si"')]
__lfc7
3133 3133


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.035*"que" + 0.026*"de" + 0.021*"No" + 0.014*"si" + 0.014*"Que" + 0.012*"en" + 0.009*"tener" + 0.009*"Si" + 0.008*"gente" + 0.008*"casa"'), (1, '0.013*"PR" + 0.009*"Puerto" + 0.008*"The" + 0.008*"JAJAJAJA" + 0.008*"Rico" + 0.007*"You" + 0.007*"ME" + 0.007*"SI" + 0.007*"Ser" + 0.007*"mundo"'), (2, '0.021*"Yo" + 0.014*"mas" + 0.014*"Me" + 0.013*"pq" + 0.011*"clase" + 0.010*"pa" + 0.008*"La" + 0.008*"ser" + 0.008*"puedo" + 0.007*"tan"'), (3, '0.015*"NO" + 0.015*"A" + 0.012*"Y" + 0.010*"dia" + 0.010*"QUE" + 0.009*"I" + 0.008*"alguien" + 0.008*"cc" + 0.007*"first" + 0.007*"Ya"'), (4, '0.015*"El" + 0.014*"de" + 0.011*"voy" + 0.010*"En" + 0.010*"la" + 0.009*"bien" + 0.008*"el" + 0.008*"vida" + 0.007*"examen" + 0.007*"hoy"')]
Enrique94494039
2 2
[(0, '0.167*"Proto" + 0.167*"amp" + 0.167*"Tec" + 0.167*"Sorteo" + 0.167*"Millon" + 0.167*"Hw"'), (1, '0.167*"Proto" + 0.167*"Tec" + 0.167*"Sorteo" + 0.167*"Millon" + 0.167*"amp" + 0.167*"Hw"'), (2, '0.167*"Hw" + 0.167*"Millon" + 0.167*"Sorteo" 

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.044*"que" + 0.036*"No" + 0.016*"La" + 0.015*"El" + 0.015*"jiji" + 0.013*"es" + 0.012*"igual" + 0.012*"da" + 0.009*"pensando" + 0.008*"like"'), (1, '0.035*"quiero" + 0.022*"bien" + 0.019*"Yo" + 0.018*"cuki" + 0.018*"pa" + 0.017*"Quiero" + 0.014*"ser" + 0.012*"Si" + 0.011*"pelo" + 0.009*"cuarentena"'), (2, '0.033*"si" + 0.031*"Me" + 0.013*"bella" + 0.012*"Que" + 0.012*"Y" + 0.010*"hacer" + 0.010*"cabron" + 0.009*"sabes" + 0.009*"mundo" + 0.008*"nena"'), (3, '0.021*"de" + 0.020*"gt" + 0.017*"tan" + 0.016*"gracias" + 0.012*"Tu" + 0.012*"Te" + 0.011*"amo" + 0.011*"jaja" + 0.010*"diablo" + 0.009*"JAJAJAJAJAJA"'), (4, '0.085*"lt" + 0.073*"3" + 0.044*"mas" + 0.016*"baby" + 0.011*"Diablo" + 0.011*"bro" + 0.009*"alguien" + 0.009*"dia" + 0.009*"voy" + 0.009*"Como"')]
ArekCell
187 187


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.026*"amp" + 0.024*"Like" + 0.021*"us" + 0.016*"CRO" + 0.015*"GIVEAWAY" + 0.015*"RT" + 0.015*"Exchange" + 0.013*"No" + 0.013*"pick" + 0.013*"Follow"'), (1, '0.019*"Live" + 0.019*"Video" + 0.016*"talk" + 0.015*"CEO" + 0.014*"AMA" + 0.013*"PM" + 0.013*"Let" + 0.013*"amp" + 0.012*"YouTube" + 0.012*"in"'), (2, '0.024*"The" + 0.021*"to" + 0.017*"card" + 0.017*"Up" + 0.016*"0" + 0.015*"back" + 0.015*"gift" + 0.015*"include" + 0.014*"Available" + 0.012*"Exchange"'), (3, '0.035*"amp" + 0.025*"App" + 0.023*"fee" + 0.022*"Buy" + 0.019*"list" + 0.019*"cost" + 0.019*"true" + 0.019*"sell" + 0.018*"detail" + 0.014*"Check"'), (4, '0.027*"Syndicate" + 0.024*"Stake" + 0.019*"The" + 0.018*"BTC" + 0.016*"leave" + 0.016*"on" + 0.015*"Sign" + 0.011*"subscribe" + 0.010*"0" + 0.010*"Special"')]
_sarielys_
2567 2567


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.021*"que" + 0.017*"gt" + 0.017*"de" + 0.011*"mas" + 0.011*"s" + 0.010*"to" + 0.009*"people" + 0.008*"Puerto" + 0.008*"Rico" + 0.007*"This"'), (1, '0.009*"ever" + 0.009*"know" + 0.008*"and" + 0.008*"one" + 0.008*"need" + 0.007*"El" + 0.007*"la" + 0.006*"Y" + 0.006*"No" + 0.006*"en"'), (2, '0.016*"A" + 0.012*"say" + 0.012*"si" + 0.011*"The" + 0.010*"u" + 0.010*"amp" + 0.009*"Si" + 0.007*"time" + 0.007*"baby" + 0.006*"back"'), (3, '0.008*"NO" + 0.006*"quiero" + 0.006*"por" + 0.006*"miss" + 0.006*"HER" + 0.005*"nt" + 0.005*"ME" + 0.005*"in" + 0.005*"guy" + 0.005*"take"'), (4, '0.055*"I" + 0.019*"like" + 0.013*"make" + 0.013*"go" + 0.012*"get" + 0.011*"be" + 0.010*"you" + 0.010*"want" + 0.008*"the" + 0.007*"love"')]
icarusio
2764 2764


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.018*"pa" + 0.011*"bien" + 0.011*"Jajaja" + 0.010*"Lo" + 0.009*"mismo" + 0.009*"va" + 0.008*"Hoy" + 0.008*"hoy" + 0.008*"kbrn" + 0.007*"Yo"'), (1, '0.057*"que" + 0.018*"mas" + 0.014*"Es" + 0.012*"Me" + 0.010*"en" + 0.009*"Mira" + 0.009*"el" + 0.008*"Ahora" + 0.008*"ver" + 0.008*"Esta"'), (2, '0.062*"de" + 0.019*"la" + 0.019*"Que" + 0.017*"La" + 0.012*"Los" + 0.012*"Si" + 0.012*"Pero" + 0.011*"Ya" + 0.011*"Paquito" + 0.011*"De"'), (3, '0.031*"si" + 0.023*"Si" + 0.013*"Y" + 0.010*"solo" + 0.010*"estan" + 0.009*"Cuando" + 0.008*"gente" + 0.008*"le" + 0.007*"A" + 0.007*"500"'), (4, '0.028*"El" + 0.026*"No" + 0.019*"los" + 0.019*"de" + 0.012*"En" + 0.010*"el" + 0.009*"dice" + 0.009*"ser" + 0.007*"NO" + 0.007*"puede"')]
KeylimarBirriel
2740 2740


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.018*"pa" + 0.015*"dias" + 0.014*"gente" + 0.012*"La" + 0.011*"QUE" + 0.011*"tan" + 0.010*"quiere" + 0.009*"alguien" + 0.008*"Y" + 0.008*"LA"'), (1, '0.024*"cuarentena" + 0.015*"bien" + 0.013*"casa" + 0.013*"la" + 0.012*"Me" + 0.011*"Mi" + 0.010*"ser" + 0.009*"q" + 0.009*"voy" + 0.008*"a"'), (2, '0.024*"Yo" + 0.023*"si" + 0.017*"El" + 0.015*"nadie" + 0.013*"mejor" + 0.011*"Cuando" + 0.011*"ver" + 0.008*"NO" + 0.008*"de" + 0.008*"veces"'), (3, '0.033*"quiero" + 0.019*"dia" + 0.018*"playa" + 0.012*"Que" + 0.012*"amo" + 0.011*"tener" + 0.010*"Te" + 0.010*"Si" + 0.010*"el" + 0.010*"Ya"'), (4, '0.039*"que" + 0.029*"asi" + 0.023*"mas" + 0.018*"No" + 0.017*"hacer" + 0.016*"I" + 0.011*"A" + 0.008*"sabes" + 0.008*"siempre" + 0.007*"conmigo"')]
valeilu4
1837 1837


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.014*"Yo" + 0.012*"ano" + 0.012*"de" + 0.012*"vez" + 0.011*"en" + 0.010*"ver" + 0.009*"trabajo" + 0.009*"El" + 0.008*"Puerto" + 0.008*"ir"'), (1, '0.021*"de" + 0.021*"que" + 0.012*"si" + 0.010*"la" + 0.010*"mas" + 0.009*"pa" + 0.009*"da" + 0.008*"No" + 0.008*"Si" + 0.007*"gente"'), (2, '0.036*"I" + 0.014*"like" + 0.012*"el" + 0.011*"ser" + 0.010*"siempre" + 0.009*"A" + 0.007*"get" + 0.006*"something" + 0.006*"do" + 0.006*"WHY"'), (3, '0.015*"hoy" + 0.008*"cuenta" + 0.007*"personas" + 0.007*"los" + 0.007*"se" + 0.007*"fuck" + 0.007*"Por" + 0.006*"Disney" + 0.006*"you" + 0.006*"hahahahahaha"'), (4, '0.012*"alguien" + 0.010*"the" + 0.010*"The" + 0.010*"dias" + 0.008*"s" + 0.008*"un" + 0.008*"QUE" + 0.007*"NO" + 0.007*"quiero" + 0.007*"u"')]
teacup_talk
38 38
[(0, '0.033*"know" + 0.033*"like" + 0.033*"ur" + 0.033*"A" + 0.033*"You" + 0.018*"s" + 0.018*"World" + 0.018*"make" + 0.018*"world" + 0.018*"C"'), (1, '0.045*"choose" + 0.025*"course" + 0.025*"How" + 0.025*"Choose" + 0.025*"ly"

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.018*"No" + 0.018*"que" + 0.012*"cosas" + 0.011*"Si" + 0.011*"pa" + 0.010*"quiero" + 0.010*"Te" + 0.010*"muchas" + 0.009*"puneta" + 0.009*"nadie"'), (1, '0.034*"I" + 0.018*"like" + 0.013*"de" + 0.011*"SE" + 0.011*"Me" + 0.011*"love" + 0.010*"people" + 0.009*"the" + 0.009*"This" + 0.008*"real"'), (2, '0.032*"si" + 0.012*"Cuando" + 0.012*"ver" + 0.011*"puedo" + 0.010*"ser" + 0.009*"amigas" + 0.008*"gente" + 0.008*"bien" + 0.008*"I" + 0.007*"siempre"'), (3, '0.041*"Yo" + 0.015*"La" + 0.013*"El" + 0.013*"I" + 0.008*"nadie" + 0.008*"tuitear" + 0.008*"siguen" + 0.007*"carajo" + 0.007*"retuitear" + 0.007*"gatxs"'), (4, '0.018*"mas" + 0.011*"cabron" + 0.010*"nunca" + 0.009*"la" + 0.008*"Bad" + 0.008*"cancion" + 0.007*"palo" + 0.007*"Bunny" + 0.007*"alguien" + 0.007*"Al"')]
MColonCruz
1312 1312


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.019*"Puerto" + 0.019*"the" + 0.017*"Rico" + 0.015*"do" + 0.014*"nt" + 0.012*"s" + 0.011*"say" + 0.008*"take" + 0.007*"press" + 0.006*"right"'), (1, '0.018*"They" + 0.013*"I" + 0.012*"This" + 0.010*"become" + 0.009*"mas" + 0.009*"dias" + 0.008*"British" + 0.008*"away" + 0.008*"release" + 0.008*"voters"'), (2, '0.016*"personas" + 0.015*"de" + 0.015*"ano" + 0.015*"la" + 0.014*"locales" + 0.011*"que" + 0.007*"h" + 0.007*"bienes" + 0.007*"efecto" + 0.007*"economia"'), (3, '0.021*"de" + 0.012*"A" + 0.011*"En" + 0.010*"coronavirus" + 0.009*"No" + 0.008*"que" + 0.008*"hace" + 0.007*"Un" + 0.007*"a" + 0.007*"pm"'), (4, '0.039*"de" + 0.020*"El" + 0.018*"vez" + 0.017*"la" + 0.013*"estan" + 0.012*"violencia" + 0.012*"mujer" + 0.012*"comunidad" + 0.011*"mayor" + 0.011*"silencio"')]
PaganShay
2090 2090


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.028*"pa" + 0.019*"Me" + 0.016*"quiero" + 0.015*"Que" + 0.014*"asi" + 0.012*"trabajo" + 0.010*"mas" + 0.009*"hacer" + 0.009*"ir" + 0.009*"dias"'), (1, '0.054*"que" + 0.035*"de" + 0.016*"No" + 0.013*"hoy" + 0.012*"q" + 0.011*"bien" + 0.011*"Si" + 0.010*"dia" + 0.009*"pa" + 0.008*"Mi"'), (2, '0.033*"si" + 0.030*"Yo" + 0.019*"la" + 0.015*"ahora" + 0.011*"mas" + 0.011*"ver" + 0.010*"tan" + 0.009*"Bad" + 0.008*"Bunny" + 0.007*"quiero"'), (3, '0.025*"gente" + 0.015*"A" + 0.014*"El" + 0.013*"voy" + 0.012*"dormir" + 0.011*"casa" + 0.010*"tener" + 0.010*"Y" + 0.009*"salir" + 0.008*"jajajaja"'), (4, '0.017*"mejor" + 0.014*"vida" + 0.011*"La" + 0.010*"mas" + 0.009*"siempre" + 0.008*"ustedes" + 0.008*"veo" + 0.008*"En" + 0.007*"temblor" + 0.007*"es"')]
Badillo83
1 1
[(0, '0.125*"chance" + 0.125*"This" + 0.125*"RT" + 0.125*"win" + 0.125*"way" + 0.125*"inspire" + 0.125*"controllers" + 0.125*"Follow"'), (1, '0.125*"inspire" + 0.125*"controllers" + 0.125*"way" + 0.125*"Follow" + 0.125*"win" + 0

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.054*"que" + 0.023*"nadie" + 0.017*"alguien" + 0.017*"tener" + 0.015*"gente" + 0.014*"voy" + 0.013*"siempre" + 0.012*"la" + 0.011*"personas" + 0.010*"tan"'), (1, '0.020*"persona" + 0.016*"Si" + 0.016*"amp" + 0.014*"gt" + 0.013*"mejor" + 0.013*"A" + 0.012*"Que" + 0.010*"rico" + 0.010*"NO" + 0.010*"QUE"'), (2, '0.062*"si" + 0.021*"No" + 0.017*"ganas" + 0.009*"feliz" + 0.009*"na" + 0.009*"contigo" + 0.008*"vez" + 0.007*"vas" + 0.007*"van" + 0.007*"saber"'), (3, '0.027*"bien" + 0.018*"q" + 0.018*"vida" + 0.014*"te" + 0.014*"I" + 0.013*"cosas" + 0.013*"nunca" + 0.009*"Dios" + 0.008*"cuenta" + 0.008*"hacer"'), (4, '0.026*"quiero" + 0.026*"pa" + 0.026*"mas" + 0.019*"de" + 0.019*"ser" + 0.018*"Yo" + 0.013*"El" + 0.011*"solo" + 0.010*"dia" + 0.010*"gusta"')]
koberayou
2562 2562


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.024*"get" + 0.014*"like" + 0.011*"A" + 0.008*"look" + 0.008*"want" + 0.007*"point" + 0.007*"amp" + 0.006*"Juice" + 0.006*"MAN" + 0.005*"via"'), (1, '0.037*"I" + 0.018*"Kobe" + 0.012*"say" + 0.011*"s" + 0.011*"see" + 0.010*"The" + 0.007*"When" + 0.007*"day" + 0.006*"shit" + 0.006*"time"'), (2, '0.031*"the" + 0.014*"go" + 0.014*"make" + 0.013*"This" + 0.012*"game" + 0.011*"que" + 0.009*"EXACT" + 0.009*"today" + 0.008*"win" + 0.008*"el"'), (3, '0.019*"Messi" + 0.016*"de" + 0.010*"take" + 0.009*"en" + 0.009*"would" + 0.009*"back" + 0.006*"come" + 0.006*"need" + 0.006*"be" + 0.006*"Que"'), (4, '0.014*"va" + 0.014*"still" + 0.013*"si" + 0.012*"mas" + 0.010*"No" + 0.010*"Lakers" + 0.007*"Puerto" + 0.007*"people" + 0.007*"ver" + 0.006*"let"')]
IZayasI
516 516


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.029*"I" + 0.024*"God" + 0.012*"se" + 0.012*"Es" + 0.011*"thank" + 0.011*"Lo" + 0.010*"bless" + 0.009*"Just" + 0.009*"finish" + 0.009*"every"'), (1, '0.019*"the" + 0.016*"breath" + 0.012*"El" + 0.012*"Gracias" + 0.011*"Dios" + 0.011*"en" + 0.011*"do" + 0.011*"tiempo" + 0.010*"Pero" + 0.010*"that"'), (2, '0.020*"you" + 0.020*"be" + 0.011*"New" + 0.011*"people" + 0.010*"vez" + 0.010*"time" + 0.009*"You" + 0.009*"alla" + 0.008*"navidad" + 0.008*"Cuando"'), (3, '0.028*"de" + 0.012*"puede" + 0.011*"Eso" + 0.011*"online" + 0.011*"Este" + 0.010*"seguir" + 0.010*"dando" + 0.009*"proyecto" + 0.009*"clave" + 0.009*"conocer"'), (4, '0.017*"dias" + 0.013*"bajo" + 0.012*"en" + 0.012*"Bayamon" + 0.010*"varios" + 0.010*"pr" + 0.010*"llevamos" + 0.010*"hogares" + 0.010*"Saben" + 0.010*"voltaje"')]
nnavm_og
1022 1022


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


[(0, '0.022*"Que" + 0.018*"vez" + 0.018*"si" + 0.018*"quiero" + 0.014*"persona" + 0.012*"problema" + 0.012*"Tal" + 0.012*"platos" + 0.011*"que" + 0.010*"Y"'), (1, '0.029*"ver" + 0.015*"DE" + 0.014*"A" + 0.013*"nena" + 0.012*"mas" + 0.010*"ES" + 0.010*"LOS" + 0.009*"Si" + 0.009*"gt" + 0.009*"hace"'), (2, '0.024*"personas" + 0.022*"que" + 0.022*"ano" + 0.020*"No" + 0.019*"mismo" + 0.017*"fin" + 0.015*"solo" + 0.015*"dia" + 0.013*"La" + 0.012*"al"'), (3, '0.024*"te" + 0.019*"la" + 0.015*"Ya" + 0.014*"Me" + 0.013*"tan" + 0.013*"feliz" + 0.013*"si" + 0.011*"hoy" + 0.011*"Estoy" + 0.011*"puedo"'), (4, '0.036*"voy" + 0.024*"si" + 0.023*"unico" + 0.017*"Yo" + 0.012*"de" + 0.012*"pa" + 0.010*"alguien" + 0.010*"ahora" + 0.009*"pedir" + 0.008*"envio"')]
23jamel
2747 2747
[(0, '0.027*"DE" + 0.024*"Y" + 0.017*"A" + 0.016*"LA" + 0.014*"QUE" + 0.013*"Lo" + 0.012*"EL" + 0.011*"merengue" + 0.008*"solo" + 0.008*"sabe"'), (1, '0.027*"Me" + 0.027*"La" + 0.015*"gente" + 0.013*"Yo" + 0.013*"Los" + 0.011*"ca

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  vector = np.vectorize(np.float)


In [151]:
topics = df[["Topic0", "Topic1", "Topic2", "Topic3", "Topic4"]]
topics.head()



  and should_run_async(code)


Unnamed: 0,Topic0,Topic1,Topic2,Topic3,Topic4
0,"[6.4059626e-05, 6.423177e-05, 6.522911e-05, 6....","[7.131866e-05, 7.13527e-05, 7.226892e-05, 7.27...","[6.306197e-05, 0.016837869, 6.1405466e-05, 6.1...","[0.00010639355, 7.018809e-05, 0.0017958591, 0....","[6.8705805e-05, 6.8814385e-05, 7.199822e-05, 6..."
1,"[0.00022911737, 0.00022911589, 0.00022911589, ...","[0.000234279, 0.00023427073, 0.00023427073, 0....","[0.00019619269, 0.00019618758, 0.00019618758, ...","[0.00049253425, 0.00020517374, 0.00020517374, ...","[0.00019165115, 0.00046023697, 0.00046023697, ..."
2,"[0.0007776633, 0.0007776634, 0.004510385, 0.00...","[0.00044708577, 0.00044708565, 0.00044708583, ...","[0.0008799134, 0.0008799134, 0.0051034573, 0.0...","[0.0005133346, 0.0005133345, 0.0005133347, 0.0...","[0.003119646, 0.005321997, 0.0031196459, 0.000..."
4,"[0.00019731079, 0.00639439, 0.0001994974, 0.04...","[0.009712919, 0.0061110817, 0.009712921, 0.020...","[0.0002943054, 0.0002943031, 0.0002943054, 0.0...","[0.00035719664, 0.00035717327, 0.00035719745, ...","[0.0004296852, 0.00042968814, 0.0014735191, 0...."
5,"[8.2627594e-05, 8.2628765e-05, 8.2628765e-05, ...","[8.410271e-05, 8.4103274e-05, 8.4103274e-05, 8...","[7.250998e-05, 7.2512456e-05, 7.2512456e-05, 7...","[6.990478e-05, 6.990746e-05, 6.990746e-05, 8.2...","[0.0003292375, 0.0001769559, 0.0001769559, 0.0..."


In [26]:
from sklearn.manifold import TSNE
warnings.filterwarnings('ignore')
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import matplotlib.style as style 
%matplotlib inline


ModuleNotFoundError: No module named 'sklearn.utils'