In [1]:
!pip install nltk
!pip install gensim
!pip install pyLDAvis
!pip install networkx
!pip install googletrans

Collecting pyLDAvis
[?25l  Downloading https://files.pythonhosted.org/packages/a5/3a/af82e070a8a96e13217c8f362f9a73e82d61ac8fff3a2561946a97f96266/pyLDAvis-2.1.2.tar.gz (1.6MB)
[K     |████████████████████████████████| 1.6MB 652kB/s eta 0:00:01
Collecting funcy (from pyLDAvis)
  Downloading https://files.pythonhosted.org/packages/b3/23/d1f90f4e2af5f9d4921ab3797e33cf0503e3f130dd390a812f3bf59ce9ea/funcy-1.12-py2.py3-none-any.whl
Building wheels for collected packages: pyLDAvis
  Building wheel for pyLDAvis (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/franklin/.cache/pip/wheels/98/71/24/513a99e58bb6b8465bae4d2d5e9dba8f0bef8179e3051ac414
Successfully built pyLDAvis
Installing collected packages: funcy, pyLDAvis
Successfully installed funcy-1.12 pyLDAvis-2.1.2
Collecting googletrans
  Downloading https://files.pythonhosted.org/packages/fd/f0/a22d41d3846d1f46a4f20086141e0428ccc9c6d644aacbfd30990cf46886/googletrans-2.4.0.tar.gz
Building wheels for collected packages: googletr

In [4]:
import nltk

nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')
#nltk.download('universal_tagset')
#nltk.download('treebank')
#nltk.download('maxent_ne_chunker')
#nltk.download('words')

[nltk_data] Downloading package wordnet to /home/franklin/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/franklin/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/franklin/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package punkt to /home/franklin/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [5]:
text = '''
We propose the N-Learning paradigm, which allows the sharing (teaching and learning)
of behaviors in multi-robot systems autonomously and at run time. The proposed paradigm 
is based on behavioral robotics and uses cooperative learning. In the formal model, 
robot behaviors are represented in the form of a graph, where complex behaviors can be 
broken down into simple behaviors that, in turn, can be performed simultaneously. 
N-Learning allows to change the scope domain of the robot without the need for reprogramming. 
That is, a robot that does not have compatible behaviors for a given domain can change and 
learn from the other robots acting in that domain. This feature is useful when there are a 
large number of robots and several, different missions (in different domains) to be fulfilled. 
N-Learning can also be used with emerging behaviors that need to be shared with the team. 
To validate the paradigm, a reference implementation was developed based on the Python 
language and the Robot Operating System, using the Stage simulator and real robots. 
Results show that individuals in a group of robots can learn through interaction in 
the multirobot system. The team comes from a state of less knowledge of robots, 
individually (ie, robots possessing ability to execute a few behaviors) to a 
state of more knowledge (robots accomplishing more behaviors, learned online). 
With this approach, behaviors that are specific to certain environments, 
already existing, do not need to be preprogrammed in the robots, which 
can learn them with the other robots of the team. The experiments demonstrate 
the versatility of N-Learning, validating our approach.
'''

In [6]:
import string

from googletrans import Translator

from nltk import word_tokenize
from nltk.corpus import wordnet as wn
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk import pos_tag

def preprocess(text):
  result=[]
  pos = {"J": wn.ADJ, "N": wn.NOUN, "V": wn.VERB, "R": wn.ADV}
  
  if Translator().detect(text).lang == 'pt':
    text = Translator().translate(text_portuguese,src='pt').text
  
  # We run through the tokenized words joint with their respective pos tag
  for token in pos_tag(word_tokenize(text)):
    
    # For each token, we need to check if it isn't a stopword or a punctuation character
    if token[0].lower() not in stopwords.words('english') and token[0] not in string.punctuation:
        
      tag = pos.get(token[1][0],wn.NOUN)
      
      # We'll work only with nouns and adjectives
      if tag == wn.NOUN or tag == wn.ADJ:
        
        # We lemmatize the token based on it pos tag
        lemma = WordNetLemmatizer().lemmatize(token[0], tag)
        if len(lemma) > 1:
          # Finally, we add the lemmatized token into the list to be returned
          result.append(lemma.lower())
      
  return result

tokens = preprocess(text)

In [18]:
from gensim.corpora import Dictionary
from gensim.models import LdaModel

def main_topic_terms(tokens,nwords=5):
  # First we make a list with the list of tokens
  texts = [tokens]
  
  # We use the class Dictionary to map normalized words to their ids 
  texts_dictionary = Dictionary(texts)
  
  # Convert the document into the bag-of-words format
  corpus = [texts_dictionary.doc2bow(text) for text in texts]
  
  # And now, we build the LDA model
  lda_model = LdaModel(corpus, num_topics=10, id2word=texts_dictionary, passes=10, alpha='auto')
  
  # Finally, with the topics on our hands, we take the terms of the main topic and return them
  return [texts_dictionary[id2word[0]] for id2word in lda_model.get_topic_terms(lda_model[corpus[0]][0][0],topn=nwords)]
  
topics = main_topic_terms(tokens)
topics

['robot', 'behavior', 'n-learning', 'domain', 'team']

In [0]:
import pandas as pd

df = pd.read_csv('dataset.csv')
df

In [52]:
from itertools import combinations
import networkx as nx

graph = nx.Graph()

[graph.add_edges_from(combination) for combination in [list(combinations(curr,2)) for curr in [main_topic_terms(preprocess(abstract)) for abstract in df.abstract.to_list()]]]

print("{} nodes, {} edges".format(len(graph), nx.number_of_edges(graph)))
nx.write_graphml(graph, "graph.graphml")

213 nodes, 627 edges


In [0]:
%matplotlib inline
import pyLDAvis
import pyLDAvis.gensim
vis = pyLDAvis.gensim.prepare(topic_model=lda_model, corpus=corpus, dictionary=texts_dictionary)
pyLDAvis.enable_notebook()
pyLDAvis.display(vis)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))
