# Features de TextMining:

---------------------------------

In [1]:
import os
import sys
import re
import datetime
import dateutil

sys.path.insert(0,os.path.dirname(os.getcwd()))
sys.path.insert(0,os.path.join(os.getcwd(),'grobid'))
sys.path.insert(0,os.getcwd())

import numpy as np
import pandas as pd

from grobid import grobid_client
import grobid_tei_xml
from grobid_to_dataframe import grobid_cli, xmltei_to_dataframe

import plotly

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from pyvis.network import Network
import nltk

import random

import plotly.graph_objects as go

import networkx as nx

!pip install markupsafe==2.0.1

In [2]:
# Enable IPython's autoreload extension in mode 2.
%load_ext autoreload
%autoreload 2
# NOTE(review): looks redundant right after %load_ext above — confirm before removing.
%reload_ext autoreload

---------------------------------

### Comandos Docker

docker run -t --rm --init -p 8080:8070 -p 8081:8071 --memory="9g" lfoppiano/grobid:0.7.0

docker run -t --rm --init -p 8080:8070 -p 8081:8071 lfoppiano/grobid:0.6.2

### Definindo variáveis e caminhos

In [3]:
# Parent directory of the current working directory (presumably the project root — verify).
path = os.path.dirname(os.getcwd())
# Resolves to <path>/artifacts/articles/ml_material/teste.
path_input = os.path.join(path,'artifacts','articles','ml_material','teste')

---------------------------------

### Funções para execução em batch

In [4]:
def get_path(path_input_path):
    """Return ``path_input_path`` when it exists, otherwise the current working directory.

    Parameters
    ----------
    path_input_path : str
        Candidate path, checked with ``os.path.exists``.

    Returns
    -------
    str
        ``path_input_path`` unchanged if it exists on disk, else ``os.getcwd()``.
    """
    return path_input_path if os.path.exists(path_input_path) else os.getcwd()


def batch_process_path(path_input_path, n_workers=2,
                       check_cache=True,
                       cache_folder_name='summarticles_cache',
                       config_path="./grobid/config.json"):
    """Submit the PDFs under ``path_input_path`` to GROBID via ``grobid_cli.process_pdfs``.

    Parameters
    ----------
    path_input_path : str
        Passed to ``process_pdfs`` as ``input_path``.
    n_workers : int
        Passed through as ``n_workers``.
    check_cache : bool
        Passed through as ``check_cache``.
    cache_folder_name : str
        Passed through as ``cache_folder_name``.
    config_path : str
        Configuration file handed to the ``grobid_cli`` constructor.

    Returns
    -------
    Whatever ``grobid_cli.process_pdfs`` returns for this request.
    """
    # Request options, kept in the same keyword order as the original call;
    # everything after n_workers is fixed for every batch in this notebook.
    request = dict(
        input_path=path_input_path,
        check_cache=check_cache,
        cache_folder_name=cache_folder_name,
        n_workers=n_workers,
        service="processFulltextDocument",
        generateIDs=True,
        include_raw_citations=True,
        include_raw_affiliations=True,
        consolidate_header=False,
        consolidate_citations=False,
        tei_coordinates=False,
        segment_sentences=True,
        verbose=True,
    )

    client = grobid_cli(config_path=config_path)
    return client.process_pdfs(**request)


def get_dataframes(result_batch):
    """Convert a batch result into DataFrames via ``xmltei_to_dataframe``.

    Parameters
    ----------
    result_batch
        Forwarded unchanged to ``xmltei_to_dataframe().get_dataframe_articles``.

    Returns
    -------
    tuple
        The ``(dict_dfs, dic_errors)`` pair produced by ``get_dataframe_articles``.
    """
    converter = xmltei_to_dataframe()
    frames, errors = converter.get_dataframe_articles(result_batch)
    return frames, errors


def files_path(path):
    """List the regular files located directly inside ``path``.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).

    Returns
    -------
    list of str
        ``os.path.join(path, name)`` for every entry of ``os.listdir(path)``
        that ``os.path.isfile`` accepts, in ``os.listdir`` order.
    """
    candidates = (os.path.join(path, entry) for entry in os.listdir(path))
    return [candidate for candidate in candidates if os.path.isfile(candidate)]

In [5]:
def run_batch_process(path_input, n_workers=6, check_cache=True, 
                      cache_folder_name='summarticles_cache', 
                      config_path="./grobid/config.json"):
    """Process a folder of articles with GROBID and collect execution metadata.

    Parameters
    ----------
    path_input : str
        Folder containing the input files.
    n_workers : int
        Worker count forwarded to ``batch_process_path``.
    check_cache : bool
        Forwarded to ``batch_process_path``.
    cache_folder_name : str
        Forwarded to ``batch_process_path`` and to ``save_xmltei_files``.
    config_path : str
        GROBID client configuration file. The default resolves to
        ``<cwd>/grobid/config.json``.

    Returns
    -------
    tuple
        ``(dict_dfs, dict_exec, dic_errors)``: the two values returned by
        ``get_dataframes`` plus ``dict_exec``, a dict with the keys ``path``,
        ``start_datetime``, ``grobid_config``, ``files``, ``num_files``,
        ``n_workers``, ``end_datetime``, ``time_exec_sec`` and ``time_exec_min``.
    """
    dict_exec = {'path': path_input}
    dict_exec['start_datetime'] = datetime.datetime.now()

    # Honor the caller's config_path instead of overwriting it. For the default
    # value this still resolves to <cwd>/grobid/config.json.
    config_path = os.path.abspath(config_path)
    dict_exec['grobid_config'] = config_path

    gcli = grobid_client.GrobidClient(config_path=config_path, check_server=False)

    dict_exec['files'] = gcli.get_input_files(path_input)
    dict_exec['num_files'] = len(dict_exec['files'])
    dict_exec['n_workers'] = n_workers

    path_input_path = get_path(path_input)
    # Forward cache_folder_name and config_path so processing and saving agree.
    result_batch = batch_process_path(path_input_path,
                                      n_workers=dict_exec['n_workers'],
                                      check_cache=check_cache,
                                      cache_folder_name=cache_folder_name,
                                      config_path=config_path)
    dict_dfs, dic_errors = get_dataframes(result_batch)

    # Save next to the folder that was actually processed rather than relying
    # on a notebook-level global that may not exist on a fresh kernel.
    gcli.save_xmltei_files(result_batch, path_input_path, cache_folder_name=cache_folder_name)

    dict_exec['end_datetime'] = datetime.datetime.now()
    elapsed = dict_exec['end_datetime'] - dict_exec['start_datetime']
    # total_seconds() covers the whole interval; .seconds discards full days.
    dict_exec['time_exec_sec'] = elapsed.total_seconds()
    dict_exec['time_exec_min'] = elapsed.total_seconds() / 60

    return dict_dfs, dict_exec, dic_errors

In [6]:
# NOTE(review): machine-specific absolute Windows path; the notebook will not run
# elsewhere unless this is edited. path_input (defined earlier) is built relative
# to the working directory — confirm whether that was meant to be used instead.
input_folder_path = r"""C:\Users\vierb\OneDrive\Área de Trabalho\Projetos\PGC\artifacts\articles\ml_material"""

In [7]:
%%time
# Run the batch over input_folder_path with 10 workers; the log and wall time are printed below.
dict_dfs, dict_exec, dic_errors = run_batch_process(path_input=input_folder_path, 
                                                    n_workers=10, 
                                                    check_cache=True, 
                                                    cache_folder_name='summarticles_cache', 
                                                    config_path="./grobid/config.json")

GROBID server is up and running
587 files to process in current batch
[Input Files] 587
[Cache Files] 587
In the end, we have: 0  new files to process!
And we have : 587  files to back from cache!
Processed articles: 581
Number articles with errors: 6
Wall time: 23.1 s


---------------------------------

### Trabalhando no tratamento do texto

In [8]:
# NOTE(review): nltk is already imported in the first cell; this import is a duplicate.
import nltk
#import spacy
#import corenlp
#import textblob
#import gensim
#import transformers

# Download the NLTK resources by name; presumably needed by the text-preparation
# step that follows — confirm which ones text_prep actually uses.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

--------------------------------

In [9]:
from text import text_prep, text_mining, text_viz

In [10]:
tprep = text_prep()

In [11]:
# Add a "<column>_prep" column for each raw text column, in the same order as before.
for text_column in ('acknowledgement', 'abstract', 'body'):
    dict_dfs['df_doc_info'][text_column + '_prep'] = tprep.text_preparation_column(dict_dfs['df_doc_info'][text_column])

--------------------------------

Criando BOW e TFIDF:

In [12]:
tmining = text_mining()

In [13]:
# Prepared abstract and body texts as plain lists; missing values become a single space.
documents_abs = dict_dfs['df_doc_info']['abstract_prep'].fillna(' ').tolist()
documents_body = dict_dfs['df_doc_info']['body_prep'].fillna(' ').tolist()

In [14]:
# TF-IDF frames for abstracts and for bodies.
# NOTE(review): despite "abstract" in its name, df_tfidf_abstract_body is built from documents_body.
df_tfidf_abstract_abs = tmining.get_df_tfidf(documents_abs)
df_tfidf_abstract_body = tmining.get_df_tfidf(documents_body)

In [15]:
# Bag-of-words frames for abstracts and for bodies.
# NOTE(review): despite "abstract" in its name, df_bow_abstract_body is built from documents_body.
df_bow_abstract_abs = tmining.get_df_bow(documents_abs)
df_bow_abstract_body = tmining.get_df_bow(documents_body)

In [16]:
df_tfidf_abstract_abs.head()

Unnamed: 0,aa,aa aluminum,aare,ab,ab initio,abaqus,abilities,ability,ability feasible,ability gfa,...,zirconia,zn,zn alloy,zn coat,zncl,zone,zone fz,zr,zr hf,zro
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.072353,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.075554,0.075554,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.208047,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
df_bow_abstract_body.head()

Unnamed: 0,aa,aa aa,aa ab,aa alloy,aa aluminium,aa aluminum,aa bb,aa sample,aa solution,aa vector,...,zunger,zunger pseudopotential,zuo,zuo et,zwick,zwickroell,zx,zy,zz,zz xx
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
df_bow_abstract_body.shape

(581, 176907)

--------------------------------

In [19]:
dict_dfs['df_doc_info']['abstract'][0]

'Modeling stress-strain curves in mechanics of material needs deriving suitable constitutive equations and solving those equations using some Euler-like scheme in th more general setting of a return mapping algorithm. However, such an approach which is based on establishing a framework that is thermodynamically consistent and physically motivated may require a substantial theoretical and experimental work. In order to bypass this traditional scheme, we consider an alternative data-driven approach motivated by tools developed in the larger scope of machine learning. In particular, we show how feedforward neural networks have the ability to accurately model stress-strain relationships for an isotropic and rate independent elastoplastic solid. Taking into account the experimental cost of producing many measurements and the computational cost associated with the training of neural networks, we elaborate a full end-to-end methodology to model stress-strain curves with neural networks in a l

nltk.download("all")

In [27]:
nltk.download("maxent_treebank_pos_tagger")

[nltk_data] Downloading package maxent_treebank_pos_tagger to
[nltk_data]     C:\Users\vierb\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\maxent_treebank_pos_tagger.zip.


True

In [53]:
# NOTE(review): `tokens` is not assigned in any visible cell, so this fails on a fresh
# kernel. The recorded output looks like a word tokenization of the first abstract —
# confirm and restore the defining cell.
tokens[0:10]

['Modeling',
 'stress-strain',
 'curves',
 'in',
 'mechanics',
 'of',
 'material',
 'needs',
 'deriving',
 'suitable']

In [28]:
nltk.pos_tag(tokens)

[('Modeling', 'VBG'),
 ('stress-strain', 'JJ'),
 ('curves', 'NNS'),
 ('in', 'IN'),
 ('mechanics', 'NNS'),
 ('of', 'IN'),
 ('material', 'NN'),
 ('needs', 'NNS'),
 ('deriving', 'VBG'),
 ('suitable', 'JJ'),
 ('constitutive', 'JJ'),
 ('equations', 'NNS'),
 ('and', 'CC'),
 ('solving', 'VBG'),
 ('those', 'DT'),
 ('equations', 'NNS'),
 ('using', 'VBG'),
 ('some', 'DT'),
 ('Euler-like', 'JJ'),
 ('scheme', 'NN'),
 ('in', 'IN'),
 ('th', 'NN'),
 ('more', 'RBR'),
 ('general', 'JJ'),
 ('setting', 'NN'),
 ('of', 'IN'),
 ('a', 'DT'),
 ('return', 'NN'),
 ('mapping', 'NN'),
 ('algorithm', 'NN'),
 ('.', '.'),
 ('However', 'RB'),
 (',', ','),
 ('such', 'PDT'),
 ('an', 'DT'),
 ('approach', 'NN'),
 ('which', 'WDT'),
 ('is', 'VBZ'),
 ('based', 'VBN'),
 ('on', 'IN'),
 ('establishing', 'VBG'),
 ('a', 'DT'),
 ('framework', 'NN'),
 ('that', 'WDT'),
 ('is', 'VBZ'),
 ('thermodynamically', 'RB'),
 ('consistent', 'JJ'),
 ('and', 'CC'),
 ('physically', 'RB'),
 ('motivated', 'VBD'),
 ('may', 'MD'),
 ('require', 'VB')

In [29]:
from annotated_text import annotated_text

In [54]:
annotated_text?

[1;31mSignature:[0m [0mannotated_text[0m[1;33m([0m[1;33m*[0m[0margs[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Writes text with annotations into your Streamlit app.

Parameters
----------
*args : str, tuple or htbuilder.HtmlElement
    Arguments can be:
    - strings, to draw the string as-is on the screen.
    - tuples of the form (main_text, annotation_text, background, color) where
      background and foreground colors are optional and should be an CSS-valid string such as
      "#aabbcc" or "rgb(10, 20, 30)"
    - HtmlElement objects in case you want to customize the annotations further. In particular,
      you can import the `annotation()` function from this module to easily produce annotations
      whose CSS you can customize via keyword arguments.

Examples
--------

>>> annotated_text(
...     "This ",
...     ("is", "verb", "#8ef"),
...     " some ",
...     ("annotated", "adj", "#faa"),
...     ("text", "noun", "#afa"),
...     " for those of 

In [30]:
# NOTE(review): per its docstring, annotated_text writes into a Streamlit app; the
# recorded output here is only a "streamlit run" hint, so nothing renders in the notebook.
annotated_text(
    "This ",
    ("is", "verb"),
    " some ",
    ("annotated", "adj"),
    ("text", "noun"),
    " for those of ",
    ("you", "pronoun"),
    " who ",
    ("like", "verb"),
    " this sort of ",
    ("thing", "noun"),
    "."
)

2022-07-21 00:08:28.240 
  command:

    streamlit run c:\Users\vierb\anaconda3\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [32]:
# NOTE(review): this points Streamlit at ipykernel_launcher.py rather than an app script,
# and the recorded output is just ^C — confirm whether this cell should be kept.
!streamlit run c:\Users\vierb\anaconda3\lib\site-packages\ipykernel_launcher.py

^C


In [33]:
dict_dfs.keys()

dict_keys(['df_doc_info', 'df_doc_head', 'df_doc_authors', 'df_doc_citations', 'df_doc_authors_citations'])

In [50]:
dict_dfs['df_doc_info'].head()

Unnamed: 0_level_0,grobid_version,grobid_timestamp,pdf_md5,language_code,acknowledgement,abstract,body,annex,file,status,raw_data,acknowledgement_prep,abstract_prep,body_prep
article_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,0.7.0,2022-05-15 02:09:00,1117E90601706A168EDA1D37671DEC5F,en,,Modeling stress-strain curves in mechanics of ...,Introduction \n Background and motivation In s...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",,modeling stressstrain curve mechanics material...,introduction background motivation in solid me...
2,0.7.0,2022-05-15 02:10:00,4A7C74D2B2C59D8DDF4B895EBDB87272,en,Acknowledgements,This is a PDF file of an article that has unde...,Introduction Many observable physical phenomen...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements,this pdf file article undergo enhancements acc...,introduction many observable physical phenomen...
3,0.7.0,2022-05-15 02:11:00,50869EC05C100CDB0D34583B9D3B77B9,en,Acknowledgements This work is supported by the...,Applying deep learning methods in materials sc...,Background & Summary Designing novel molecules...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements this work support robotic dis...,applying deep learn methods materials science ...,background summary designing novel molecules s...
4,0.7.0,2022-05-15 02:10:00,3D7A73B4B358B6F999AB7F4C83C7457E,en,Acknowledgements The authors acknowledge Dr G....,This study involves optimizing hardness and to...,Introduction Zirconia toughened alumina (ZTA) ...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements the author acknowledge dr gvn...,this study involve optimize hardness toughness...,introduction zirconia toughen alumina zta prim...
5,0.7.0,2022-05-15 02:08:00,6C7700B89A64B3A1DAC9473D99847991,en,Acknowledgements The author thanks the DFG for...,Modern simulation techniques have reached a le...,Introduction Advances in science and technolog...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements the author thank dfg heisenbe...,modern simulation techniques reach level matur...,introduction advances science technology rely ...


In [49]:
# NOTE(review): `list_articles_select` is not assigned in any visible cell, so this fails
# on a fresh kernel. The recorded output is a list of PDF file names — restore the defining cell.
list_articles_select

['Computational Materials Science - j.commatsci.2020.109629.pdf',
 'Journal Pre-proof Mesoscale informed parameter estimation through machine learning: A case-study in fracture modeling - j.jcp.2020.109719.pdf',
 'QM-sym, a symmetrized quantum chemistry database of 135 kilo molecules - s41597-019-0237-9.pdf',
 'Optimizing mechanical properties of spark plasma sintered ZTA using neural network and genetic algorithm - j.msea.2011.09.008.pdf',
 'High-Dimensional Neural Network Potentials for Complex Systems - anie.201703114.pdf',
 'A general and transferable deep learning framework for predicting phase formation in materials - s41524-020-00488-z.pdf',
 'Machine learning and symbolic regression investigation on stability of MXene materials - j.commatsci.2021.110578.pdf',
 'Computational Materials Science - j.commatsci.2019.109203.pdf',
 'Atomistic modeling of meso-timescale processes with SEAKMC: A perspective and recent developments - j.commatsci.2021.110390.pdf',
 'Computational Material

In [34]:
dict_dfs['df_doc_info']

Unnamed: 0_level_0,grobid_version,grobid_timestamp,pdf_md5,language_code,acknowledgement,abstract,body,annex,file,status,raw_data,acknowledgement_prep,abstract_prep,body_prep
article_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,0.7.0,2022-05-15 02:09:00,1117E90601706A168EDA1D37671DEC5F,en,,Modeling stress-strain curves in mechanics of ...,Introduction \n Background and motivation In s...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",,modeling stressstrain curve mechanics material...,introduction background motivation in solid me...
2,0.7.0,2022-05-15 02:10:00,4A7C74D2B2C59D8DDF4B895EBDB87272,en,Acknowledgements,This is a PDF file of an article that has unde...,Introduction Many observable physical phenomen...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements,this pdf file article undergo enhancements acc...,introduction many observable physical phenomen...
3,0.7.0,2022-05-15 02:11:00,50869EC05C100CDB0D34583B9D3B77B9,en,Acknowledgements This work is supported by the...,Applying deep learning methods in materials sc...,Background & Summary Designing novel molecules...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements this work support robotic dis...,applying deep learn methods materials science ...,background summary designing novel molecules s...
4,0.7.0,2022-05-15 02:10:00,3D7A73B4B358B6F999AB7F4C83C7457E,en,Acknowledgements The authors acknowledge Dr G....,This study involves optimizing hardness and to...,Introduction Zirconia toughened alumina (ZTA) ...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements the author acknowledge dr gvn...,this study involve optimize hardness toughness...,introduction zirconia toughen alumina zta prim...
5,0.7.0,2022-05-15 02:08:00,6C7700B89A64B3A1DAC9473D99847991,en,Acknowledgements The author thanks the DFG for...,Modern simulation techniques have reached a le...,Introduction Advances in science and technolog...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements the author thank dfg heisenbe...,modern simulation techniques reach level matur...,introduction advances science technology rely ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
583,0.7.0,2022-05-15 02:11:00,4A1BC3FA7E28DA5CD52A24872B64D6E1,en,Acknowledgements,This is a PDF file of an article that has unde...,Introduction The formation of solids from a li...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements,this pdf file article undergo enhancements acc...,introduction the formation solids liquid solut...
584,0.7.0,2022-05-15 02:10:00,3A74E4C2811C25DAE739ED589F582522,en,Acknowledgements,Computational modeling is playing an increasin...,Introduction Numerous computational models are...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgements,computational model play increasingly importan...,introduction numerous computational model cont...
585,0.7.0,2022-05-15 02:10:00,2798C257BBAC6B8D61CDC72EC4F39FB1,en,Acknowledgments S.L. acknowledges support from...,A large database is desired for machine learni...,"Introduction In the past years, there has been...",,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",acknowledgments sl acknowledge support ministr...,large database desire machine learn ml technol...,introduction in past years increase research i...
586,0.7.0,2022-05-15 02:11:00,F19AA8813E5420718C11AB00FBABDFC8,en,,"In the present paper, two models based on arti...",Introduction Strength assessment of concrete i...,,C:\Users\vierb\OneDrive\Área de Trabalho\Proje...,status 200,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<TEI x...",,in present paper two model base artificial neu...,introduction strength assessment concrete main...


In [5]:
import spacy

nlp = spacy.load("en_core_web_sm")

In [6]:
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
[(ent.text,ent.label_) for ent in doc.ents]

fonte: https://spacy.io/usage/spacy-101#annotations-ner

!pip install spacy

In [4]:
!python -m spacy download en_core_web_sm

✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')


2022-07-24 15:12:44.201874: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2022-07-24 15:12:44.202529: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [1]:
!conda install geopandas

Collecting package metadata (current_repodata.json): ...working... done


Building graph of deps:   0%|          | 0/310 [00:00<?, ?it/s]
Examining toml:   0%|          | 0/310 [00:00<?, ?it/s]        


Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Solving environment: ...working... failed with repodata from current_repodata.json, will retry with next repodata source.
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Solving environment: ...working... 
Found conflicts! Looking for incompatible packages.
This can take several minutes.  Press CTRL-C to abort.
failed



Examining console_shortcut:   0%|          | 1/310 [00:00<00:17, 17.24it/s]
Examining pandas:   1%|          | 2/310 [00:00<00:14, 20.93it/s]          
Examining libxslt:   1%|          | 3/310 [00:03<06:31,  1.28s/it]
Examining libxslt:   1%|▏         | 4/310 [00:03<04:52,  1.05it/s]
Examining scikit-learn:   1%|▏         | 4/310 [00:03<04:52,  1.05it/s]
Examining markupsafe:   2%|▏         | 5/310 [00:07<04:51,  1.05it/s]  
Examining markupsafe:   2%|▏         | 6/310 [00:07<06:14,  1.23s/it]
Examining backcall:   2%|▏         | 6/310 [00:07<06:14,  1.23s/it]  
Examining libsodium:   2%|▏         | 7/310 [00:07<06:13,  1.23s/it]
Examining attrs:   3%|▎         | 8/310 [00:07<06:12,  1.23s/it]    
Examining pep8:   3%|▎         | 9/310 [00:07<06:11,  1.23s/it] 
Examining pep8:   3%|▎         | 10/310 [00:07<04:24,  1.13it/s]
Examining wrapt:   3%|▎         | 10/310 [00:07<04:24,  1.13it/s]
Examining babel:   4%|▎         | 11/310 [00:07<04:23,  1.13it/s]
Examining entrypoints:   4%|▍

!pip install spacy-streamlit

https://github.com/explosion/spacy-streamlit