In [14]:
import pandas as pd
import spacy

# Read the raw text. The encoding is given explicitly so Cyrillic is decoded
# the same way on every platform instead of depending on the locale default.
# NOTE(review): assumes brusov.txt is stored as UTF-8 (the encoding used for
# the output files below) — adjust if the file uses another encoding.
with open('brusov.txt', "r", encoding='utf-8') as file:
    text = file.read()

# Run the small Russian spaCy pipeline over the whole text.
nlp = spacy.load("ru_core_news_sm")
doc = nlp(text)

# One row per token. The last column holds the token's full set of
# morphological features (t.morph), not only tense, so it is labelled "morph"
# and stored as a plain string.
cols = ("text", "lemma", "POS", "explain", "morph")
rows = [
    [t.text, t.lemma_, t.pos_, spacy.explain(t.pos_), str(t.morph)]
    for t in doc
]

df = pd.DataFrame(rows, columns=cols)

# Persist the table as aligned plain text, without the index column.
df_str = df.to_string(index=False)
with open('brusov_spacy.txt', 'w', encoding='utf-8') as f:
    f.write(df_str)

In [13]:
import pandas as pd
import pymorphy3
from pymorphy3.tokenizers import simple_word_tokenize

# NOTE: `text` is the raw string read from brusov.txt in the spaCy cell;
# that cell must have been run before this one.
morph = pymorphy3.MorphAnalyzer()
tokens = simple_word_tokenize(text)

# One row per token: surface form, normal form and grammatical tag, all taken
# from the first parse returned by the analyzer.
cols = ("word", "normal_form", "tag")
rows = []
for token in tokens:
    parsed = morph.parse(token)
    if not parsed:
        # No parse available for this token — nothing to record.
        continue
    best = parsed[0]
    # str() keeps the frame made of plain strings rather than tag objects.
    rows.append([token, best.normal_form, str(best.tag)])
df = pd.DataFrame(rows, columns=cols)
print(df)

# Persist the table as aligned plain text, without the index column.
df_str = df.to_string(index=False)
with open('brusov_pymorphy.txt', 'w', encoding='utf-8') as f:
    f.write(df_str)

        word normal_form                                 tag
0          В           в                                PREP
1          .           .                                PNCT
2          Я           я                 NPRO,1per sing,nomn
3          .           .                                PNCT
4     БРЮСОВ      брюсов  NOUN,anim,masc,Sgtm,Surn sing,nomn
..       ...         ...                                 ...
152     плат       плата            NOUN,inan,femn plur,gent
153        !           !                                PNCT
154       12          12                           NUMB,intg
155  декабря     декабрь            NOUN,inan,masc sing,gent
156     1914        1914                           NUMB,intg

[157 rows x 3 columns]


In [11]:
import json
import pandas as pd
from pymystem3 import Mystem

# NOTE: `text` is the raw string read from brusov.txt in the spaCy cell;
# that cell must have been run before this one.
m = Mystem()
analysis_result = m.analyze(text)

# Flatten Mystem's output into one row per (token, analysis) pair.
data = []

for item in analysis_result:
    # An item may have no 'analysis' key at all, or may carry the key with an
    # empty list. Both cases get a placeholder row so the token is not
    # silently dropped from the table.
    analyses = item.get('analysis')
    if analyses:
        for analysis in analyses:
            data.append({
                'text': item['text'],
                'lex': analysis['lex'],
                'weight': analysis['wt'],
                'grammar': analysis['gr']
            })
    else:
        data.append({
            'text': item['text'],
            'lex': None,
            'weight': None,
            'grammar': None
        })

df = pd.DataFrame(data)

print(df)
# Persist the table as aligned plain text, without the index column.
df_str = df.to_string(index=False)
with open('brusov_pymystem.txt', 'w', encoding='utf-8') as f:
    f.write(df_str)

        text      lex    weight  \
0          В        в  0.000008   
1         .      None       NaN   
2          Я        я  0.000028   
3         .      None       NaN   
4     БРЮСОВ   брюсов  0.999879   
..       ...      ...       ...   
254              None       NaN   
255  декабря  декабрь  1.000000   
256              None       NaN   
257     1914     None       NaN   
258       \n     None       NaN   

                                               grammar  
0    S,сокр=(пр,мн|пр,ед|вин,мн|вин,ед|дат,мн|дат,е...  
1                                                 None  
2    S,сокр=(пр,мн|пр,ед|вин,мн|вин,ед|дат,мн|дат,е...  
3                                                 None  
4                                   S,фам,муж,од=им,ед  
..                                                 ...  
254                                               None  
255                                  S,муж,неод=род,ед  
256                                               None  
257      