In [117]:
import csv
import codecs
import fuzzywuzzy
import re
from itertools import groupby
from fuzzywuzzy import process, fuzz
from collections import namedtuple
import requests

In [2]:
def read_csv(file):
    with codecs.open(file, encoding='utf-8') as f:
        data = list(csv.reader(f, delimiter=';', quotechar='"'))[1:]
        return data

In [17]:
catalog = read_csv('catalog.txt')
len(catalog)

453621

In [91]:
liter = read_csv('liter.csv')
len(liter)

543

In [97]:
curr_author = None
for l in liter:
    if l[0]:
        curr_author = l[0]
    else:
        l[0] = curr_author

In [105]:
def get_surname(s):
    return re.split(',? ', s)[0]

In [106]:
author_set = set(get_surname(t[0]) for t in liter)
catalog_with_authors = [x for x in catalog if x[0] in author_set]
catalog_by_author_surname = dict((surname, list(entries)) for surname, entries in groupby(catalog_with_authors, lambda x: x[0]))
len(catalog_by_author_surname['Замятин'])

85

In [156]:
def match_catalog_entry(liter_entry):
    surname = get_surname(liter_entry[0])
    book = liter_entry[3]
    cat_by_author = catalog_by_author_surname[surname]
    cat_book_names = [x[3] for x in cat_by_author]
    best_book, score = process.extractOne(book, cat_book_names, scorer=fuzz.token_sort_ratio)
    best_cat_entry = next(x for x in cat_by_author if x[3] == best_book)
    return (score, best_cat_entry, liter_entry)

matched_with_cat = list(map(match_catalog_entry, liter))

In [158]:
for s, c, l in matched_with_cat:
    print(s, l[0], (l[3], c[3]))

100 Лажечников, Иван Иванович ('Ледяной дом', 'Ледяной дом')
100 Лажечников, Иван Иванович ('Последний Новик', 'Последний Новик')
100 Лажечников, Иван Иванович ('Басурман', 'Басурман')
100 Бестужев-Марлинский, Александр Александрович (Александр Марлинский) ('Страшное гаданье', 'Страшное гаданье')
100 Бестужев-Марлинский, Александр Александрович (Александр Марлинский) ('Вечер на бивуаке', 'Вечер на бивуаке')
100 Бестужев-Марлинский, Александр Александрович (Александр Марлинский) ('Наезды', 'Наезды')
100 Чехов, Антон Павлович ('Драма на охоте', 'Драма на охоте')
62 Чехов, Антон Павлович ('Степь', 'Мститель')
100 Чехов, Антон Павлович ('Палата №6', 'Палата № 6')
100 Чехов, Антон Павлович ('В овраге', 'В овраге')
100 Пушкин, Александр Сергеевич ('Станционный смотритель', 'Станционный смотритель')
100 Пушкин, Александр Сергеевич ('Пиковая дама', 'Пиковая Дама')
100 Пушкин, Александр Сергеевич ('Дубровский', 'Дубровский')
100 Пушкин, Александр Сергеевич ('Капитанская дочка', 'Капитанская доч

100 Ефремов, Иван Антонович ('Час быка', 'Час Быка')
38 Булычев ('http://www.2lib.ru/authors/376.html', 'Alice: The Girl From Earth')
100 Булычев ('Сто лет тому вперед', 'Сто лет тому вперед')
100 Булычев ('Похищение чародея', 'Похищение чародея')
100 Булычев ('Перпендикулярный мир', 'Перпендикулярный мир')
100 Булычев ('Вид на битву с высоты', 'Вид на битву с высоты')
100 Вайнер, Аркадий Александрович ('Эра милосердия', 'Эра милосердия')
100 Вайнер, Аркадий Александрович ('Город принял!', 'Город принял')
97 Вайнер, Аркадий Александрович ('Петля и камень в зелёной траве', 'Петля и камень в зеленой траве')
100 Гранин, Даниил Александрович ('Искатели', 'Искатели')
100 Гранин, Даниил Александрович ('Иду на грозу', 'Иду на грозу')
100 Гранин, Даниил Александрович ('Картина', 'Картина')
100 Гранин, Даниил Александрович ('Еще заметен след', 'Еще заметен след')
100 Гранин, Даниил Александрович ('Наш дорогой Роман Авдеевич', 'Наш дорогой Роман Авдеевич')
100 Искандер, Фазиль Абдулович ('Созвез

In [119]:
well_matched = [x for x in matched_with_cat if x[0] > 90]
len(well_matched)

446

In [139]:
ids = [cat[-1] for score, cat, lit in well_matched]
urls = ['http://flibusta.is/b/%s/fb2' % id for id in ids]
urls

['http://flibusta.is/b/388207/fb2',
 'http://flibusta.is/b/425691/fb2',
 'http://flibusta.is/b/168343/fb2',
 'http://flibusta.is/b/225045/fb2',
 'http://flibusta.is/b/329591/fb2',
 'http://flibusta.is/b/235106/fb2',
 'http://flibusta.is/b/10547/fb2',
 'http://flibusta.is/b/295292/fb2',
 'http://flibusta.is/b/169265/fb2',
 'http://flibusta.is/b/348954/fb2',
 'http://flibusta.is/b/77039/fb2',
 'http://flibusta.is/b/253788/fb2',
 'http://flibusta.is/b/395149/fb2',
 'http://flibusta.is/b/102645/fb2',
 'http://flibusta.is/b/96412/fb2',
 'http://flibusta.is/b/137565/fb2',
 'http://flibusta.is/b/74285/fb2',
 'http://flibusta.is/b/173116/fb2',
 'http://flibusta.is/b/395621/fb2',
 'http://flibusta.is/b/74105/fb2',
 'http://flibusta.is/b/69892/fb2',
 'http://flibusta.is/b/150978/fb2',
 'http://flibusta.is/b/97832/fb2',
 'http://flibusta.is/b/78600/fb2',
 'http://flibusta.is/b/401178/fb2',
 'http://flibusta.is/b/461562/fb2',
 'http://flibusta.is/b/388665/fb2',
 'http://flibusta.is/b/72596/fb2',
 

In [145]:
def download_file(url):
    r = requests.get(url, stream=True, proxies = {
      "http": "45.76.95.243:3128"
    })
    local_filename = re.match('.*filename="(.+)".*', r.headers['Content-Disposition'])[1]
    print(local_filename)
    with open('fb2/' + local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024*1024):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)
                #f.flush()
    return local_filename

for url in urls:
    try:
        download_file(url)
    except Exception as e:
        print(e)

'content-disposition'
'content-disposition'
Bulychev_Perpendikulyarnyy-mir.g38QIA.408502.fb2.zip
'content-disposition'
Vayner_Era-miloserdiya.tsDo2g.324990.fb2.zip
Vayner_Gorod-prinyal.3ofHjw.200525.fb2.zip
Vayner_Dilogiya_1_Petlya-i-kamen-v-zelenoy-trave.Qj7Bsw.201842.fb2.zip
Granin_Iskateli.bhevgw.498584.fb2.zip
Granin_Idu-na-grozu.rC7RHg.499588.fb2.zip
Granin_Kartina.pp67Hg.20477.fb2.zip
Granin_Eshche-zameten-sled.qk5PRw.149025.fb2.zip
Granin_Nash-dorogoy-Roman-Avdeevich.mi15NA.490942.fb2.zip
Iskander_Sozvezdie-Kozlotura.LfWWBg.156514.fb2.zip
Iskander_Sandro-iz-Chegema.1nMzkQ.273618.fb2.zip
Iskander_Morskoy-skorpion.UiuNBg.153215.fb2.zip
Iskander_Kroliki-i-udavy.zk4xWA.70498.fb2.zip
Iskander_Trinadcatyy-podvig-Gerakla.1mM4qw.388896.fb2.zip
Kabakov_Nevozvrashchenec.onH8QA.24693.fb2.zip
Kabakov_Posledniy-geroy.NUzPXw.24695.fb2.zip
Kabakov_Sochinitel.t8Swbg.24698.fb2.zip
Kaverin_Dva-kapitana.qh2Zmg.68345.fb2.zip
Kaverin_Otkrytaya-kniga.eUgdOw.68344.fb2.zip
Kaverin_Sem-par-nechistyh.AOp

Uspenskiy_Krasnaya-ruka-chernaya-prostynya-zelenye-palcy.b9fQxw.77731.fb2.zip
'content-disposition'
'content-disposition'
'content-disposition'
Pogorelskiy_Lafertovskaya-makovnica.WmDcMQ.43667.fb2.zip
Pogorelskiy_Lyubimye-knigi-Lva-Tolstogo-Detstvo-do-14-let-_1_Chernaya-kurica-ili-Podzemnye-zhiteli.13aYXg.110929.fb2.zip
Vodolazkin_Aviator.9JpD7Q.449077.fb2.zip
Vodolazkin_Lavr.fga7xw.310583.fb2.zip
Evgeniy_Vodolazkin_Solovev_i_Larionov.pdf
Yuzefovich_Syshchik-Putilin_3_Knyaz-vetra.N0bLlQ.76325.fb2.zip
Yuzefovich_Zhuravli-i-karliki.AnIC0Q.148480.fb2.zip
Shishkin_Vzyatie-Izmaila.rMm-kg.248856.fb2.zip
Shishkin_Venerin-volos.yH-w_A.248855.fb2.zip
Shishkin_Pismovnik.-AQ4xg.248853.fb2.zip
Matveeva_Pereval-Dyatlova.2PJxMA.36282.fb2.zip
Matveeva_Nebesa.MwQjQg.380537.fb2.zip
Fedin_Goroda-i-gody.OuJQdg.176408.fb2.zip
Fedin_Pervye-radosti_1_Pervye-radosti.oeQtrA.151379.fb2.zip
Fedin_Pervye-radosti_2_Neobyknovennoe-leto.gs-FgQ.115721.fb2.zip
Bazhov_Mednoy-gory-hozyayka.S2MSYw.143109.fb2.zip
Bazhov_

In [161]:
import os

books = ['fb2/' + f for f in os.listdir('fb2') if f[-7:] == "fb2.zip"]
len(books)

393

In [162]:
import zipfile
for b in books:
    zip_ref = zipfile.ZipFile(b, 'r')
    zip_ref.extractall('fb2.unzipped')
    zip_ref.close()

In [173]:
import subprocess
for f in os.listdir('fb2.unzipped'):
    if (f[-3:] == "fb2"):
        cmd = ["pandoc", "-f", "html", "-t", "plain", "-o", "txt/" + f + ".txt", "fb2.unzipped/" + f]
        print(cmd)
        subprocess.call(cmd)

['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Abramov_Pryasliny_1_Bratya-i-sestry.edbdyw.158030.fb2.txt', 'fb2.unzipped/Abramov_Pryasliny_1_Bratya-i-sestry.edbdyw.158030.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Abramov_Pryasliny_2_Dve-zimy-i-tri-leta.hU9IrQ.158033.fb2.txt', 'fb2.unzipped/Abramov_Pryasliny_2_Dve-zimy-i-tri-leta.hU9IrQ.158033.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Abramov_Pryasliny_3_Puti-pereputya.ktaB0Q.158036.fb2.txt', 'fb2.unzipped/Abramov_Pryasliny_3_Puti-pereputya.ktaB0Q.158036.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Abramov_Pryasliny_4_Dom.FWiheg.158032.fb2.txt', 'fb2.unzipped/Abramov_Pryasliny_4_Dom.FWiheg.158032.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Aksenov_Zvezdnyy-bilet.HMAp3Q.677.fb2.txt', 'fb2.unzipped/Aksenov_Zvezdnyy-bilet.HMAp3Q.677.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Akunin_Priklyucheniya-Erasta-Fandorina_12_Nefritovye-chetki.-trNMQ.144729.fb2.txt', 'fb2.unzip

['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Bitov_Prepodavatel-simmetrii.myq3hA.383719.fb2.txt', 'fb2.unzipped/Bitov_Prepodavatel-simmetrii.myq3hA.383719.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Bitov_Pushkinskiy-Dom._PR-6A.246296.fb2.txt', 'fb2.unzipped/Bitov_Pushkinskiy-Dom._PR-6A.246296.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Bondarev_Bereg.qE7ZrA.472356.fb2.txt', 'fb2.unzipped/Bondarev_Bereg.qE7ZrA.472356.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Bondarev_Goryachiy-sneg.rA-GCg.276869.fb2.txt', 'fb2.unzipped/Bondarev_Goryachiy-sneg.rA-GCg.276869.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Bondarev_Poslednie-zalpy.bX6XhA.468939.fb2.txt', 'fb2.unzipped/Bondarev_Poslednie-zalpy.bX6XhA.468939.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Bulgakov_Belaya-gvardiya_1_Belaya-gvardiya.Rgu96w.261423.fb2.txt', 'fb2.unzipped/Bulgakov_Belaya-gvardiya_1_Belaya-gvardiya.Rgu96w.261423.fb2']
['pandoc', '-f', 'html', '-t',

['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Dudincev_Ne-hlebom-edinym.CCAP-A.105764.fb2.txt', 'fb2.unzipped/Dudincev_Ne-hlebom-edinym.CCAP-A.105764.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Dudincev_Novogodnyaya-skazka.OArvQA.372308.fb2.txt', 'fb2.unzipped/Dudincev_Novogodnyaya-skazka.OArvQA.372308.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Efremov_Velikoe-Kolco_3_Chas-Byka.29r8Yw.133556.fb2.txt', 'fb2.unzipped/Efremov_Velikoe-Kolco_3_Chas-Byka.29r8Yw.133556.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Erenburg_Ottepel.r4pVag.350166.fb2.txt', 'fb2.unzipped/Erenburg_Ottepel.r4pVag.350166.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Erofeev_Akimudy.JlZxzg.371214.fb2.txt', 'fb2.unzipped/Erofeev_Akimudy.JlZxzg.371214.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Erofeev_Moskva-Petushki.0ZnQUQ.282345.fb2.txt', 'fb2.unzipped/Erofeev_Moskva-Petushki.0ZnQUQ.282345.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Ero

['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Grishkovec_Rubashka.Go3d8Q.186275.fb2.txt', 'fb2.unzipped/Grishkovec_Rubashka.Go3d8Q.186275.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Grossman_Za-pravoe-delo.W2HG_Q.366727.fb2.txt', 'fb2.unzipped/Grossman_Za-pravoe-delo.W2HG_Q.366727.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Grossman_Zhizn-i-sudba.kkKMgw.312635.fb2.txt', 'fb2.unzipped/Grossman_Zhizn-i-sudba.kkKMgw.312635.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Iskander_Kroliki-i-udavy.zk4xWA.70498.fb2.txt', 'fb2.unzipped/Iskander_Kroliki-i-udavy.zk4xWA.70498.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Iskander_Morskoy-skorpion.UiuNBg.153215.fb2.txt', 'fb2.unzipped/Iskander_Morskoy-skorpion.UiuNBg.153215.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Iskander_Sandro-iz-Chegema.1nMzkQ.273618.fb2.txt', 'fb2.unzipped/Iskander_Sandro-iz-Chegema.1nMzkQ.273618.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Iskande

['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Mamin-Sibiryak_Hleb.d8AQzw.69300.fb2.txt', 'fb2.unzipped/Mamin-Sibiryak_Hleb.d8AQzw.69300.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Mamin-Sibiryak_Privalovskie-milliony.gFAMoQ.97422.fb2.txt', 'fb2.unzipped/Mamin-Sibiryak_Privalovskie-milliony.gFAMoQ.97422.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Mamin-Sibiryak_Tri-konca.PRxNxA.97423.fb2.txt', 'fb2.unzipped/Mamin-Sibiryak_Tri-konca.PRxNxA.97423.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Mamin-Sibiryak_Zoloto.QBwfIw.97715.fb2.txt', 'fb2.unzipped/Mamin-Sibiryak_Zoloto.QBwfIw.97715.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Matveeva_Nebesa.MwQjQg.380537.fb2.txt', 'fb2.unzipped/Matveeva_Nebesa.MwQjQg.380537.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Matveeva_Pereval-Dyatlova.2PJxMA.36282.fb2.txt', 'fb2.unzipped/Matveeva_Pereval-Dyatlova.2PJxMA.36282.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Mozhaev_Istor

['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Pikul_Slovo-i-delo.ka3RZQ.76728.fb2.txt', 'fb2.unzipped/Pikul_Slovo-i-delo.ka3RZQ.76728.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Pilnyak_Golyy-god.-zO_KQ.167283.fb2.txt', 'fb2.unzipped/Pilnyak_Golyy-god.-zO_KQ.167283.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Pilnyak_Mat-syra-zemlya.kAyUPQ.42844.fb2.txt', 'fb2.unzipped/Pilnyak_Mat-syra-zemlya.kAyUPQ.42844.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Pilnyak_Povest-nepogashennoy-luny.skoHLw.202517.fb2.txt', 'fb2.unzipped/Pilnyak_Povest-nepogashennoy-luny.skoHLw.202517.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Platonov_Chevengur.qjIjwA.162997.fb2.txt', 'fb2.unzipped/Platonov_Chevengur.qjIjwA.162997.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Platonov_Gorod-Gradov.UaaG9A.43142.fb2.txt', 'fb2.unzipped/Platonov_Gorod-Gradov.UaaG9A.43142.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Platonov_Kotlovan.v_mO5w.4315

['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Rybakov_Tyazhelyy-pesok.keUMJA.232857.fb2.txt', 'fb2.unzipped/Rybakov_Tyazhelyy-pesok.keUMJA.232857.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Saltykov-Shchedrin_Gospoda-Golovlevy.yUc_rg.81151.fb2.txt', 'fb2.unzipped/Saltykov-Shchedrin_Gospoda-Golovlevy.yUc_rg.81151.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Saltykov-Shchedrin_Istoriya-odnogo-goroda.7JZzLA.114591.fb2.txt', 'fb2.unzipped/Saltykov-Shchedrin_Istoriya-odnogo-goroda.7JZzLA.114591.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Saltykov-Shchedrin_Poshehonskaya-starina.AbCuig.190592.fb2.txt', 'fb2.unzipped/Saltykov-Shchedrin_Poshehonskaya-starina.AbCuig.190592.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Semenov_Kostenko_1_Petrovka-38.dHg6ww.181064.fb2.txt', 'fb2.unzipped/Semenov_Kostenko_1_Petrovka-38.dHg6ww.181064.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Semenov_Kostenko_2_Ogareva-6.44697g.181065.fb2.txt', 'fb

['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Tolstoy_Giperboloid-inzhenera-Garina.aTZWjA.142170.fb2.txt', 'fb2.unzipped/Tolstoy_Giperboloid-inzhenera-Garina.aTZWjA.142170.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Tolstoy_Hadzhi-Murat.ZyDwhQ.78449.fb2.txt', 'fb2.unzipped/Tolstoy_Hadzhi-Murat.ZyDwhQ.78449.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Tolstoy_Petr-Pervyy.rFSPUA.358535.fb2.txt', 'fb2.unzipped/Tolstoy_Petr-Pervyy.rFSPUA.358535.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Tolstoy_Voyna-i-mir._xoraA.436421.fb2.txt', 'fb2.unzipped/Tolstoy_Voyna-i-mir._xoraA.436421.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Trifonov_Dolgoe-proshchanie.Ggrq1w.495691.fb2.txt', 'fb2.unzipped/Trifonov_Dolgoe-proshchanie.Ggrq1w.495691.fb2']
['pandoc', '-f', 'html', '-t', 'plain', '-o', 'txt/Trifonov_Dom-na-naberezhnoy.XRMOSA.161430.fb2.txt', 'fb2.unzipped/Trifonov_Dom-na-naberezhnoy.XRMOSA.161430.fb2']
['pandoc', '-f', 'html', '-t', 'plain', 

In [174]:
for f in os.listdir('txt'):
    if (f[-3:] == "txt"):
        cmd = ["mystem", "--format=json", "-nigfcsd", "txt/" + f, "json/" + f + ".json"]
        print(cmd)
        subprocess.call(cmd)

['mystem', '--format=json', '-nigfcsd', 'txt/Abramov_Pryasliny_1_Bratya-i-sestry.edbdyw.158030.fb2.txt', 'json/Abramov_Pryasliny_1_Bratya-i-sestry.edbdyw.158030.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Abramov_Pryasliny_2_Dve-zimy-i-tri-leta.hU9IrQ.158033.fb2.txt', 'json/Abramov_Pryasliny_2_Dve-zimy-i-tri-leta.hU9IrQ.158033.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Abramov_Pryasliny_3_Puti-pereputya.ktaB0Q.158036.fb2.txt', 'json/Abramov_Pryasliny_3_Puti-pereputya.ktaB0Q.158036.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Abramov_Pryasliny_4_Dom.FWiheg.158032.fb2.txt', 'json/Abramov_Pryasliny_4_Dom.FWiheg.158032.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Aksenov_Zvezdnyy-bilet.HMAp3Q.677.fb2.txt', 'json/Aksenov_Zvezdnyy-bilet.HMAp3Q.677.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Akunin_Priklyucheniya-Erasta-Fandorina_12_Nefritovye-chetki.-trNMQ.144729.fb2.txt', 'json/Akunin_Priklyucheniya-Erasta-Fandor

['mystem', '--format=json', '-nigfcsd', 'txt/Bitov_Pushkinskiy-Dom._PR-6A.246296.fb2.txt', 'json/Bitov_Pushkinskiy-Dom._PR-6A.246296.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Bondarev_Bereg.qE7ZrA.472356.fb2.txt', 'json/Bondarev_Bereg.qE7ZrA.472356.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Bondarev_Goryachiy-sneg.rA-GCg.276869.fb2.txt', 'json/Bondarev_Goryachiy-sneg.rA-GCg.276869.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Bondarev_Poslednie-zalpy.bX6XhA.468939.fb2.txt', 'json/Bondarev_Poslednie-zalpy.bX6XhA.468939.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Bulgakov_Belaya-gvardiya_1_Belaya-gvardiya.Rgu96w.261423.fb2.txt', 'json/Bulgakov_Belaya-gvardiya_1_Belaya-gvardiya.Rgu96w.261423.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Bulgakov_Master-i-Margarita.7WgsYg.66372.fb2.txt', 'json/Bulgakov_Master-i-Margarita.7WgsYg.66372.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Bulgakov_Rokovye-ya

['mystem', '--format=json', '-nigfcsd', 'txt/Erenburg_Ottepel.r4pVag.350166.fb2.txt', 'json/Erenburg_Ottepel.r4pVag.350166.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Erofeev_Akimudy.JlZxzg.371214.fb2.txt', 'json/Erofeev_Akimudy.JlZxzg.371214.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Erofeev_Moskva-Petushki.0ZnQUQ.282345.fb2.txt', 'json/Erofeev_Moskva-Petushki.0ZnQUQ.282345.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Erofeev_Zapiski-psihopata.Naur2Q.15832.fb2.txt', 'json/Erofeev_Zapiski-psihopata.Naur2Q.15832.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Erofeev_Zhizn-s-idiotom.VTk4Ig.15844.fb2.txt', 'json/Erofeev_Zhizn-s-idiotom.VTk4Ig.15844.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Fadeev_Molodaya-Gvardiya.uI8bnQ.184141.fb2.txt', 'json/Fadeev_Molodaya-Gvardiya.uI8bnQ.184141.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Fadeev_Razgrom.hsQRWA.153202.fb2.txt', 'json/Fadeev_Razgrom.hsQRWA.1532

['mystem', '--format=json', '-nigfcsd', 'txt/Iskander_Sandro-iz-Chegema.1nMzkQ.273618.fb2.txt', 'json/Iskander_Sandro-iz-Chegema.1nMzkQ.273618.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Iskander_Sozvezdie-Kozlotura.LfWWBg.156514.fb2.txt', 'json/Iskander_Sozvezdie-Kozlotura.LfWWBg.156514.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Iskander_Trinadcatyy-podvig-Gerakla.1mM4qw.388896.fb2.txt', 'json/Iskander_Trinadcatyy-podvig-Gerakla.1mM4qw.388896.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Kabakov_Nevozvrashchenec.onH8QA.24693.fb2.txt', 'json/Kabakov_Nevozvrashchenec.onH8QA.24693.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Kabakov_Posledniy-geroy.NUzPXw.24695.fb2.txt', 'json/Kabakov_Posledniy-geroy.NUzPXw.24695.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Kabakov_Sochinitel.t8Swbg.24698.fb2.txt', 'json/Kabakov_Sochinitel.t8Swbg.24698.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Kassil_Budte-goto

['mystem', '--format=json', '-nigfcsd', 'txt/Mozhaev_Muzhiki-i-baby.4PWoXg.173143.fb2.txt', 'json/Mozhaev_Muzhiki-i-baby.4PWoXg.173143.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Mozhaev_Poltora-kvadratnyh-metra.GDiyzw.173145.fb2.txt', 'json/Mozhaev_Poltora-kvadratnyh-metra.GDiyzw.173145.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Mozhaev_Zhivoy.4GABYw.101185.fb2.txt', 'json/Mozhaev_Zhivoy.4GABYw.101185.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Nabokov_Drugie-berega.KeMJrQ.38566.fb2.txt', 'json/Nabokov_Drugie-berega.KeMJrQ.38566.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Nabokov_Korol-dama-valet.VAZMoQ.385742.fb2.txt', 'json/Nabokov_Korol-dama-valet.VAZMoQ.385742.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Nabokov_Romany_1_Mashenka.U9O7fQ.76082.fb2.txt', 'json/Nabokov_Romany_1_Mashenka.U9O7fQ.76082.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Nabokov_Romany_3_Zashchita-Luzhina.Hd9p9w.21749

['mystem', '--format=json', '-nigfcsd', 'txt/Pogorelskiy_Lyubimye-knigi-Lva-Tolstogo-Detstvo-do-14-let-_1_Chernaya-kurica-ili-Podzemnye-zhiteli.13aYXg.110929.fb2.txt', 'json/Pogorelskiy_Lyubimye-knigi-Lva-Tolstogo-Detstvo-do-14-let-_1_Chernaya-kurica-ili-Podzemnye-zhiteli.13aYXg.110929.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Polevoy_Povest-o-nastoyashchem-cheloveke._y43EA.43759.fb2.txt', 'json/Polevoy_Povest-o-nastoyashchem-cheloveke._y43EA.43759.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Polevoy_Roman-gazeta_182_Glubokiy-tyl.Q_bBhw.162247.fb2.txt', 'json/Polevoy_Roman-gazeta_182_Glubokiy-tyl.Q_bBhw.162247.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Polevoy_Roman-gazeta_273_Na-dikom-brege.iYP03g.159730.fb2.txt', 'json/Polevoy_Roman-gazeta_273_Na-dikom-brege.iYP03g.159730.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Polevoy_Zoloto.AvVjhw.147400.fb2.txt', 'json/Polevoy_Zoloto.AvVjhw.147400.fb2.txt.json']
['mystem', '--fo

['mystem', '--format=json', '-nigfcsd', 'txt/Serafimovich_Zheleznyy-potok.Zq_Ygg.290867.fb2.txt', 'json/Serafimovich_Zheleznyy-potok.Zq_Ygg.290867.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Shalamov_Posledniy-boy-mayora-Pugacheva.BhN61g.474364.fb2.txt', 'json/Shalamov_Posledniy-boy-mayora-Pugacheva.BhN61g.474364.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Shalamov_Sgushchennoe-moloko.otUOdA.474349.fb2.txt', 'json/Shalamov_Sgushchennoe-moloko.otUOdA.474349.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Shargunov_Ptichiy-gripp.mff4Lw.171291.fb2.txt', 'json/Shargunov_Ptichiy-gripp.mff4Lw.171291.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Shishkin_Pismovnik.-AQ4xg.248853.fb2.txt', 'json/Shishkin_Pismovnik.-AQ4xg.248853.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Shishkin_Venerin-volos.yH-w_A.248855.fb2.txt', 'json/Shishkin_Venerin-volos.yH-w_A.248855.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Shi

['mystem', '--format=json', '-nigfcsd', 'txt/Turgenev_Rudin.ot0gIQ.150978.fb2.txt', 'json/Turgenev_Rudin.ot0gIQ.150978.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Tynyanov_Kyuhlya.dZ_bOg.170534.fb2.txt', 'json/Tynyanov_Kyuhlya.dZ_bOg.170534.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Tynyanov_Podporuchik-Kizhe.wfO0_Q.168586.fb2.txt', 'json/Tynyanov_Podporuchik-Kizhe.wfO0_Q.168586.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Tynyanov_Smert-Vazir-Muhtara.2bXmjA.168585.fb2.txt', 'json/Tynyanov_Smert-Vazir-Muhtara.2bXmjA.168585.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Tynyanov_Voskovaya-persona.VWx6Yg.168588.fb2.txt', 'json/Tynyanov_Voskovaya-persona.VWx6Yg.168588.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Ulickaya_Daniel-Shtayn-perevodchik.NDuTFw.93997.fb2.txt', 'json/Ulickaya_Daniel-Shtayn-perevodchik.NDuTFw.93997.fb2.txt.json']
['mystem', '--format=json', '-nigfcsd', 'txt/Ulickaya_Iskrenne-vash-Shurik.A-p