In [4]:
import shutil

# Copy the project tree into /content. dirs_exist_ok=True lets the copy merge
# into a destination that already exists instead of raising.
# NOTE(review): the relative source path presumably points at a mounted
# Google Drive and depends on the current working directory — confirm.
pth = shutil.copytree(
    src='drive/MyDrive/project',
    dst='/content',
    dirs_exist_ok=True,
)

In [None]:
# Installations: uncomment and run once on a fresh runtime to install the dependencies

# !pip install -q pyngrok
# !pip install -q flask
# !pip install -q flask-ngrok
# !pip install -q rank-bm25
# !pip install -q pymorphy2
# !pip install -q navec
# !pip install -q wget

In [None]:
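# Authentication: uncomment and replace `your_authtoken` with your own ngrok token
# (never commit a real token to the notebook)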

# !ngrok authtoken your_authtoken

In [12]:
import wget

def _download_once(url):
    """Download ``url`` into the working directory unless it is already there.

    The local file name is the last path component of the URL. Skipping an
    existing file keeps a re-run of this cell from fetching the model archives
    again; ``wget.download`` would otherwise store a second, renamed copy next
    to the first one.

    Args:
        url: Address of the file to fetch.

    Returns:
        The local file name (the same kind of value ``wget.download`` returns).
    """
    import os  # local import so this cell does not rely on a later cell

    local_name = os.path.basename(url)
    if os.path.isfile(local_name):
        return local_name
    # Pass the name explicitly so the saved file always matches the guard above.
    return wget.download(url, out=local_name)


# Pretrained embedding archives; `pth` ends up holding the last local file name.
# NOTE(review): presumably these are the files the Embedder loads — confirm
# the expected file names against embed.py.
pth = _download_once('https://storage.yandexcloud.net/natasha-navec/packs/navec_hudlit_v1_12B_500K_300d_100q.tar')
pth = _download_once('http://vectors.nlpl.eu/repository/20/65.zip')

In [18]:
from flask import Flask, render_template, request, redirect, url_for
from flask_ngrok import run_with_ngrok
from bm import IndexatorBM25
from embed import Embedder
from lemm import Lemmatizer
import json
from os import path
import time

# Create the Flask application for this notebook's module.
app = Flask(import_name=__name__)
# Hand the app to flask-ngrok before any routes are served.
# NOTE(review): presumably this makes app.run() open an ngrok tunnel — confirm
# against the flask-ngrok documentation.
run_with_ngrok(app)

@app.route('/')
def home():
    """Render the ``home.html`` template for the site root."""
    landing_page = render_template('home.html')
    return landing_page

@app.route('/about')
def about():
    """Render the ``about.html`` template for the ``/about`` route."""
    about_page = render_template('about.html')
    return about_page

def _load_or_build_corpus():
    """Return ``(corpus, texts)``, using the JSON caches when both are present.

    If either ``corpus.json`` or ``texts.json`` is missing, the data is rebuilt
    with ``Lemmatizer().preprocess()`` and both cache files are rewritten, so a
    half-written cache cannot cause a ``FileNotFoundError`` on a later request.
    """
    if path.isfile('corpus.json') and path.isfile('texts.json'):
        with open('corpus.json', 'r', encoding='utf-8') as corp:
            corpus = json.load(corp)
        with open('texts.json', 'r', encoding='utf-8') as txt:
            texts = json.load(txt)
        return corpus, texts

    lemzer = Lemmatizer()
    corpus, texts = lemzer.preprocess()
    with open('corpus.json', 'w', encoding='utf-8') as corp:
        json.dump(corpus, corp, ensure_ascii=False, indent=3)
    with open('texts.json', 'w', encoding='utf-8') as txt:
        json.dump(texts, txt, ensure_ascii=False, indent=3)
    return corpus, texts


@app.route('/process', methods=['GET', 'POST'])
def answer_process():
    """Run a search with the requested model and render the results page.

    Request parameters (query string or submitted form):
        search: the query text.
        n_top:  how many results to return; must be a positive integer.
        models: which index to use: ``bm25``, ``w2v`` or ``nvc``.

    Returns:
        A redirect to the home page when a parameter is missing or ``n_top``
        is not a positive integer; otherwise the rendered ``output.html``.

    Raises:
        ValueError: if ``models`` is not one of the supported identifiers.
    """
    # request.values merges query-string and form data, so the POST method the
    # route advertises is actually honoured.
    params = request.values
    query = params.get('search')
    model_type = params.get('models')
    # type=int yields None instead of raising when n_top is absent or not numeric.
    n = params.get('n_top', type=int)
    if not query or not model_type or n is None or n < 1:
        return redirect(url_for('home'))

    # Reject an unknown model before doing the expensive corpus load.
    if model_type not in ('bm25', 'w2v', 'nvc'):
        raise ValueError('Embedder must be BM-25 (bm25), Word2Vec (w2v) or Navec (nvc)!')

    start = time.time()
    corpus, texts = _load_or_build_corpus()

    if model_type == 'bm25':
        indexator = IndexatorBM25(corpus, texts)
    else:
        indexator = Embedder(corpus, texts, model_type)
    outputs = indexator.get_top_n(query, n)
    # Reported time covers corpus loading, index construction and the search.
    timing = round(time.time() - start, 2)
    return render_template('output.html', outputs=outputs, n=n, query=query, timing=timing)

In [None]:
# Start the Flask server when this cell is executed as the main module.
if __name__ == '__main__':
    app.run()