In [1]:
import joblib, requests, string
from joblib import dump, load
import pandas as pd
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk import word_tokenize
import math, nltk, json
from collections import Counter
from flask import Flask, jsonify, render_template, request

In [2]:
# Flask application object; the route handlers defined later register on it.
app = Flask(__name__)

In [3]:
# Shared Porter stemmer instance, created once and reused by stem_words.
stemmer = PorterStemmer()


def stem_words(text):
    """Stem each whitespace-separated token of ``text``.

    Args:
        text: Input string; it is split on whitespace.

    Returns:
        The stemmed tokens joined back together with single spaces.
    """
    stemmed_tokens = map(stemmer.stem, text.split())
    return ' '.join(stemmed_tokens)

# Shared WordNet lemmatizer instance, created once and reused by lemmatize_words.
lemmatizer = WordNetLemmatizer()


def lemmatize_words(text):
    """Lemmatize each whitespace-separated token of ``text``.

    Args:
        text: Input string; it is split on whitespace.

    Returns:
        The lemmatized tokens joined back together with single spaces.
    """
    lemmas = []
    for token in text.split():
        lemmas.append(lemmatizer.lemmatize(token))
    return ' '.join(lemmas)

def text_process(text):
    """Strip punctuation, English stopwords and all-digit tokens from ``text``.

    Args:
        text: Input string.

    Returns:
        The surviving tokens (original casing preserved) joined with single
        spaces. Stopword matching is case-insensitive.
    """
    # Delete every character listed in string.punctuation in one pass.
    nopunc = text.translate(str.maketrans('', '', string.punctuation))
    # Fetch the stopword list once per call and keep it as a set, rather than
    # reloading and linearly scanning the list for every single word.
    english_stopwords = set(stopwords.words('english'))
    return ' '.join(
        word for word in nopunc.split()
        if word.lower() not in english_stopwords and not word.isdigit()
    )

In [4]:
def build_vector(iterable1, iterable2):
    """Turn two iterables of hashable items into aligned count vectors.

    Both vectors are indexed by the union of distinct items seen in either
    input; position ``i`` of each vector holds how often the same item occurs
    in the corresponding input (0 if absent). The item order follows set
    iteration order, so only the alignment between the two vectors is
    meaningful, not the absolute positions.

    Args:
        iterable1: First collection of hashable items (e.g. tokens).
        iterable2: Second collection of hashable items.

    Returns:
        A tuple ``(vector1, vector2)`` of equal-length lists of counts.
    """
    counts_a, counts_b = Counter(iterable1), Counter(iterable2)
    vocabulary = set(counts_a) | set(counts_b)

    vec_a, vec_b = [], []
    for item in vocabulary:
        # Counter returns 0 for items it has never seen.
        vec_a.append(counts_a[item])
        vec_b.append(counts_b[item])
    return vec_a, vec_b

In [5]:
def cosim(v1, v2):
    """Return the cosine similarity of two numeric vectors.

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers, expected to be the same length as ``v1``
            (``zip`` pairs elements, so any surplus elements of the longer
            sequence do not contribute to the dot product).

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)`` as a float, or ``0.0`` when either
        vector has zero magnitude (including empty vectors), where the
        cosine is undefined and a plain division would raise
        ``ZeroDivisionError``.
    """
    dot_product = sum(n1 * n2 for n1, n2 in zip(v1, v2))
    magnitude1 = math.sqrt(sum(n ** 2 for n in v1))
    magnitude2 = math.sqrt(sum(n ** 2 for n in v2))
    denominator = magnitude1 * magnitude2
    if denominator == 0:
        # A zero vector shares no direction with anything: report no similarity.
        return 0.0
    return dot_product / denominator

In [None]:
@app.route('/')
def home():
    """Serve the landing page by rendering the ``text_similarity.html`` template."""
    return render_template('text_similarity.html')

@app.route('/predict', methods=['POST'])
def predict():
    """Score the similarity of the two submitted texts.

    Reads the form fields ``text1`` and ``text2``, splits each on whitespace,
    and returns the cosine similarity of their token-count vectors as JSON
    (``{"similarity_score": <float>}``).

    Returns:
        A JSON response. Responds with HTTP 400 and an ``error`` message when
        either form field is missing, and with a score of ``0.0`` when either
        text contains no tokens (cosine similarity is undefined for an empty
        vector).
    """
    # The route only accepts POST, so no further method check is needed.
    text1 = request.form.get("text1")
    text2 = request.form.get("text2")
    if text1 is None or text2 is None:
        # form.get yields None for an absent field; calling .split() on it
        # would otherwise surface as an unhandled AttributeError (HTTP 500).
        return jsonify({'error': "form fields 'text1' and 'text2' are required"}), 400

    # Optional preprocessing (text_process, stem_words, lemmatize_words) is
    # intentionally not applied here; raw whitespace tokens are compared.
    tokens1 = text1.split()
    tokens2 = text2.split()
    if not tokens1 or not tokens2:
        # An empty or whitespace-only text produces a zero vector; avoid the
        # divide-by-zero and report no similarity.
        return jsonify({'similarity_score': 0.0})

    v1, v2 = build_vector(tokens1, tokens2)
    return jsonify({'similarity_score': cosim(v1, v2)})

if __name__ == '__main__':
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code; only point this at a trusted model.pkl. The loaded object is not
    # referenced by any route visible in this notebook; confirm it is needed.
    cosine_similarity_model = joblib.load('model.pkl')
    # Start Flask's built-in server on port 8080; this call blocks until interrupted.
    app.run(port=8080)


 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8080/ (Press CTRL+C to quit)
127.0.0.1 - - [16/Apr/2022 01:31:00] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [16/Apr/2022 01:31:00] "GET /static/styles/styles.css HTTP/1.1" 404 -
127.0.0.1 - - [16/Apr/2022 01:31:28] "POST /predict HTTP/1.1" 200 -
