In [1]:
# START REMOVE

In [10]:
!pip install gensim



In [11]:
!pip list

Package                            Version  
---------------------------------- ---------
absl-py                            0.9.0    
alabaster                          0.7.12   
anaconda-client                    1.7.2    
anaconda-navigator                 1.9.12   
anaconda-project                   0.8.3    
appdirs                            1.4.4    
appnope                            0.1.0    
appscript                          1.0.1    
asn1crypto                         1.0.1    
astor                              0.8.0    
astroid                            2.3.1    
astropy                            3.2.2    
atomicwrites                       1.3.0    
attrs                              19.2.0   
awscli                             1.16.309 
azure-ai-textanalytics             5.0.0    
azure-common                       1.1.26   
azure-core                         1.9.0    
Babel                              2.7.0    
backcall                          

In [12]:
# END REMOVE

In [13]:
import gensim
from gensim.models import KeyedVectors
import numpy as np
import pandas as pd
import json

import nltk
from nltk import word_tokenize
nltk.download('punkt')

from scipy.spatial.distance import cosine
from flask import Flask, request, Response

[nltk_data] Downloading package punkt to /Users/foohm/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [14]:
wv = KeyedVectors.load_word2vec_format('./vectors.txt', binary=False)

In [15]:
def printJSON(j):
    output = json.dumps(j, indent=2)
    lines = output.split("\n")
    for line in lines:
        print(line)

def replaceNull(payload):
    if payload is None:
        return ""
    else:
        return payload
    
def parseJSON(payload):
    df = pd.DataFrame()
    title = payload['target']['title']
    description = replaceNull(payload['target']['description'])
    iden = payload['target']['id']
    #print(iden,title,description)
    row = {'id': iden, 'title': title, 'description': description}
    df = df.append(row, ignore_index=True)
    for r in payload['corpus']:
        title = r['title']
        description = replaceNull(r['description'])
        iden = r['id']
        #print(iden,title,description)
        row = {'id': iden, 'title': title, 'description': description}
        df = df.append(row, ignore_index=True)
    return df
    
def sentance2vector(sentance):
  tokens = word_tokenize(sentance)
  vector = np.zeros(100)
  for token in tokens:
    if token in wv.vocab:
      vector = vector + wv[token]
  return vector

def cosineSimilarity(v1, v2):
  return 1 - cosine(v1,v2)

def preprocess(df):
    df["features"] = df["title"] + " " + df["description"]
    df.features = df.features.str.replace(r"[\n\r\t]+", " ")
    df.features = df.features.str.replace(r"([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})", " ")
    df.features = df.features.str.replace(r"((http[s]?|ftp):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?", " ")
    return df

def score(df):
    feature = df.iloc[0].features
    vector = sentance2vector(feature)
    df["score"] = np.zeros(len(df))
    for i in range(0,len(df)):
        # print(df.iloc[i].features)
        vect = sentance2vector(df.iloc[i].features)
        df.iloc[i,4] = cosineSimilarity(vector, vect)
        # print(cosineSimilarity(vector, vect))
    return df

def jsonResult(df):
    result = []
    for i in range(0,len(df)):
        d = {'id': df.iloc[i].id, 'title': df.iloc[i].title, 'description': df.iloc[i].description, 'score': df.iloc[i].score}
        result.append(d)
    return json.dumps(result)
    

In [16]:
app = Flask(__name__)

In [17]:
@app.route('/predict', methods=["GET", "POST"])
def predict():
    if request.method == "GET":
        return "Please send Post Request"
    elif request.method == "POST":
        data = request.get_json()
        printJSON(data)
        df = parseJSON(data)
        print(df.to_string())
        df = preprocess(df)
        print(df.to_string())
        df = score(df)
        print(df.to_string())
        resp = Response(jsonResult(df))
        resp.headers['Access-Control-Allow-Origin'] = '*'
        resp.headers['Content-Type'] = 'application/json'
        return resp
        

In [None]:
# START REMOVE
# This is to be used for local testing
app.run()
# END REMOVE

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


{
  "target": {
    "id": "12",
    "title": "mary had a little lamb",
    "description": "mary had a little lamb its fleece was white as snow"
  },
  "corpus": [
    {
      "id": "15",
      "title": "the quick brown fox",
      "description": "hey diddle diddle, the cat and the fiddle"
    },
    {
      "id": "200",
      "title": "the cow jumped over the moon",
      "description": "the rain in spain fell mainly in the plain"
    },
    {
      "id": "345",
      "title": "have a happy happy new year",
      "description": null
    }
  ]
}
                                         description   id                         title
0  mary had a little lamb its fleece was white as...   12        mary had a little lamb
1          hey diddle diddle, the cat and the fiddle   15           the quick brown fox
2         the rain in spain fell mainly in the plain  200  the cow jumped over the moon
3                                                     345   have a happy happy new year
         

127.0.0.1 - - [02/Jan/2021 11:13:47] "[37mPOST /predict HTTP/1.1[0m" 200 -


                                         description   id                         title                                           features     score
0  mary had a little lamb its fleece was white as...   12        mary had a little lamb  mary had a little lamb mary had a little lamb ...  1.000000
1          hey diddle diddle, the cat and the fiddle   15           the quick brown fox  the quick brown fox hey diddle diddle, the cat...  0.570206
2         the rain in spain fell mainly in the plain  200  the cow jumped over the moon  the cow jumped over the moon the rain in spain...  0.650617
3                                                     345   have a happy happy new year                       have a happy happy new year   0.576496


127.0.0.1 - - [02/Jan/2021 11:14:16] "[37mPOST /predict HTTP/1.1[0m" 200 -


{
  "target": {
    "id": "12",
    "title": "mary had a little lamb",
    "description": "mary had a little lamb its fleece was white as snow"
  },
  "corpus": [
    {
      "id": "15",
      "title": "the quick brown fox",
      "description": "hey diddle diddle, the cat and the fiddle"
    },
    {
      "id": "200",
      "title": "the cow jumped over the moon",
      "description": "the rain in spain fell mainly in the plain"
    }
  ]
}
                                         description   id                         title
0  mary had a little lamb its fleece was white as...   12        mary had a little lamb
1          hey diddle diddle, the cat and the fiddle   15           the quick brown fox
2         the rain in spain fell mainly in the plain  200  the cow jumped over the moon
                                         description   id                         title                                           features
0  mary had a little lamb its fleece was white as...   12     

127.0.0.1 - - [02/Jan/2021 11:14:33] "[37mPOST /predict HTTP/1.1[0m" 200 -


{
  "target": {
    "id": "12",
    "title": "mary had a little lamb",
    "description": "mary had a little lamb its fleece was white as snow"
  },
  "corpus": [
    {
      "id": "15",
      "title": "the quick brown fox",
      "description": null
    },
    {
      "id": "200",
      "title": "the cow jumped over the moon",
      "description": "the rain in spain fell mainly in the plain"
    }
  ]
}
                                         description   id                         title
0  mary had a little lamb its fleece was white as...   12        mary had a little lamb
1                                                      15           the quick brown fox
2         the rain in spain fell mainly in the plain  200  the cow jumped over the moon
                                         description   id                         title                                           features
0  mary had a little lamb its fleece was white as...   12        mary had a little lamb  mary had a l

In [8]:
# This is to be used to run app.py in a docker container
app.run(host='0.0.0.0', port='33')

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:33/ (Press CTRL+C to quit)
