In [1]:
import pydgraph
import numpy as np
import json as js
from boto3 import client as bclient
from botocore.client import Config

In [2]:
from scipy.spatial.distance import cosine

In [3]:
url = "111.93.155.194:9080"

In [4]:
def to_json(data, filename):
    with open(filename + ".json", "w", encoding="utf-8") as f_:
        js.dump(data, f_, ensure_ascii=False, indent=4)

def read_json(json_file):
    with open(json_file) as f_:
        meeting = js.load(f_)
    return meeting

In [5]:
client_stub = pydgraph.DgraphClientStub(url)
client = pydgraph.DgraphClient(client_stub)

In [6]:
def perform_query(query, variables):
    txn = client.txn()
    try:
        res = client.txn(read_only=True).query(query, variables=variables)
        response = js.loads(res.json)

        return response
    finally:
        # Clean up. Calling this after txn.commit() is a no-op and hence safe.
        txn.discard()

In [7]:
user_kw_query = """
query userKw($n: string, $t: int) {
    userKw(func: eq(email, $n)) {
        uid
        xid
        name
        mentionName
        ~hasMember{
            name
        }
        ~authoredBy (first: $t) {
            hasKeywords {
                values
            }
        }
    }
}
"""

In [8]:
variables = {"$n": "shashank@etherlabs.io", "$t": "5"}
user_kw_resp = perform_query(query=user_kw_query, variables=variables)

In [9]:
user_kw_resp

{'userKw': [{'uid': '0xcfc87',
   'xid': 'e034c7a6-0681-40ba-b7b2-b78de2a2cc3d',
   'name': 'Shashank',
   'mentionName': 'shashank',
   '~authoredBy': [{'hasKeywords': [{'values': ['CSV ether ready',
        'transportation app']}]},
    {'hasKeywords': [{'values': ['animation to switched',
        'HTTP libraries',
        'HTTP',
        'real change white code',
        'second time',
        'offline mode',
        'images just renders',
        'app remember',
        'catch data',
        'jump offline',
        'choose is random']}]},
    {'hasKeywords': [{'values': ['great examples',
        'Lambda',
        'modern application',
        'Lambda functions',
        'lot of different Lambda functions']}]},
    {'hasKeywords': [{'values': ['real change',
        'web component',
        'app Remember',
        'HTTP libraries',
        'second time round',
        'start the app',
        'footer colleagues javascript',
        'couple of differences',
        'chose is a rando

In [10]:
def format_response(resp):
    user_id_dict = {}
    user_kw_dict = {}
    for info in resp["userKw"]:
        try:
            user_id = info["xid"]
            user_name = info["name"]

            user_id_dict.update({
                user_id: user_name
            })

            keyword_object = info["~authoredBy"]
            user_kw_list = []
            for obj in keyword_object:
                kw = obj["hasKeywords"]
                user_kw_list.extend([words for words in kw[0]["values"]])

            user_kw_dict.update({
                user_id: user_kw_list
            })
        except Exception:
            continue
    
    return user_kw_dict, user_id_dict

In [11]:
user_kw_dict, user_id_dict = format_response(resp=user_kw_resp)

In [12]:
user_kw_dict

{'e034c7a6-0681-40ba-b7b2-b78de2a2cc3d': ['CSV ether ready',
  'transportation app',
  'animation to switched',
  'HTTP libraries',
  'HTTP',
  'real change white code',
  'second time',
  'offline mode',
  'images just renders',
  'app remember',
  'catch data',
  'jump offline',
  'choose is random',
  'great examples',
  'Lambda',
  'modern application',
  'Lambda functions',
  'lot of different Lambda functions',
  'real change',
  'web component',
  'app Remember',
  'HTTP libraries',
  'second time round',
  'start the app',
  'footer colleagues javascript',
  'couple of differences',
  'chose is a random',
  'Stitches the same styling',
  'web app',
  'HTTP',
  'Switched Off',
  'working hard',
  'deploy by cash',
  'offline mode',
  'core complex pieces',
  'switch up online'],
 'be748cf6-d983-4ac1-8854-97fdc98c056d': ['preference relations',
  'variables by refer',
  'ends the NI belong',
  'training and test',
  'architecture like work',
  'carry on context',
  'idea of gener

In [13]:
ref_user_kw_query = """
query userKw($t: int) {
    userKw(func: type("User")) {
        uid
        xid
        name
        mentionName
        ~hasMember{
            name
        }
        ~authoredBy (first: $t) {
            hasKeywords {
                values
            }
        }
    }
}
"""

In [14]:
var = {"$t": "5"}
ref_user_kw_resp = perform_query(query=ref_user_kw_query, variables=var)

In [15]:
ref_user_kw_resp

{'userKw': [{'uid': '0xa633a',
   'xid': '18b15cf4-bcc7-4b0a-8d46-8a05421188c7',
   'name': 'F1 Testing',
   'mentionName': 'f1'},
  {'uid': '0xa6387',
   'xid': '3a23aba9-7d04-4451-927c-684c1673496e',
   'name': 'Franklin Ferdinand',
   'mentionName': 'franklin',
   '~authoredBy': [{'hasKeywords': [{'values': ['Keys']}]},
    {'hasKeywords': [{'values': ['nothingto Those people', 'Theo', 'Over']}]},
    {'hasKeywords': [{'values': ['marks Jira',
        'civil rights Isis',
        'open project',
        'climate changes',
        'climate nats',
        'lot of salt',
        'smooth change',
        'wait Meetings',
        'speak aloud',
        'feel ability',
        'Redis',
        'Isis',
        'ability to speak aloud']}]},
    {'hasKeywords': [{'values': ['process refund',
        'process purchase',
        'basic role',
        'great function',
        'leave a default option create role',
        'create our two lambda channel',
        'AWS Console',
        'basic la

In [16]:
ref_user_kw_dict, ref_user_id_dict = format_response(resp=ref_user_kw_resp)

In [17]:
len(ref_user_id_dict.keys())

179

In [18]:
len(ref_user_kw_dict)

23

## Remove users (if any) that are being compared against

In [19]:
for k in ref_user_id_dict.keys():
    if k in user_kw_dict.keys():
        del ref_user_kw_dict[k]

In [20]:
len(ref_user_kw_dict)

21

In [21]:
aws_config = Config(
        connect_timeout=180,
        read_timeout=300,
        retries={"max_attempts": 2},
        region_name="us-east-1",
    )
lambda_client = bclient("lambda", config=aws_config)
lambda_function = "sentence-encoder-lambda"

def get_embeddings(input_list):
    lambda_payload = {"body": {"text_input": input_list}}

    try:
        invoke_response = lambda_client.invoke(
            FunctionName=lambda_function,
            InvocationType="RequestResponse",
            Payload=js.dumps(lambda_payload),
        )

        lambda_output = (
            invoke_response["Payload"].read().decode("utf8").replace("'", '"')
        )
        response = js.loads(lambda_output)
        status_code = response["statusCode"]
        response_body = response["body"]

        if status_code == 200:
            embedding_vector = np.asarray(js.loads(response_body)["embeddings"])

        else:
            embedding_vector = np.asarray(js.loads(response_body)["embeddings"])

        return embedding_vector

    except Exception as e:
        print(e)
        embedding_vector = np.zeros((1, 512))
        return embedding_vector

In [22]:
vec1 = get_embeddings(input_list=["AWS lambda function"])
vec2 = get_embeddings(input_list=["AWS serverless computing"])

In [23]:
sim = 1 - cosine(vec1, vec2)

In [24]:
sim

0.7383127021592407

In [25]:
def hash_func(vecs, projections):
    bools = np.dot(vecs, projections.T) > 0
    return [bool2int(bool_vec) for bool_vec in bools]

def bool2int(x):
    y = 0
    for i,j in enumerate(x):
        if j: y += 1<<i
    return y

In [122]:
class Table:
    
    def __init__(self, hash_size, dim):
        self.table = dict()
        self.hash_size = hash_size
        self.projections = np.random.randn(self.hash_size, dim)

    def add(self, vecs, label):
        entry = {'label': label}
        hashes = hash_func(vecs, self.projections)
        for h in hashes:
            if h in self.table.keys():
                self.table[h].append(entry)
            else:
                self.table[h] = [entry]

    def query(self, vecs):
        hashes = hash_func(vecs, self.projections)
        results = list()
        for h in hashes:
            if h in self.table.keys():
                results.extend(self.table[h])
        return results

class LSH:
    
    def __init__(self, dim):
        self.num_tables = 4
        self.hash_size = 8
        self.tables = list()
        for i in range(self.num_tables):
            self.tables.append(Table(self.hash_size, dim))
    
    def add(self, vecs, label):
        for table in self.tables:
            table.add(vecs, label)
    
    def query(self, vecs):
        results = list()
        for table in self.tables:
            results.extend(table.query(vecs))
        return results

    def describe(self):
        for table in self.tables:
            # print(table.table, table.projections)
            yield(table)
    

class Search:
    
    def __init__(self, input_list):
        self.dim_size = 512
        self.lsh = LSH(self.dim_size)
        self.input = input_list
        self.num_features_in_input = dict()
        for f in self.input:
            self.num_features_in_input[f] = 0
                
    def featurize(self):
        # for kw_list in self.input:
        kw_features = get_embeddings(input_list=self.input)
        
        for i in range(len(kw_features)):
            self.lsh.add([kw_features[i]], self.input[i])
            self.num_features_in_input[self.input[i]] += len(kw_features[i])
                
    def query(self, kw_list):
        # kw_list = user_dict[user]
        kw_features = get_embeddings(input_list=kw_list)
        
        results = self.lsh.query(kw_features)
        print('num results', len(results))

        counts = dict()
        for r in results:
            if r['label'] in counts.keys():
                counts[r['label']] += 1
            else:
                counts[r['label']] = 1
        for k in counts:
            counts[k] = float(counts[k])/self.num_features_in_input[k]
        return counts
    
    def describe(self):
        for t in self.lsh.describe():
            yield(t)
    

In [117]:
ref_kws = [v for val in ref_user_kw_dict.values() for v in val]

In [149]:
ref_kws[:60]

['Keys',
 'nothingto Those people',
 'Theo',
 'Over',
 'marks Jira',
 'civil rights Isis',
 'open project',
 'climate changes',
 'climate nats',
 'lot of salt',
 'smooth change',
 'wait Meetings',
 'speak aloud',
 'feel ability',
 'Redis',
 'Isis',
 'ability to speak aloud',
 'process refund',
 'process purchase',
 'basic role',
 'great function',
 'leave a default option create role',
 'create our two lambda channel',
 'AWS Console',
 'basic lambda',
 'Lambda dot',
 'guys called the process',
 'insert events',
 'lambda arguments',
 'add some more criteria',
 'combinations of criterias',
 'change events',
 'players scores',
 'lot is data replication',
 'record as the argument',
 'guy to this',
 'table at real time',
 'player ID play',
 'replicate dot',
 'bit more searchable',
 'leaderboards a tree',
 'insert on the table',
 'batch of changes',
 'elastic search cluster',
 'games played greater',
 'time it event',
 'reading off this table',
 'lambda to your dot audio',
 'faster and more 

In [160]:
ws = Search(input_list=ref_kws[:60])

In [124]:
ws.num_features_in_input

{'Keys': 0,
 'nothingto Those people': 0,
 'Theo': 0,
 'Over': 0,
 'marks Jira': 0,
 'civil rights Isis': 0,
 'open project': 0,
 'climate changes': 0,
 'climate nats': 0,
 'lot of salt': 0,
 'smooth change': 0,
 'wait Meetings': 0,
 'speak aloud': 0,
 'feel ability': 0,
 'Redis': 0,
 'Isis': 0,
 'ability to speak aloud': 0,
 'process refund': 0,
 'process purchase': 0,
 'basic role': 0,
 'great function': 0,
 'leave a default option create role': 0,
 'create our two lambda channel': 0,
 'AWS Console': 0,
 'basic lambda': 0,
 'Lambda dot': 0,
 'guys called the process': 0,
 'insert events': 0,
 'lambda arguments': 0,
 'add some more criteria': 0,
 'combinations of criterias': 0,
 'change events': 0,
 'players scores': 0,
 'lot is data replication': 0,
 'record as the argument': 0,
 'guy to this': 0,
 'table at real time': 0,
 'player ID play': 0,
 'replicate dot': 0,
 'bit more searchable': 0,
 'leaderboards a tree': 0,
 'insert on the table': 0,
 'batch of changes': 0,
 'elastic searc

In [161]:
ws.featurize()

In [162]:
ws.num_features_in_input

{'Keys': 512,
 'nothingto Those people': 512,
 'Theo': 512,
 'Over': 512,
 'marks Jira': 512,
 'civil rights Isis': 512,
 'open project': 512,
 'climate changes': 512,
 'climate nats': 512,
 'lot of salt': 512,
 'smooth change': 512,
 'wait Meetings': 512,
 'speak aloud': 512,
 'feel ability': 512,
 'Redis': 512,
 'Isis': 512,
 'ability to speak aloud': 512,
 'process refund': 512,
 'process purchase': 512,
 'basic role': 512,
 'great function': 512,
 'leave a default option create role': 512,
 'create our two lambda channel': 512,
 'AWS Console': 512,
 'basic lambda': 512,
 'Lambda dot': 512,
 'guys called the process': 512,
 'insert events': 512,
 'lambda arguments': 512,
 'add some more criteria': 512,
 'combinations of criterias': 512,
 'change events': 512,
 'players scores': 512,
 'lot is data replication': 512,
 'record as the argument': 512,
 'guy to this': 512,
 'table at real time': 512,
 'player ID play': 512,
 'replicate dot': 512,
 'bit more searchable': 512,
 'leaderboard

In [59]:
projections = np.random.randn(8, 512)
h = hash_func(vec3, projections)

In [92]:
vec4 = get_embeddings(input_list=ref_user_kw_dict['3a23aba9-7d04-4451-927c-684c1673496e'])

In [93]:
h1 = hash_func(vec4, projections)

In [94]:
h1

[6,
 103,
 65,
 198,
 0,
 67,
 194,
 236,
 204,
 206,
 68,
 20,
 196,
 196,
 192,
 71,
 212,
 160,
 128,
 196,
 68,
 132,
 192,
 72,
 200,
 198,
 198]

In [99]:
ref_user_kw_dict['3a23aba9-7d04-4451-927c-684c1673496e'][12]

'speak aloud'

In [102]:
user_kw_dict['e034c7a6-0681-40ba-b7b2-b78de2a2cc3d'][-2]

'core complex pieces'

In [61]:
h

[193,
 4,
 198,
 64,
 70,
 196,
 196,
 64,
 194,
 67,
 0,
 64,
 142,
 198,
 194,
 68,
 192,
 192,
 197,
 2,
 67,
 64,
 140,
 0,
 196,
 102,
 206,
 166,
 0,
 70,
 192,
 134,
 76,
 64,
 198,
 68]

In [71]:
h[18]

197

In [69]:
for i, n in enumerate(h):
    if n == 166:
        print(i)

27


In [70]:
user_kw_dict['e034c7a6-0681-40ba-b7b2-b78de2a2cc3d'][27]

'Stitches the same styling'

In [137]:
t = ws.describe()

In [139]:
print(next(t).table)

{196: [{'label': 'Keys'}], 133: [{'label': 'nothingto Those people'}], 212: [{'label': 'Theo'}], 68: [{'label': 'Over'}], 241: [{'label': 'marks Jira'}], 229: [{'label': 'civil rights Isis'}, {'label': 'feel ability'}], 181: [{'label': 'open project'}], 221: [{'label': 'climate changes'}, {'label': 'climate nats'}], 142: [{'label': 'lot of salt'}], 116: [{'label': 'smooth change'}, {'label': 'great function'}], 175: [{'label': 'wait Meetings'}], 5: [{'label': 'speak aloud'}], 245: [{'label': 'Redis'}], 244: [{'label': 'Isis'}], 39: [{'label': 'ability to speak aloud'}], 126: [{'label': 'process refund'}], 62: [{'label': 'process purchase'}], 228: [{'label': 'basic role'}], 54: [{'label': 'leave a default option create role'}], 59: [{'label': 'create our two lambda channel'}], 189: [{'label': 'AWS Console'}, {'label': 'basic lambda'}], 165: [{'label': 'Lambda dot'}], 188: [{'label': 'guys called the process'}], 145: [{'label': 'insert events'}], 177: [{'label': 'lambda arguments'}], 147

In [167]:
result = ws.query(kw_list=["VMware", "Linux distribution", "Mac OS", "VMware and virtualbox", "interpret these operating systems"])

num results 8


In [168]:
for r in sorted(result, key=result.get, reverse=True):
    for u, l in ref_user_kw_dict.items():
        if r in l:
            print("{}, {}, {}, {}".format(r, result[r], u, ref_user_id_dict[u]))

open project, 0.00390625, 3a23aba9-7d04-4451-927c-684c1673496e, Franklin Ferdinand
faster and more efficient operation, 0.001953125, feffe3ee-896f-45c3-9d91-1f6d962c66ce, Karthik Muralidharan
Redis, 0.001953125, 3a23aba9-7d04-4451-927c-684c1673496e, Franklin Ferdinand
Redis, 0.001953125, b1b92672-20ce-4e0c-af0a-82de3016fcb0, mithun
AWS Console, 0.001953125, 3a23aba9-7d04-4451-927c-684c1673496e, Franklin Ferdinand
operation in your leader table, 0.001953125, feffe3ee-896f-45c3-9d91-1f6d962c66ce, Karthik Muralidharan
leaderboards a tree, 0.001953125, feffe3ee-896f-45c3-9d91-1f6d962c66ce, Karthik Muralidharan
batch operation, 0.001953125, feffe3ee-896f-45c3-9d91-1f6d962c66ce, Karthik Muralidharan


In [143]:
ref_user_kw_dict['3a23aba9-7d04-4451-927c-684c1673496e']

['Keys',
 'nothingto Those people',
 'Theo',
 'Over',
 'marks Jira',
 'civil rights Isis',
 'open project',
 'climate changes',
 'climate nats',
 'lot of salt',
 'smooth change',
 'wait Meetings',
 'speak aloud',
 'feel ability',
 'Redis',
 'Isis',
 'ability to speak aloud',
 'process refund',
 'process purchase',
 'basic role',
 'great function',
 'leave a default option create role',
 'create our two lambda channel',
 'AWS Console',
 'basic lambda',
 'Lambda dot',
 'guys called the process']

In [245]:
user_kw_dict['e034c7a6-0681-40ba-b7b2-b78de2a2cc3d']

['CSV ether ready',
 'transportation app',
 'animation to switched',
 'HTTP libraries',
 'HTTP',
 'real change white code',
 'second time',
 'offline mode',
 'images just renders',
 'app remember',
 'catch data',
 'jump offline',
 'choose is random',
 'great examples',
 'Lambda',
 'modern application',
 'Lambda functions',
 'lot of different Lambda functions',
 'real change',
 'web component',
 'app Remember',
 'HTTP libraries',
 'second time round',
 'start the app',
 'footer colleagues javascript',
 'couple of differences',
 'chose is a random',
 'Stitches the same styling',
 'web app',
 'HTTP',
 'Switched Off',
 'working hard',
 'deploy by cash',
 'offline mode',
 'core complex pieces',
 'switch up online']

In [41]:
len(user_kw_dict['e034c7a6-0681-40ba-b7b2-b78de2a2cc3d'])

36