In [1]:
from jina import Flow, Executor, requests, Document, DocumentArray
import pandas as pd
import numpy as np 

### The 3 fundamental Jina concepts 

* document
* executor 
* flow

In [2]:
# Load the product catalogue from CSV into a DataFrame.
df = pd.read_csv('../datasets/all_products.csv')
# Preview the first rows (the last expression of a cell is displayed).
df.head()

Unnamed: 0,product_name,product_category,description
0,HILL'S PRESCRIPTION DIET u/d Urinary Care Dog ...,food,Prescription Diet u/d is clinical nutrition sp...
1,adidas Predator Freak .2 Firm Ground Soccer Cl...,footwear,SUPPORTIVE SOCCER CLEATS BUILT FOR CONTROL. Yo...
2,Wysong Uretic - Dry Natural Food for Cats,food,Uretic has achieved extraordinary results in t...
3,adidas Firm Ground Predator 20.3 Unisex-child,footwear,You're not cheating the system. You're just be...
4,Kaisi Precision Screwdriver Set Professional E...,electronics,- Forged from high-strength chrome vanadium s...


In [3]:
# Clean the catalogue: remove exact duplicate rows, then rows with any missing value.
df = (
    df
    .drop_duplicates()
    .dropna()
)
# Show how many rows/columns are left after cleaning.
df.shape

(25873, 3)

### sample products

In [4]:
# Show the first five products: a centred name banner, the description,
# and a dashed rule separating the entries.
for row in range(5):
    product_name = df.iloc[row, 0]      # first column
    product_desc = df.iloc[row, -1]     # last column
    banner = f" product name : {product_name} ".center(80)
    print('', banner, '', product_desc, '', '-' * 80, sep='\n')


       product name : HILL'S PRESCRIPTION DIET u/d Urinary Care Dog Food        

Prescription Diet u/d is clinical nutrition specially formulated to reduce risk of urate and cystine stones in dogs. It’s made in the USA with global ingredients you can trust, and was developed by Hill’s nutritionists and veterinarians. Please consult your veterinarian for further information on how Prescription Diet foods can help your dog continue to enjoy a happy and active life.

--------------------------------------------------------------------------------

       product name : adidas Predator Freak .2 Firm Ground Soccer Cleats        

SUPPORTIVE SOCCER CLEATS BUILT FOR CONTROL. You can't change the game till you let the game change you. Every match is a chance to be bigger, better. More in control. Unleash your full force of nature with Predator Freak. The Demonskin 2.0 on these adidas soccer cleats give you superhuman ball control. The textile upper has a synthetic leather strike zone. It rid

### Creating a Document and a DocumentArray

In [6]:
# basic document 

# Document(text = 'something')  # use it for strings 
# Document(content = 'something') # use it when you don't know which Jina data type the content is 
# Document(blob = 'img, vid, audio') # use it for images, audios and videos
# Document(uri = 'local file path or url link') # can be either a local file path or a remote link

In [20]:
# Build one Document per product: the first column becomes the document text,
# and the last column is attached as the 'description' tag.
docs = DocumentArray()

# Iterate the two columns directly instead of doing two scalar `.iloc`
# lookups per row; the resulting documents are the same.
product_names = df.iloc[:, 0]
descriptions = df.iloc[:, -1]
for name, desc in zip(product_names, descriptions):
    doc = Document(text=name)
    doc.tags['description'] = desc
    docs.append(doc)

docs

<jina.types.arrays.document.DocumentArray length=25873 at 139829446576448>

In [8]:
# Serialize the first document to a JSON string to inspect its fields.
docs[0].json()

'{\n  "id": "e093cbae-34e4-11ec-9c56-18dbf233429c",\n  "mime_type": "text/plain",\n  "tags": {\n    "description": "Prescription Diet u/d is clinical nutrition specially formulated to reduce risk of urate and cystine stones in dogs. It\\u2019s made in the USA with global ingredients you can trust, and was developed by Hill\\u2019s nutritionists and veterinarians. Please consult your veterinarian for further information on how Prescription Diet foods can help your dog continue to enjoy a happy and active life."\n  },\n  "text": "HILL\'S PRESCRIPTION DIET u/d Urinary Care Dog Food"\n}'

In [9]:
# The text of the first document, i.e. the value passed as `text` when it was created.
docs[0].text

"HILL'S PRESCRIPTION DIET u/d Urinary Care Dog Food"

### creating your own executor

In [22]:
class Clean(Executor):
    """Executor that keeps only the documents whose text is non-empty."""

    @requests
    def clean(self, docs, parameters, **kwargs):
        """Print the request parameters and return the documents with non-empty text.

        Args:
            docs: iterable of documents to filter.
            parameters: parameters received with the request (printed as-is).
            **kwargs: any further arguments; unused.

        Returns:
            A new DocumentArray holding the documents whose `text` has length > 0.
        """
        print(parameters)
        non_empty = [doc for doc in docs if len(doc.text) > 0]
        return DocumentArray(non_empty)

### Flow

In [23]:
# Name of the pretrained model handed to the encoder step.
model = "sentence-transformers/paraphrase-distilroberta-base-v1"

# Assemble the pipeline one step at a time: clean -> encode -> index.
flow = Flow()
flow = flow.add(name='clean_text', uses=Clean)
flow = flow.add(
    name='error_text_encoder',
    uses='jinahub://TransformerTorchEncoder',
    uses_with={'pretrained_model_name_or_path': model},
)
flow = flow.add(name='error_text_indexer', uses='jinahub://SimpleIndexer')

In [21]:
# Keep only the first 1000 documents; this smaller `docs` is what gets indexed below.
# NOTE(review): presumably done to keep encoding/indexing time short — confirm.
docs = docs[:1000]
docs

<jina.types.arrays.document.DocumentArray length=1000 at 139829446519248>

In [None]:
# Preview the text of the first three documents.
# The loop must run over the documents themselves: iterating `range(3)` yields
# ints, which have no `.text` attribute.
for doc in docs[:3]:
    print(doc.text)

### indexing 

In [13]:
# Remove any existing `workspace` directory before indexing.
# NOTE(review): presumably this is where the indexer persists its data — confirm.
!rm -rf workspace

# Run the Flow for the duration of the `with` block.
with flow:
    # Send `docs` to the Flow for indexing. `parameters` travels with the
    # request; the `Clean` executor prints the parameters it receives.
    # NOTE(review): `docs = docs` looks redundant next to `inputs=docs` —
    # confirm it is ignored by `flow.index` and can be dropped.
    flow.index(
      inputs=docs,
        docs = docs,
        parameters = {'name' : 'somethign', 'xyz' : 'fsdfsdfsa'}
  )

           Flow@12347[I]:[32m🎉 Flow is ready to use![0m                                          
	🔗 Protocol: 		[1mGRPC[0m
	🏠 Local access:	[4m[36m0.0.0.0:49839[0m
	🔒 Private network:	[4m[36m192.168.1.42:49839[0m[0m
{'name': 'somethign'}


### querying 

In [18]:
# Read the search phrase interactively and wrap it in a Document.
query_text = input('Query product : ')
query = Document(text=query_text)

# Run the query through the Flow and keep the returned responses.
with flow:
    response = flow.search(inputs=query, return_results=True)

Query product : printer
           Flow@12347[I]:[32m🎉 Flow is ready to use![0m                                          
	🔗 Protocol: 		[1mGRPC[0m
	🏠 Local access:	[4m[36m0.0.0.0:49759[0m
	🔒 Private network:	[4m[36m192.168.1.42:49759[0m[0m
{}


### finding the best match

In [16]:
# Matches attached to the first document of the first response,
# i.e. the results for the single query document that was searched.
matches = response[0].docs[0].matches
matches

<jina.types.arrays.match.MatchArray length=20 at 139829488721536>

In [19]:
# Print every match: a centred name banner, its description tag,
# and a dashed rule separating the entries.
for match in matches:
    banner = f" product name : {match.text} ".center(80)
    description = match.tags['description']
    print('', banner, '', description, '', '-' * 80, sep='\n')


              product name : Scenic Nature Stationery - Pack of 48              

Professionally printed Scenic Stationery. 4 unique scenes from nature, 12 of each for a total of 48. Measure 8.5 x 11 inches. Works through all printers and copiers. Images are ghosted to be printed on. Also can be handwritten. Perfect for personalized letters, projects, and poetry. Professionally printed in USA on premium 70 lb. bright white smooth text stock.

--------------------------------------------------------------------------------

 product name : Canson 100510926 XL Mix Media Paper Pad, 98 Pound, 7 x 10 Inch, 60 Sheets 

Canson XL Mix Media Pads were developed as the answer to the demand for a heavyweight, fine texture paper with heavy sizing for wet and dry media. This paper can be used for: sketching, drawing, light washes of ink and watercolor, collage, journaling, and more. It has quickly become the most popular paper in the Canson paper family. Mix Media paper erases well and blends easi