In [1]:
import json
# Load the film records from the JSON export.
# The encoding is given explicitly: without it, open() falls back to the
# platform's locale encoding, which on Windows is typically a legacy code page
# and would mis-decode (or fail on) non-ASCII characters in the file.
with open("IMDB_TOP_100.json", "r", encoding="utf-8") as dataset_file:
    imdb_dataset = json.load(dataset_file)

In [4]:
"""
Vespa Expects a list 
"""
vespa_feed=[]
for film in imdb_dataset:
    film_dict={
        'id':film['id'],
        'fields':{
            'film_title':film['Series_Title'],
            'synopsis':film['Synopsis'],
            'id':film['id'],
            'year':film['Released_Year']

        }
    }
    vespa_feed.append(film_dict)


In [6]:
"""
Build Vespa Application that will recieve the data
"""
from vespa.package import ApplicationPackage
app_package=ApplicationPackage(name='imddbquickstart')

In [7]:
from vespa.package import Document, Schema

# Schema for the film documents; it starts from an empty Document and the
# fields are attached in a separate step.
film_schema = Schema(
    name="imdb_film_schema",
    document=Document(),
)

In [8]:
from vespa.package import Field

# Field definitions as (name, type, indexing) rows:
#   - film_title / synopsis: indexed and included in the summary
#   - year: attribute only
#   - id: summary only
film_schema.add_fields(
    *(
        Field(name=field_name, type=field_type, indexing=indexing_modes)
        for field_name, field_type, indexing_modes in (
            ("film_title", "string", ["index", "summary"]),
            ("synopsis", "string", ["index", "summary"]),
            ("year", "int", ["attribute"]),
            ("id", "string", ["summary"]),
        )
    )
)

In [10]:
from vespa.package import FieldSet

# Group the two text fields into a field set named 'default' so a query can
# address both of them through that single name.
film_schema.add_field_set(
    FieldSet(
        name="default",
        fields=["film_title", "synopsis"],
    )
)

In [11]:
# Attach the film schema to the application package.
app_package.add_schema(film_schema)

In [None]:
""" 
Run this in CMD if using Windows : vespa auth login
"""

from vespa.deployment import VespaCloud
vespa_cloud = VespaCloud(
    tenant='tenantcctech',
    application=app_package.name,
    application_package=app_package
)

Setting application...
Running: vespa config set application tenantcctech.imddbquickstart.default
Setting target cloud...
Running: vespa config set target cloud

No api-key found for control plane access. Using access token.
Checking for access token in auth.json...
Successfully obtained access token for control plane access.
Certificate and key not found in c:\Users\Vinayak Shirahatti\Desktop\Vespa\.vespa or C:\Users\Vinayak Shirahatti\.vespa\tenantcctech.imddbquickstart.default: Creating new cert/key pair with vespa CLI.
Generating certificate and key...
Running: vespa auth cert -N
Success: Certificate written to 'C:\Users\Vinayak Shirahatti\.vespa\tenantcctech.imddbquickstart.default\data-plane-public-cert.pem'
Success: Private key written to 'C:\Users\Vinayak Shirahatti\.vespa\tenantcctech.imddbquickstart.default\data-plane-private-key.pem'



In [15]:
# Deploy the application package and keep the returned object; its `cert` and
# `key` attributes are read in a later cell.
app=vespa_cloud.deploy()

Deployment started in run 1 of dev-aws-us-east-1c for tenantcctech.imddbquickstart. This may take a few minutes the first time.
INFO    [09:20:22]  Deploying platform version 8.619.77 and application dev build 1 for dev-aws-us-east-1c of default ...
INFO    [09:20:22]  Using CA signed certificate version 1
INFO    [09:20:22]  Using 1 nodes in container cluster 'imddbquickstart_container'
INFO    [09:20:30]  Session 3 for tenant 'tenantcctech' prepared and activated.
INFO    [09:20:58]  ######## Details for all nodes ########
INFO    [09:20:58]  h117197f.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP
INFO    [09:20:58]  --- platform vespa/cloud-tenant-rhel8:8.619.77
INFO    [09:20:58]  --- container-clustercontroller on port 19050 has not started 
INFO    [09:20:58]  --- metricsproxy-container on port 19092 has not started 
INFO    [09:20:58]  h117215b.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP
INFO    [09:20:58]  --- platform vespa/cloud-tenant-rhel8:8.619.77
INFO    

In [17]:
# Certificate and key exposed by the deployed application object; they are
# passed on when opening the data-plane connection.
cert_path = app.cert
key_path = app.key

# get_mtls_endpoint is a method and has to be *called* to obtain the endpoint
# URL. Referencing it without parentheses would bind `endpoint` to the method
# object itself rather than to the URL.
endpoint = vespa_cloud.get_mtls_endpoint()

In [20]:
from vespa.application import Vespa

# Ask the deployment for its mTLS endpoint instead of pasting a URL: a pasted,
# deployment-specific address goes stale as soon as the application is
# redeployed or the notebook is run against another tenant/application.
endpoint = vespa_cloud.get_mtls_endpoint()

# Data-plane connection authenticated with the certificate/key pair.
vespa_application = Vespa(endpoint, cert=cert_path, key=key_path)

In [23]:
from vespa.io import VespaResponse
def callback(response:VespaResponse,id:str):
    """Report a failed feed operation.

    Args:
        response: Response object for one fed document; only
            ``is_successful()`` and ``get_json()`` are used.
        id: Identifier of the document the response belongs to.

    Prints a one-line error (document id plus the JSON payload of the
    response) when the operation was not successful; does nothing otherwise.
    """
    if not response.is_successful():
        # Message fixed: "document" was misspelled and ran straight into the id.
        print(f"Error when feeding document {id}: {response.get_json()}")

# Feed all prepared records into the 'imdb_film_schema' schema; unsuccessful
# operations are reported through `callback`.
vespa_application.feed_iterable(
    vespa_feed,
    schema="imdb_film_schema",
    callback=callback,
)

In [27]:
from vespa.io import VespaQueryResponse
def print_hits(response: VespaQueryResponse):
    """Print one line per hit: 1-based rank, document id and film title.

    Rank and id are each right-aligned in a 3-character column. Every hit in
    ``response.hits`` must carry ``fields`` with ``id`` and ``film_title``.
    """
    for rank, hit in enumerate(response.hits, start=1):
        fields = hit["fields"]
        print(f'{rank:>3} {fields["id"]:>3} {fields["film_title"]}')


In [32]:
# Search the 'default' field set (film_title + synopsis) for "world war" and
# list at most 5 hits.
response = vespa_application.query(
    yql='select * from sources * where default contains "world war" limit 5 ',
)
print_hits(response)

  1  80 Paths of Glory
  2  50 Casablanca
  3  46 Grave of the Fireflies
  4  84 1917
  5  24 Saving Private Ryan


In [38]:
# Free-text search: the text in `query` is what userQuery() stands for in the
# YQL. No limit clause is given here; the recorded output lists 10 hits.
response = vespa_application.query(
    yql='select * from sources * where userQuery() ',
    query='List all the movies ',
)
print_hits(response)

  1   7 Schindler's List
  2  73 The Shining
  3  12 The good, the Bad and the Ugly
  4  10 The Lord of the Rings: The Fellowship of the Ring
  5   5 The Lord of the Rings: The Return of the King
  6  13 The Lord of the Rings: The Two Towers
  7  28 The Silence of the Lambs
  8   3 The Godfather: Part II
  9   1 The Godfather
 10   0 The Shawshank Redemption


In [41]:
# Same free-text search as before, now with an explicit limit of 100 so more
# than the first handful of hits is listed.
response = vespa_application.query(
    yql='select * from sources * where userQuery() limit 100 ',
    query='List all the movies ',
)
print_hits(response)

  1   7 Schindler's List
  2  73 The Shining
  3  12 The good, the Bad and the Ugly
  4  10 The Lord of the Rings: The Fellowship of the Ring
  5   5 The Lord of the Rings: The Return of the King
  6  13 The Lord of the Rings: The Two Towers
  7  28 The Silence of the Lambs
  8   3 The Godfather: Part II
  9   1 The Godfather
 10   0 The Shawshank Redemption
 11  38 The Pianist
 12  36 The Prestige
 13  67 The Lives of Others
 14  63 The Dark Knight Rises
 15   2 The Dark Knight
 16  83 The Great Dictator
 17  41 The Usual Suspects
 18  35 The Intouchables
 19  37 The Departed
 20  88 The Hunt
 21  14 The Matrix
 22  43 The Lion King
 23  25 The Green Mile
 24  94 Eternal Sunshine of the Spotless Mind
 25  78 Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb
 26  60 Avengers: Infinity War
 27  59 Avengers: Endgame
 28  79 Witness for the Prosecution
 29  45 Cinema Paradiso
 30  72 Raiders of the Lost Ark
 31  54 Ayla: The Daughter of War
 32  46 Grave of the Fireflie

In [45]:
# Free-text search with the sentence below, capped at 20 hits.
# NOTE(review): the sentence is passed as plain query text through
# userQuery(); nothing in this YQL constrains the `year` attribute, so the hits
# are presumably matched on the words rather than restricted to 1900-2000.
# If a real year filter is intended, a numeric condition on `year` is likely
# needed - TODO confirm against the Vespa query language reference.
response = vespa_application.query(
    yql='select * from sources * where userQuery() limit 20 ',
    query='Released between 1900 to 2000 ',
)
print_hits(response)

  1  76 Anand
  2  96 Snatch
  3  35 The Intouchables
  4  99 Good Will Hunting
  5  92 My Father and My Son
  6   5 The Lord of the Rings: The Return of the King
  7  55 Vikram Vedha
  8  87 Drishyam
  9  78 Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb
 10  13 The Lord of the Rings: The Two Towers
 11  40 American History X
 12  34 Whiplash
 13  36 The Prestige
 14  47 Back to the Future
 15   2 The Dark Knight
 16  86 Andhadhun
 17  56 Your Name.
 18  63 The Dark Knight Rises
 19  67 The Lives of Others
 20  89 A Seperation
