In [None]:
# =======================================
# Oracle OCI AI Services Lab using Python
# =======================================

# This tutorial will walk you through all the different OCI AI services, and use them with the Python SDK

# Overview of the services we'll explore :
# 1. AI Language Service : AIServiceLanguageClient() : for language detection, sentiment analysis and translation
# 2. AI Vision service : AIServiceVisionClient() : for object, faces and text detection in an image
# 3. AI Speech: AIServiceSpeechClient() : transcribe the sound of a video to a text file

# We'll also be using some side services,
# - ObjectStorageClient() : Object Storage buckets because that is the way a number of the
#   AI services take their input, for example for sound and images input
# - IdentityClient() to authenticate to your tenancy


# Before you start you need to make sure you can actually connect to your tenancy from your Python environment
# Setup config for my tenancy
# 1. Install the Python SDK on your system: sudo yum install python36-oci-sdk    More info at https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/pythonsdk.htm
# 2. Install OCI CLI on your system : brew update && brew install oci-cli     More info at https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliinstall.htm
# 3. Ensure you can connect with the CLI to your OCI instance; your ~/.oci/config file must be set up correctly
#    - Ensure you have created an api key for your account.  You should obtain a Fingerprint and a private pem key, as well as an example of the .oci/config file
#      See https://docs.oracle.com/en-us/iaas/Content/API/Concepts/apisigningkey.htm

# - import the oci library and store the oci config file in the config variable
# - copy the OCID of the compartment you will be using into the compart variable

# Useful links to documentation on the various API's:
# Top level API Reference doc : https://docs.oracle.com/en-us/iaas/api/
# - Language : https://docs.oracle.com/en-us/iaas/api/#/en/language/20221001/
# - Vision : https://docs.oracle.com/en-us/iaas/api/#/en/vision/20220125/
# - Speech: https://docs.oracle.com/en-us/iaas/api/#/en/speech/20220101/
# - Buckets: https://docs.oracle.com/en-us/iaas/api/#/en/objectstorage/20160918/


import oci

# Load the SDK settings from the "DEFAULT" profile of ~/.oci/config.
# That file should be filled in as proposed when you created the API key for your OCI user.
config = oci.config.from_file(file_location="~/.oci/config", profile_name="DEFAULT")

# OCID of the compartment used throughout this lab: replace it with your own
compart = "ocid1.compartment.oc1..aaaaa… your OCID …"

In [None]:
# Validate Authentication
# -----------------------
# Smoke test before running the more complex cells: if the SDK and the config
# are set up correctly, this prints the name of every region returned by the
# IdentityClient().

idd = oci.identity.IdentityClient(config)

list_regions_response = idd.list_regions()
regions = list_regions_response.data
for region in regions:
    print(region.name)

In [None]:
# List buckets and content of the buckets
# ---------------------------------------
# Print every bucket of the compartment you specified together with the objects
# it holds, and make sure the bucket used by this lab exists.

# Name of the bucket used in this lab (created below when it is missing)
bucket_name = "ai-lab-bucket1"

object_storage_client = oci.object_storage.ObjectStorageClient(config)
namespace = object_storage_client.get_namespace().data


# list_buckets is a paginated call: fetch all pages, otherwise an existing lab
# bucket could be missed in a compartment with many buckets and the
# create_bucket call below would then fail.
buckets = oci.pagination.list_call_get_all_results(
    object_storage_client.list_buckets, namespace, compart).data
is_there = False

for bucket in buckets:
    print("Bucket name: ",bucket.name)
    buck_name = bucket.name
    if buck_name == bucket_name:
        is_there = True
    # Only the first page of objects is printed, which is enough for this overview
    objects = object_storage_client.list_objects(namespace, buck_name).data
    for count, i in enumerate(objects.objects, start=1):
        print("Object ",count,": ",i.name)

# If the bucket does not yet exist, create it
if not is_there:
    create_bucket_response = object_storage_client.create_bucket(
        namespace_name=namespace,
        create_bucket_details=oci.object_storage.models.CreateBucketDetails(
            name=bucket_name,
            compartment_id=compart,
            ))

    # Show the description of the newly created bucket
    print(create_bucket_response.data)


In [None]:
#--------------------------------
# AI Service : Language detection
#--------------------------------
# Ask the Language service which language a piece of text is written in.

ai_language_client = oci.ai_language.AIServiceLanguageClient(config)

# French :
#text = "Et encore une autre langue, es-possible qu'il le comprend ?"

# Dutch:
text = "Een tekst in mijn moedertaal om het een beetje moeilijker te maken voor de service"

# English:
#text = "This should be fairly easy to detect, I'll avoid using the name of the actual language in this text"

# detect_dominant_language expects its own request model (DetectDominantLanguageDetails),
# not the request model of the sentiment API.
response = ai_language_client.detect_dominant_language(
    oci.ai_language.models.DetectDominantLanguageDetails(
        text=text
    )
)

print(response.data.languages)

# You can play with the comment signs before the various languages to try different use-cases

In [None]:
# Sentiment analysis
# ------------------
# The same Language client can also do sentiment analysis on a longer text.
# The text has to be wrapped in a TextDocument so the service can interpret it.

# First example: a review with negative wording
t_doc = oci.ai_language.models.TextDocument(
    language_code="en",
    key="doc1",
    text="This hotel is a bad place, I would strongly advise against going there")

# Request body: the documents to analyse and the compartment to run in
s_det = oci.ai_language.models.BatchDetectLanguageSentimentsDetails(
    compartment_id=compart,
    documents=[t_doc])

# The "ASPECT" level is requested for the analysis
b_rep = ai_language_client.batch_detect_language_sentiments(s_det, level=["ASPECT"])

print(b_rep.data)

In [None]:
# Second sentiment example, this time with a positive text
positive_text = """OCI recently added new services to existing compliance program including SOC, HIPAA, and ISO to enable our customers to solve their 
use cases. We also released new white papers and guidance documents related to Object Storage, the Australian Prudential Regulation Authority (APRA), and the 
Central Bank of Brazil. These resources help regulated customers better understand how OCI supports their regional and industry-specific compliance 
requirements. Not only are we expanding our number of compliance offerings and regulatory alignments, we continue to add regions and services at a faster clip.
        """

# Wrap the text in a TextDocument, the input format of the sentiment API
my_documents = oci.ai_language.models.TextDocument(
    language_code="en",
    key="doc1",
    text=positive_text)

s_det = oci.ai_language.models.BatchDetectLanguageSentimentsDetails(
    compartment_id=compart,
    documents=[my_documents])

# Same call as in the first example: the "ASPECT" level is requested
b_rep = ai_language_client.batch_detect_language_sentiments(s_det, level=["ASPECT"])

print(b_rep.data)

In [None]:
# Translate
# ---------
# Translate a few sentences from English to Dutch.  Feel free to change the text or the languages

# Use the notebook-wide `config` so this cell talks to the same profile as all
# the other cells, instead of silently re-reading the default config file.
ai_client = oci.ai_language.AIServiceLanguageClient(config)

key1 = "doc1"
key2 = "doc2"
text1 = "The Indy Autonomous Challenge is the worlds first head-to-head, high speed autonomous race taking place at the Indianapolis Motor Speedway"
text2 = "OCI will be the cloud engine for the artificial intelligence models that drive the MIT Driverless cars."
target_language = "nl"  # language code to translate into ("nl" = Dutch); change as desired

# Each text is sent as a TextDocument tagged with its source language
doc1 = oci.ai_language.models.TextDocument(key=key1, text=text1, language_code="en")
doc2 = oci.ai_language.models.TextDocument(key=key2, text=text2, language_code="en")
documents = [doc1, doc2]


batch_language_translation_details = oci.ai_language.models.BatchLanguageTranslationDetails(
    documents=documents,
    compartment_id=compart,
    target_language_code=target_language)
output = ai_client.batch_language_translation(batch_language_translation_details)
print(output.data)

In [None]:
# AI Vision Service
# -----------------
# A number of the AI services take their input from Object Storage, so start by
# uploading a local file to the bucket selected earlier.

object_storage = oci.object_storage.ObjectStorageClient(config)

# Local file to upload (another candidate: "gg-seminar2.mp4")
file_path = "aa.jpg"

# The object is stored under the file name, without any leading directories
uploaded_name = file_path.split("/")[-1]
with open(file_path, "rb") as f:
    put_object_response = object_storage.put_object(
        namespace, bucket_name, uploaded_name, f)

print(put_object_response.data)

# A printed result of "None" means the upload succeeded !
#
# You can run this cell a few times with different files.
# Make sure to upload the following files :
# - one image file that contains some clear objects,
# - an image file with some text
# - an image file with people in the picture
# - a video file with english audio

In [None]:
# List the lab bucket again to check that the newly uploaded file is present
objects = object_storage_client.list_objects(namespace, bucket_name).data
for count, i in enumerate(objects.objects, start=1):
    print("Object ",count,": ",i.name)

In [None]:
# Everything is in place to run the actual Vision commands on the uploaded files.
# First use-case: image classification.

ai_vision_client = oci.ai_vision.AIServiceVisionClient(config)

namespace = object_storage_client.get_namespace().data

# What to look for: image classification, with at most 130 results
classification_feature = oci.ai_vision.models.ImageClassificationFeature(
    feature_type="IMAGE_CLASSIFICATION",
    max_results=130)

# Where to find the image: the object uploaded to the lab bucket
image_in_bucket = oci.ai_vision.models.ObjectStorageImageDetails(
    source="OBJECT_STORAGE",
    namespace_name=namespace,
    bucket_name=bucket_name,
    object_name="aa.jpg")

# Some request parameters are optional, see the API doc for more info
image_details = oci.ai_vision.models.AnalyzeImageDetails(
    features=[classification_feature],
    image=image_in_bucket,
    compartment_id=compart)

analyze_image_response = ai_vision_client.analyze_image(
    analyze_image_details=image_details)

# Show the analysis returned by the service
print(analyze_image_response.data)

In [None]:
# Find the faces
# Now use the image that has people in it ...
analyze_image_response = ai_vision_client.analyze_image(
    analyze_image_details=oci.ai_vision.models.AnalyzeImageDetails(
        features=[
            # Face detection has its own feature model; use it instead of
            # relabelling an ImageClassificationFeature.
            oci.ai_vision.models.FaceDetectionFeature(
                feature_type="FACE_DETECTION",
                max_results=10)],
        image=oci.ai_vision.models.ObjectStorageImageDetails(
            source="OBJECT_STORAGE",
            namespace_name=namespace,
            bucket_name=bucket_name,
            object_name="aa.jpg"),
        compartment_id=compart)
    )
print(analyze_image_response.data)

In [None]:
# Find text in the image

# Make sure to use an uploaded image with some text in it
analyze_image_response = ai_vision_client.analyze_image(
    analyze_image_details=oci.ai_vision.models.AnalyzeImageDetails(
        features=[
            # Text detection has its own feature model; use it instead of
            # relabelling an ImageClassificationFeature (its max_results
            # setting belongs to classification, so it is not passed here).
            oci.ai_vision.models.ImageTextDetectionFeature(
                feature_type="TEXT_DETECTION")],
        image=oci.ai_vision.models.ObjectStorageImageDetails(
            source="OBJECT_STORAGE",
            namespace_name=namespace,
            bucket_name=bucket_name,
            object_name="aa.jpg"),
        compartment_id=compart)
    )

# Print only the detected lines of text; print(analyze_image_response.data)
# would show the complete response instead.
for i in analyze_image_response.data.image_text.lines:
    print (i.text)

In [None]:
# Speech
# ------
# Use a sound or video file uploaded earlier and extract its speech as text
ai_speech_client = oci.ai_speech.AIServiceSpeechClient(config)

# Input: the media file to transcribe, read from the lab bucket
speech_input = oci.ai_speech.models.ObjectListInlineInputLocation(
    location_type="OBJECT_LIST_INLINE_INPUT_LOCATION",
    object_locations=[
        oci.ai_speech.models.ObjectLocation(
            namespace_name=namespace,
            bucket_name=bucket_name,
            object_names=["gg-seminar.mp4"])])

# Output: the same bucket; no prefix is given here (one such as "res-" could be added)
speech_output = oci.ai_speech.models.OutputLocation(
    namespace_name=namespace,
    bucket_name=bucket_name)

# Model settings: generic domain, British English, diarization switched off
speech_model = oci.ai_speech.models.TranscriptionModelDetails(
    domain="GENERIC",
    language_code="en-GB",
    transcription_settings=oci.ai_speech.models.TranscriptionSettings(
        diarization=oci.ai_speech.models.Diarization(
            is_diarization_enabled=False,
            number_of_speakers=2)))

create_transcription_job_response = ai_speech_client.create_transcription_job(
    create_transcription_job_details=oci.ai_speech.models.CreateTranscriptionJobDetails(
        compartment_id=compart,
        display_name="gg-job",
        input_location=speech_input,
        output_location=speech_output,
        additional_transcription_formats=["SRT"],
        model_details=speech_model))

# This shows the submitted request, not yet the transcription result !
# If all goes well, you should see a "lifecycle_state": "ACCEPTED"
print(create_transcription_job_response.data)

In [None]:
# List the jobs named "gg-job" to follow their status and see when they have finished

ai_speech_client = oci.ai_speech.AIServiceSpeechClient(config)

list_transcription_jobs_response = ai_speech_client.list_transcription_jobs(
    display_name="gg-job",
    compartment_id=compart)

# One line per job: position in the list, acceptance time and current state.
# print(list_transcription_jobs_response.data) would show the full response instead.
for ct, job in enumerate(list_transcription_jobs_response.data.items):
    print("Job no. ",ct,", date= ", job.time_accepted,
         ", Status = ", job.lifecycle_state)


In [None]:
# Pick the elements of the created job that the get_transcription_job call
# and the download of the result need later on:
created_job = create_transcription_job_response.data

# - the OCID of the job
job_id = created_job.id
print ("Job ID: ", job_id)

# - the prefix under which the job writes its output in the bucket
out_loc = created_job.output_location.prefix
print ("Location: ", out_loc)


In [None]:
# Poll the transcription job every 5 seconds until it has finished
import time

while True:
    get_transcription_job_response = ai_speech_client.get_transcription_job(
        transcription_job_id=job_id)
    job_state = get_transcription_job_response.data.lifecycle_state
    print(job_state, " at ", time.ctime())
    if job_state == "SUCCEEDED":
        break
    # A failed or canceled job never reaches SUCCEEDED: stop instead of polling forever
    if job_state in ("FAILED", "CANCELED"):
        raise RuntimeError(
            "Transcription job %s ended in state %s" % (job_id, job_state))
    time.sleep(5)


# Get the original filename
ori_name = get_transcription_job_response.data.input_location.object_locations[0].object_names[0]
print("Original name = ",ori_name)

# Compose the path to the result file:
# <output prefix><namespace>_<bucket>_<original file name>.json
res_file = out_loc + namespace + "_" + bucket_name + "_" + ori_name +".json"
print("Result filename: ", res_file)



In [None]:
# Download the transcription result file from the lab bucket.
# The object_storage_client created earlier is reused; to create a fresh one use
#object_storage_client = oci.object_storage.ObjectStorageClient(config)

# Some request parameters are optional, see the API doc for more info
get_object_response = object_storage_client.get_object(
    namespace,
    bucket_name,
    res_file,
    http_response_content_type='text/plain')

# Show the data element of the response
print(get_object_response.data)

In [None]:
from pprint import pprint; 
import pickle
import json

# Extract the transcription text from the downloaded result file.
the_obj = get_object_response.data

# Read the body through the public `content` property, which returns the
# complete body as bytes.  This replaces the earlier workaround of pickling the
# response to a scratch file and then reading the private `_content` attribute
# (presumably that only worked because pickling forces the body to be read).
# Tip: pprint(vars(the_obj)) shows what is inside the response object.
decoded_string = the_obj.content.decode("utf-8")
#print(decoded_string)

# Parse the string as JSON and print the text of the first transcription
json_object = json.loads(decoded_string)
print (json_object["transcriptions"][0]["transcription"])

In [None]:
# This concludes the lab for now.