## OCI Speech - Transcription 

References:

- SDK source: https://github.com/oracle/oci-python-sdk/tree/22fd62c8dbbd1aaed6b75754ec1ba8a3c16a4e5a/src/oci/ai_speech
- Service documentation: https://docs.oracle.com/en-us/iaas/Content/speech/home.htm
- Questions: `#oci_speech_service_users` or `#igiu-innovation-lab` Slack channels

## Import Libraries

In [1]:
from oci.ai_speech import AIServiceSpeechClient
from oci.ai_speech.models import *
from oci.config import from_file
from oci.signer import load_private_key_from_file
import oci
from oci.object_storage import ObjectStorageClient
import io

## Set input variables

In [2]:

# Prefix passed to the output location for the transcription results.
PREFIX = "AAGARWA"
# Profile name to read from the OCI configuration file.
CONFIG_PROFILE = "SANDBOX"

# Compartment in which the transcription job is created.
COMPARTMENT_ID = "ocid1.compartment.oc1..aaaaaaaaxj6fuodcmai6n6z5yyqif6a36ewfmmovn42red37ml3wxlehjmga"

# Object Storage namespace and bucket (used for both input and output),
# and the name of the audio object to transcribe.
NAMESPACE = "axaemuxiyife"
BUCKET_NAME = "workshopbucket"
FILE_NAME = "voiceover_audio.mp3"

## Load OCI config
Set up authentication for OCI by reading the selected profile from a configuration file. The signer instance used for secure API communication is built from this configuration when the Speech client is created. The default configuration file location is `~/.oci/config`.

In [3]:
# Load the CONFIG_PROFILE section of the OCI configuration file.
config = from_file(file_location="~/.oci/config", profile_name=CONFIG_PROFILE)

## [Optional] Upload file



In [4]:
# Upload the local file FILE_NAME to BUCKET_NAME under the same object name.
# The file must exist in the current working directory, otherwise open()
# raises FileNotFoundError.
object_storage_client = ObjectStorageClient(config)
print(f"Uploading file {FILE_NAME} ...")
# Use a context manager so the file handle is closed even if the upload fails.
with open(FILE_NAME, "rb") as audio_file:
    object_storage_client.put_object(NAMESPACE, BUCKET_NAME, FILE_NAME, audio_file)
print("Upload completed !")

Uploading file voiceover_audio.mp3 ...


FileNotFoundError: [Errno 2] No such file or directory: 'voiceover_audio.mp3'

## Create AI service Speech client

In [5]:
# Speech client whose requests are signed with the API key described in `config`.
speech_client = AIServiceSpeechClient(
    config=config,
    signer=oci.signer.Signer(
        tenancy=config["tenancy"],
        user=config["user"],
        fingerprint=config["fingerprint"],
        private_key_file_location=config["key_file"],
    ),
    # Endpoint URL must not contain leading or trailing whitespace.
    service_endpoint="https://speech.aiservice.us-phoenix-1.oci.oraclecloud.com",
)

## Set the input location

In [6]:
# Point the job at a single object (FILE_NAME) inside the bucket.
object_location = oci.ai_speech.models.ObjectLocation(
    namespace_name=NAMESPACE,
    bucket_name=BUCKET_NAME,
    object_names=[FILE_NAME],
)

# Wrap the object location in an inline object-list input location.
input_location = oci.ai_speech.models.ObjectListInlineInputLocation(
    location_type="OBJECT_LIST_INLINE_INPUT_LOCATION",
    object_locations=[object_location],
)

## Set the output location

In [7]:
# Output goes to the same namespace and bucket as the input, with PREFIX as the prefix.
output_location = oci.ai_speech.models.OutputLocation(
    namespace_name=NAMESPACE,
    bucket_name=BUCKET_NAME,
    prefix=PREFIX,
)
 

## Set up input features
You can specify the features you want to use. Note that not all features are supported for all calls.


**Run only ONE of the two model cells: either Oracle or Whisper.**
### Oracle 

In [8]:
# Features for the Oracle model.

# Enable punctuation in the normalized transcript.
sample_normalization = oci.ai_speech.models.TranscriptionNormalization(
    is_punctuation_enabled=True,
)

# Enable diarization; number_of_speakers is left unset
# (per the original note, it is auto-detected).
transcription_settings = oci.ai_speech.models.TranscriptionSettings(
    diarization=oci.ai_speech.models.Diarization(is_diarization_enabled=True),
)

model_details = oci.ai_speech.models.TranscriptionModelDetails(
    model_type="ORACLE",
    language_code="en-US",
    domain="GENERIC",  # only the generic domain is supported for now
    transcription_settings=transcription_settings,
)


**Run only ONE of the two model cells: either Whisper or Oracle.**

### Whisper

In [None]:
# Features for the Whisper model.

# Enable punctuation in the normalized transcript.
sample_normalization = oci.ai_speech.models.TranscriptionNormalization(
    is_punctuation_enabled=True,
)

# Enable diarization; number_of_speakers is left unset
# (per the original note, it is auto-detected).
transcription_settings = oci.ai_speech.models.TranscriptionSettings(
    diarization=oci.ai_speech.models.Diarization(is_diarization_enabled=True),
)

model_details = oci.ai_speech.models.TranscriptionModelDetails(
    model_type="WHISPER_MEDIUM",
    language_code="en",
    domain="GENERIC",  # only the generic domain is supported for now
    transcription_settings=transcription_settings,
)


## Create Speech Analysis details

In [9]:
# Create Transcription Job with details provided
transcription_job_details = oci.ai_speech.models.CreateTranscriptionJobDetails(display_name="WorkshopTest",
    compartment_id=COMPARTMENT_ID,
    description="testing duirnhg workhop",
    model_details=model_details,
    input_location=input_location,
    additional_transcription_formats=["SRT"],
    normalization=sample_normalization,
    output_location=output_location
    )

## Run the job


In [10]:
# Submit the job. On failure the error is printed and transcription_job stays None;
# on success the details of the created job are printed.
transcription_job = None
try:
    transcription_job = speech_client.create_transcription_job(
        create_transcription_job_details=transcription_job_details
    )
except Exception as err:
    print(err)
else:
    print(transcription_job.data)

{
  "additional_transcription_formats": [
    "SRT"
  ],
  "compartment_id": "ocid1.compartment.oc1..aaaaaaaa5wmdeu3rf5s4rs4l66rksphne2orz4buauniiqtar63du6ni7icq",
  "created_by": "ocid1.user.oc1..aaaaaaaaacm2bvhb2idv5jk2vzwge6fwiuus4o47j4xa3q7niwexnvtr6weq",
  "defined_tags": {
    "Oracle-Tags": {
      "CreatedBy": "workshopusers/ashish.ag.agarwal@oracle.com",
      "CreatedOn": "2024-12-11T09:38:19.174Z"
    }
  },
  "description": "testing duirnhg workhop",
  "display_name": "WorkshopTest",
  "freeform_tags": {},
  "id": "ocid1.aispeechtranscriptionjob.oc1.phx.amaaaaaaghwivzaaulto2be2bkpx2cj4pkkbxomne4zreuuobs6unnnwko3q",
  "input_location": {
    "location_type": "OBJECT_LIST_INLINE_INPUT_LOCATION",
    "object_locations": [
      {
        "bucket_name": "workshopbucket",
        "namespace_name": "axaemuxiyife",
        "object_names": [
          "voiceover_audio.mp3"
        ]
      }
    ]
  },
  "lifecycle_details": null,
  "lifecycle_state": "ACCEPTED",
  "model_details": 

## Exercise : transcription

1. Create an app that takes in an audio file
    * Uses diarization to transcribe
    * Compare the Oracle & Whisper models
    * Compare with the original question (from the TTS exercise)

1. Take a Zoom recording
    * Transcribe
      * With captions
    * Summarize using an LLM

