In [1]:
!pip install boto3 requests




In [83]:
import csv
import json
import re
import time
from io import StringIO

import boto3
import requests

In [84]:
def transcribe_audio(bucket_name, file_name, job_name, language_code='ru-RU', media_format=None):
    """Run an Amazon Transcribe job for an S3 audio object and wait for it to finish.

    Args:
        bucket_name: Bucket that holds the audio file; the transcript JSON is
            written to the same bucket.
        file_name: Key of the audio object inside ``bucket_name``.
        job_name: Desired job name. It is sanitized before use, so the name
            actually submitted may differ from the one passed in.
        language_code: Language code forwarded to Transcribe (default ``'ru-RU'``).
        media_format: Media format forwarded to Transcribe. When ``None`` it is
            taken from the file extension, falling back to ``'wav'``.

    Returns:
        The ``TranscriptFileUri`` reported by Transcribe when the job completes,
        or ``None`` when the job fails.

    Raises:
        ValueError: If ``job_name`` is empty.
    """
    transcribe_client = boto3.client('transcribe')

    # Transcribe only accepts [0-9a-zA-Z._-] in job names (max 200 characters).
    # Every run of other characters, underscores or dashes becomes one dash;
    # underscores are still mapped to dashes to keep the names produced so far.
    sanitized_job_name = re.sub(r'[^0-9A-Za-z.]+', '-', job_name)[:200]
    if not sanitized_job_name:
        raise ValueError("job_name must not be empty")

    if media_format is None:
        supported_formats = ('mp3', 'mp4', 'wav', 'flac', 'ogg', 'amr', 'webm', 'm4a')
        extension = file_name.rsplit('.', 1)[-1].lower() if '.' in file_name else ''
        media_format = extension if extension in supported_formats else 'wav'

    # Start transcription job
    media_uri = f's3://{bucket_name}/{file_name}'
    transcribe_client.start_transcription_job(
        TranscriptionJobName=sanitized_job_name,
        Media={'MediaFileUri': media_uri},
        MediaFormat=media_format,
        LanguageCode=language_code,
        OutputBucketName=bucket_name  # Store transcription results in the same bucket
    )

    # Poll until the job reaches a terminal state
    while True:
        response = transcribe_client.get_transcription_job(TranscriptionJobName=sanitized_job_name)
        job = response['TranscriptionJob']
        status = job['TranscriptionJobStatus']
        if status in ('COMPLETED', 'FAILED'):
            break
        print("Waiting for transcription to complete...")
        time.sleep(10)

    if status == 'COMPLETED':
        output_file_uri = job['Transcript']['TranscriptFileUri']
        print(f"Transcription completed: {output_file_uri}")
        return output_file_uri

    # Surface the reason so a failed job can be diagnosed from the cell output
    failure_reason = job.get('FailureReason', 'no reason reported')
    print(f"Transcription failed! Reason: {failure_reason}")
    return None

In [85]:
def read_transcription_from_s3(bucket_name, job_name):
    """Download ``<job_name>.json`` from the bucket and return its first transcript text.

    Args:
        bucket_name: Bucket the transcript JSON was written to.
        job_name: Job name; the object key is this name plus ``.json``.

    Returns:
        The ``transcript`` string of the first entry in ``results.transcripts``.
    """
    storage = boto3.client('s3')
    object_key = f"{job_name}.json"

    s3_object = storage.get_object(Bucket=bucket_name, Key=object_key)
    raw_bytes = s3_object['Body'].read()
    payload = json.loads(raw_bytes.decode('utf-8'))

    first_transcript = payload['results']['transcripts'][0]
    return first_transcript['transcript']

In [86]:
def analyze_text_with_bedrock(client, model_id, text):
    """Ask a Claude model on Amazon Bedrock for a summary, result and sentiment of a call.

    Args:
        client: Bedrock runtime client exposing ``invoke_model``. The passed
            client is used; no module-level global is required.
        model_id: Bedrock model identifier (e.g. ``anthropic.claude-...``) or ARN.
            A value that is neither (no ``.`` and no ``arn:`` prefix) cannot be
            invoked, so the function warns and uses its built-in default model.
        text: Call transcript to analyze.

    Returns:
        Dict with the string fields ``Summary``, ``Result`` and ``Sentiment``;
        a field is ``""`` when the model's answer does not contain it.
    """
    default_model_id = 'anthropic.claude-3-5-sonnet-20240620-v1:0'
    if isinstance(model_id, str) and ('.' in model_id or model_id.startswith('arn:')):
        resolved_model_id = model_id
    else:
        print(f"Warning: {model_id!r} is not a Bedrock model identifier; using {default_model_id}")
        resolved_model_id = default_model_id

    # Each numbered item sits on its own line so the model mirrors that layout,
    # which is what the line-based parser below relies on.
    prompt = (
        "Answer in the same language as text. Provide the following analysis for the text:\n\n"
        "1) Short summary of the text in 10 or less words, with header, in 1 line, no carriage return\n"
        "2) Call result: positive, neutral, or negative\n"
        "3) Call sentiment score from -5 to 5 (negative to positive).\n\n"
        f"Text:\n{text}"
    )
    request_body = json.dumps({
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 300,
        "temperature": 0.7,
        "messages": [{"role": "user", "content": prompt}]
    })

    # Send request to Amazon Bedrock
    response = client.invoke_model(
        modelId=resolved_model_id,
        body=request_body
    )

    # Parse the response
    response_body = json.loads(response['body'].read())
    generated_text = response_body['content'][0]['text']
    print(f"generated_text: {generated_text}...")
    return _parse_call_analysis(generated_text)


def _parse_call_analysis(generated_text):
    """Extract the three numbered answers (1/2/3) from the model's reply text."""
    field_by_number = {"1": "Summary", "2": "Result", "3": "Sentiment"}
    parsed = {"Summary": "", "Result": "", "Sentiment": ""}

    for raw_line in generated_text.splitlines():
        # Tolerate markdown decoration in front of the item number
        line = raw_line.strip().lstrip("*#- ")
        # Accept "1)" as well as "1." (but not a decimal such as "1.5")
        match = re.match(r'([123])(?:\)|\.(?!\d))\s*(.*)', line)
        if match is None:
            continue
        number, remainder = match.groups()
        # Keep what follows the "Header:" part; without a colon keep the whole remainder
        value = remainder.split(":", 1)[-1].strip().strip("*").strip()
        parsed[field_by_number[number]] = value

    return parsed

In [81]:
def save_results_to_s3(bucket_name, file_name, results):
    """Write analysis rows as a ``;``-delimited CSV and upload it to S3.

    Args:
        bucket_name: Destination bucket.
        file_name: Destination object key.
        results: Iterable of mappings, each with the keys ``Summary``,
            ``Result`` and ``Sentiment``.
    """
    storage = boto3.client('s3')
    columns = ["Summary", "Result", "Sentiment"]

    buffer = StringIO()
    writer = csv.writer(buffer, delimiter=';')
    # Header first, then one line per analysed call in column order
    writer.writerow(columns)
    writer.writerows([row[column] for column in columns] for row in results)

    # Upload the UTF-8 encoded CSV in a single request
    payload = buffer.getvalue().encode("utf-8")
    storage.put_object(Bucket=bucket_name, Key=file_name, Body=payload)


In [91]:
# Notebook configuration: input/output locations and the Bedrock model to use.
s3_bucket = "abernads-cc-calls-rukz-transcribe"  # holds the audio files and the Transcribe output
audio_files = ["Collection RU.wav", "TLM RU.wav", "Infoline RU.wav"]  # Update file names
# Full Bedrock model identifier for Claude 3.5 Sonnet. A short alias such as
# "claude-sonnet-3" is not a valid modelId for InvokeModel.
model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"
analysis_results_bucket = "abernads-cc-rukz-analysis-results"
results_file_name = "ru-calls-result.csv"

# Initialize Bedrock client
bedrock_client = boto3.client('bedrock-runtime')

In [92]:
# Transcribe every audio file, analyse the transcript with Bedrock and store the
# accumulated results as a CSV in S3.
all_results = []
for audio_file in audio_files:
    # Timestamped job name; transcribe_audio() sanitizes it, so the name is not
    # re-sanitized here. rsplit keeps dots that are part of the file's base name.
    base_name = audio_file.rsplit('.', 1)[0]
    job_name = f"transcribe-{base_name}-{int(time.time())}"
    print(f"Starting transcription for {audio_file}...")

    output_file_uri = transcribe_audio(s3_bucket, audio_file, job_name)
    if not output_file_uri:
        continue

    # Take the job name from the URI Transcribe returned instead of assuming it
    # equals a locally sanitized copy; the transcript object is "<job name>.json".
    transcript_object = output_file_uri.rsplit('/', 1)[-1]
    if transcript_object.endswith('.json'):
        transcript_job_name = transcript_object[:-len('.json')]
    else:
        transcript_job_name = transcript_object

    print(f"Reading transcription for {transcript_job_name} from S3...")
    transcription_text = read_transcription_from_s3(s3_bucket, transcript_job_name)
    print(f"Transcription for {audio_file}: {transcription_text}\n")

    print(f"Analyzing text with Bedrock for {audio_file}...")
    analysis = analyze_text_with_bedrock(bedrock_client, model_id, transcription_text)
    analysis_results = {
        "Summary": analysis.get("Summary", ""),
        "Result": analysis.get("Result", ""),
        "Sentiment": analysis.get("Sentiment", "")
    }
    print(f"Analysis Results for {audio_file}:\n{analysis_results}\n")
    all_results.append(analysis_results)

    # Re-upload the full CSV after every file so finished results are kept even
    # if a later file raises.
    print(f"Saving analysis results to S3 bucket {analysis_results_bucket}...")
    save_results_to_s3(analysis_results_bucket, results_file_name, all_results)
    print("Analysis results saved successfully!")

Starting transcription for Collection RU.wav...
Waiting for transcription to complete...
Waiting for transcription to complete...
Transcription completed: https://s3.us-west-2.amazonaws.com/abernads-cc-calls-rukz-transcribe/transcribe-Collection-RU-1735469975.json
Reading transcription for transcribe-Collection-RU-1735469975 from S3...
Transcription for Collection RU.wav: Сети. Здравствуйте. Здравствуйте. Я хотел узнать. Хочу сделать отручку на кредит. Добрый день. Меня золотая с слухом в кредитбанк выведомляю запись разговора. Для полного информации надо пройтификацию полностью кредит оформлен на вас. Да, да, оформи на восемнадцатого восемнадцатого июля взял три платежа сделать потом можно. номер телефона, который вы сейчас звоните. Это ваш номер тридцать восемь один. Спасибо за информацию на линии. Ожидайте проверю информацию. Угу. Спасибо за ожидание. Я проверила информацию. Один стандартный кредит и один карточный кредит по стандартному кредиту у вас имеется страховка перестраховку