# Generate Podcast Synopsis

## Speech To Text

In [5]:
import azure.cognitiveservices.speech as speechsdk
import time
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) so the os.getenv()
# lookups in this notebook can see them. load_dotenv is imported above but was
# never called; by default it does not override variables that are already set.
load_dotenv()

# Accumulates the recognized phrases; from_file() appends to this list.
text = []

def from_file(filename, region="koreacentral", language="en-US"):
    """Transcribe an audio file using continuous speech recognition.

    Each non-empty final recognition result is appended to the module-level
    ``text`` list, and session / recognition events are echoed to stdout.
    The call blocks (polling every 0.5 s) until a ``session_stopped`` or
    ``canceled`` event has been received.

    Args:
        filename: Path of the audio file passed to ``speechsdk.AudioConfig``.
        region: Speech resource region; defaults to the value that used to be
            hard-coded.
        language: Recognition language; defaults to the value that used to be
            hard-coded.

    The subscription key is read from the ``SPEECH_API_KEY`` environment
    variable.
    """
    speech_config = speechsdk.SpeechConfig(subscription=os.getenv("SPEECH_API_KEY"), region=region)
    speech_config.speech_recognition_language = language

    audio_config = speechsdk.AudioConfig(filename=filename)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    # Recognition state: flipped to True by stop_cb once recognition is over.
    done = False

    def stop_cb(evt):
        """Signal the waiting loop that recognition has finished."""
        nonlocal done
        print('CLOSING on {}'.format(evt))
        # Only set the flag here. The recognizer is stopped after the wait
        # loop rather than from inside its own event callback, which also
        # avoids stopping twice when both `canceled` and `session_stopped` fire.
        done = True

    def recognized(evt: speechsdk.SpeechRecognitionEventArgs):
        """Store and echo one final recognition result."""
        # Skip empty results so they do not add blank entries to the transcript.
        if evt.result.text:
            text.append(evt.result.text)
        print('RECOGNIZED: {}'.format(evt.result.text))

    # Events carrying a final recognition result
    speech_recognizer.recognized.connect(recognized)
    # Event marking the start of a recognition session
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    # Event marking the end of a recognition session
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    # Events carrying a canceled recognition result
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start continuous recognition and wait for a stop/cancel event.
    speech_recognizer.start_continuous_recognition()
    try:
        while not done:
            time.sleep(.5)
    finally:
        # Always stop the recognizer, including when the wait is interrupted
        # (e.g. the kernel is interrupted while the loop is polling).
        speech_recognizer.stop_continuous_recognition()

# Transcribe the podcast episode; recognized phrases accumulate in `text`.
from_file(filename="../data/Why SVB’s collapse is not a 2008 repeat.wav")

SESSION STARTED: SessionEventArgs(session_id=13a649feb5084c0e97f8451073e23d09)
RECOGNIZED: So general question here, I mean seeing bank runs does that term, I mean like Yikes, should we be freaking out?
RECOGNIZED: Problem. You know you put me in a difficult situation, right? Because.
RECOGNIZED: There's a feedback loop here, right? If everyone else is freaking out, you should freak out.
RECOGNIZED: But it would be much better if everyone didn't freak out.
RECOGNIZED: But CFTC U.S. financial commentator Rob Armstrong, and he's been thinking about panic a lot recently. That's because he, like a lot of financial journalists, has been watching a story over the last few days where panic has played a central role.
RECOGNIZED: And that story I'm talking about is the collapse of Silicon Valley Bank.
RECOGNIZED: And as this news broke last week.
RECOGNIZED: The headlines have been dire. Tech lender Silicon Valley Bank collapsed on Friday, the biggest American bank to fail since the 2008 financ

In [None]:
# Convert the list of recognized phrases into a single string.
# `text` is rebound from list to str here, so guard against re-running the
# cell: joining an existing str would insert a space between every character.
if not isinstance(text, str):
    text = ' '.join(text)
print(text)

## Set Up Azure OpenAI

In [11]:
import os
import openai
from dotenv import load_dotenv

# Set up Azure OpenAI
# Read a local .env file (if present) before the os.getenv() lookups below;
# load_dotenv is imported in this cell but was never called. By default it
# does not override variables that are already set in the environment.
load_dotenv()

openai.api_type = "azure"
openai.api_base = os.getenv("SOUTH_CENTRAL_US_OPENAI_API_BASE")
openai.api_version = "2022-12-01"
openai.api_key = os.getenv("SOUTH_CENTRAL_US_OPENAI_API_KEY")

## Deploy a Model

In [12]:
# Target model and the capability it has to expose
desired_model = 'text-davinci-003'  # a model suited to text generation
desired_capability = 'completion'

# Scan the existing deployments for a usable one
deployment_id = None
result = openai.Deployment.list()

for deployment in result.data:
    # Only deployments that finished successfully are candidates
    if deployment["status"] == "succeeded":
        model = openai.Model.retrieve(deployment["model"])

        # Remember the deployment when it serves the desired model with the desired capability
        if model["id"] == desired_model and model['capabilities'][desired_capability]:
            deployment_id = deployment["id"]

if deployment_id:
    print(f'Found a succeeded deployment of "{desired_model}" that supports text {desired_capability} with id: {deployment_id}.')
else:
    # Nothing matched: create a new deployment of the desired model
    print('No deployment with status: succeeded found.')

    print(f'Creating a new deployment with model: {desired_model}')
    result = openai.Deployment.create(model=desired_model, scale_settings={"scale_type": "standard"})
    deployment_id = result["id"]
    print(f'Successfully created {desired_model} that supports text {desired_capability} with id: {deployment_id}.')

Found a succeeded deployment of "text-davinci-003" that supports text completion with id: text-davinci-003.


## Text Chunk Generator

In [13]:
def chunk_generator(text, n, tokenizer):
    """Split ``text`` into token chunks of roughly ``n`` tokens each.

    A chunk is cut, when possible, at a position where the decoded chunk ends
    with "." or a newline. The search runs backwards from ``1.5 * n`` tokens
    down to (but not including) ``0.5 * n`` tokens. If no such position is
    found, the chunk is ``n`` tokens long, or whatever remains of the input.

    Args:
        text: The string to split.
        n: Target chunk size in tokens; must be at least 1.
        tokenizer: Object providing ``encode(str)`` returning a token sequence
            and ``decode(tokens)`` returning a string.

    Yields:
        Consecutive slices of the encoded token sequence (tokens, not text).

    Raises:
        ValueError: If ``n`` is smaller than 1. Raised when iteration starts.
            Without this check ``n == 0`` would yield empty chunks forever.
    """
    if n < 1:
        raise ValueError(f"n must be a positive number of tokens, got {n!r}")

    tokens = tokenizer.encode(text)
    total = len(tokens)
    # Loop-invariant window bounds, measured from the start of each chunk.
    upper_span = int(1.5 * n)
    lower_span = int(0.5 * n)

    i = 0
    while i < total:
        lower_bound = i + lower_span
        # Look for the nearest sentence end, walking back from the upper bound.
        j = min(i + upper_span, total)
        while j > lower_bound:
            # Decode the candidate chunk and check for a full stop or newline.
            chunk = tokenizer.decode(tokens[i:j])
            if chunk.endswith(".") or chunk.endswith("\n"):
                break
            j -= 1
        # No sentence end found in the window: fall back to n tokens.
        if j == lower_bound:
            j = min(i + n, total)
        yield tokens[i:j]
        i = j

## Request API

In [14]:
def request_api(document, prompt_postfix, max_tokens):
    """Send one completion request and return the generated text.

    The '<document>' placeholder in ``prompt_postfix`` is replaced with
    ``document`` and the resulting prompt is submitted to the deployment
    named by the module-level ``deployment_id``.

    Args:
        document: Text substituted for the '<document>' placeholder.
        prompt_postfix: Prompt template containing the placeholder.
        max_tokens: Upper bound passed to the completion call.

    Returns:
        The ``text`` field of the first choice in the response.
    """
    full_prompt = prompt_postfix.replace('<document>', document)

    completion_args = dict(
        deployment_id=deployment_id,
        prompt=full_prompt,
        temperature=0,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=1,
        presence_penalty=1,
        stop='###',
    )
    completion = openai.Completion.create(**completion_args)

    first_choice = completion['choices'][0]
    return first_choice['text']

## Generate Synopsis

In [15]:
# Prompt template for the synopsis step: request_api() substitutes the text to
# process for <document>; the instruction after the ### line asks for a synopsis.
prompt_postfix = """ <document>
  \n###
  \nSummarise the transcript of a podcast above into a synopsis. 
  \nSynopsis : 
"""
print(prompt_postfix)

 <document>
  
###
  
Summarise the transcript of a podcast above into a synopsis. 
  
Synopsis : 



In [16]:
import tiktoken

# Token budgets
n = 2000           # target chunk size when splitting the transcript
max_tokens = 1000  # cap on each generated response

tokenizer = tiktoken.get_encoding('p50k_base')

# Split the transcript into token chunks, then turn every chunk back into text
chunks = chunk_generator(text, n, tokenizer)
text_chunks = list(map(tokenizer.decode, chunks))

# Summarise each chunk independently
synopsis_chunck = []
for chunk in text_chunks:
    synopsis_chunck += [request_api(chunk, prompt_postfix, max_tokens)]

# Synopsis: the per-chunk summaries joined with single spaces
synopsis = ' '.join(synopsis_chunck)
print(synopsis)

Rob Armstrong, a U.S. financial commentator for the Financial Times, discusses the recent collapse of Silicon Valley Bank and what it means for the rest of the banking system. He explains that two factors led to its failure: bad decisions at the bank and a rapid increase in interest rates which caused their cost of funding to rise faster than their profits from lending out deposits. The FDIC, Federal Reserve and Treasury have stepped in to ensure depositors get 100 cents on the dollar back but Rob cautions against panic as most banks are still solvent. He also explains why there is a two-tier regulatory system for banks - one set of rules for large banks and another set for smaller ones - and how this may have played into SVB's collapse. Finally, he advises entrepreneurs and investors to ask questions about their bank's capital structure before investing money with them. Rob, an expert on banking regulation, discusses the recent collapse of SVB and how it may affect the banking industr

## Translate Synopsis

In [17]:
# Prompt template for the translation step: request_api() substitutes the text
# to process for <document>; the instruction after the ### line asks for a
# Mandarin translation.
prompt_postfix = """ <document>
  \n###
  \nTranslate synopsis into Mandarin.  
  \nTranslation : 
"""
print(prompt_postfix)

 <document>
  
###
  
Translate synopsis into Mandarin.  
  
Translation : 



In [18]:
# Translate the combined synopsis using the current prompt template.
max_tokens = 1000  # cap on the generated translation
translation = request_api(document=synopsis, prompt_postfix=prompt_postfix, max_tokens=max_tokens)
print(translation)


美国财务时报的金融评论员罗布·阿姆斯特朗（Rob Armstrong）就硅谷银行的最近崩溃以及它对其他银行体系意味着什么进行了讨论。他解释说，导致其失败的两个因素是银行内部的不当决定和利率急剧上升，使得其成本比出借存款所带来的利润快速上升。 FDIC、 联邦储备委员会和财政部已采取步骤，以保证存款人能够得到100% 的回报，但 Rob 告诫大家不要惊慌失措，因为大多数银行仍然流动性强。 他还解释了为何会针对大型银行和小型银行分别制定一套法律法��� - 这可能是 SVB 崩盘的原因之一。 最后，他建议企业家和投��者在将金​​


美国金融评论家罗伯特·阿姆斯特朗（Robert Armstrong）在《金融时报》上就硅谷银行的崩溃进行了讨论，并且说明这不是2008年金融危机的复制。他解释说，导致SVB倒闭的原因有两方面：一是银行内部出现了不当决定; 二是利率急剧上升。此外，Rob 还概述了银行如何运作、SVB具体出现什么问题以及是否存在更大的体系性原因。此外，他还详细说明了为什么我们在监管方面存在双标准制度, 以及道德弗兰克法案(Dodd-Frank Act) 的退减对此情况会不会造成影响。最后,  Robert Armstrong 向听众保证, 只要人民不惊慌失措, 情况应当一切安好, 250k 美元之下的储户由美国政府承保.
Robert Armstrong 金融专家提出了 SVB 最新崩盘及其对相关监管带来的影响。 他表明由于 2008 年危机之后所施加的法律法规使得如今的银行已然比 2008 年时更加强壮。 此外， 也告诫投者将存款存入需要留意看看能耐性情况。 最后 ， Robert Armstrong 预测随之考勒将使得 bank equity capital 更加昂���耗时曲緩效应将随之考勒使效能受到影响.

## Generate Tag Lines

In [33]:
# Prompt template for the tag-line step: request_api() substitutes the text to
# process for <document>; the instruction after the ### line asks for tag lines.
prompt_postfix = """ <document>
  \n###
  \nGenerate 2 to 3 tag lines based on the podcast synopsis above. 
""" 
print(prompt_postfix)

 <document>
  
###
  
Generate 2 to 3 tag lines based on the podcast synopsis above. 



In [34]:
# Produce tag lines from the combined synopsis using the current prompt template.
max_tokens = 500  # cap on the generated tag lines
tag_lines = request_api(document=synopsis, prompt_postfix=prompt_postfix, max_tokens=max_tokens)
print(tag_lines)


1. Get the facts on Silicon Valley Bank's collapse and what it means for banking. 
2. Don't Panic: Expert Advice on Banking Regulation After SVB's Collapse. 
3. Invest Wisely: Ask Questions About Your Bank's Capital Structure Before Investing Money With Them.


## Generate Search Engine Optimised (SEO) Keywords

In [22]:
# Prompt template for the keyword step: request_api() substitutes the text to
# process for <document>; the instruction after the ### line asks for 5 SEO keywords.
prompt_postfix = """ <document>
  \n###
  \nGenerate 5 search engine optimised keywords based on text above.  
"""
print(prompt_postfix)

 <document>
  
###
  
Generate 5 search engine optimised keywords based on text above.  



In [24]:
# Token budgets
n = 2000          # target chunk size when splitting the transcript
max_tokens = 100  # cap on each generated keyword list

tokenizer = tiktoken.get_encoding('p50k_base')

# Split the transcript into token chunks, then turn every chunk back into text
chunks = chunk_generator(text, n, tokenizer)
text_chunks = list(map(tokenizer.decode, chunks))

# Ask for keywords chunk by chunk
keywords_chunck = []
for chunk in text_chunks:
    keywords_chunck += [request_api(chunk, prompt_postfix, max_tokens)]

# Keywords: the per-chunk lists joined with single spaces
keywords = ' '.join(keywords_chunck)
print(keywords)

1. Bank Runs
2. Silicon Valley Bank Collapse
3. Financial Crisis 2008
4. US Banking Regulation 
5. FDIC Resolution 1. Rob Smith Banking Regulation
2. Financial Crisis 2008
3. Bank Equity Capital 
4. Charlotte Casiraghi Podcast 
5. House of Chanel Leron Control


In [25]:
# Prompt template for the reduction step: request_api() substitutes the text to
# process for <document>; the instruction after the ### line asks to reduce the
# keywords to 5.
prompt_postfix = """ <document>
  \n###
  \nReduce keywords above to 5 search engine optimised keywords. 
"""
print(prompt_postfix)

 <document>
  
###
  
Reduce keywords above to 5 search engine optimised keywords. 



In [26]:
# Condense the per-chunk keyword lists using the current prompt template.
max_tokens = 50  # cap on the reduced keyword list
seo_keywords = request_api(document=keywords, prompt_postfix=prompt_postfix, max_tokens=max_tokens)
print(seo_keywords)


1. Bank Runs
2. Financial Crisis 2008
3. Banking Regulation 
4. FDIC Resolution 
5. Bank Equity Capital
