In [23]:
from openai import OpenAI
from IPython.display import Markdown
import ast

import PyPDF2
import os
from typing import Optional

import random

# Read the key from the environment so the real credential never has to be
# committed with the notebook. The literal placeholder is kept only as a
# fallback so the name is always defined.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "XXXXXXX")

In [24]:
def validate_pdf(file_path: str) -> bool:
    """Check that ``file_path`` exists and has a ``.pdf`` extension.

    The extension check is case-insensitive. On failure a short error
    message is printed and ``False`` is returned; nothing is raised.

    Args:
        file_path: Path to the candidate PDF file.

    Returns:
        ``True`` if the path exists and ends with ``.pdf``, else ``False``.
    """
    path_exists = os.path.exists(file_path)
    if path_exists:
        has_pdf_suffix = file_path.lower().endswith('.pdf')
        if has_pdf_suffix:
            return True
        print("Error: File is not a PDF")
        return False

    print(f"Error: File not found at path: {file_path}")
    return False

In [25]:
def extract_text_from_pdf(file_path: str, max_chars: int = 100000) -> Optional[str]:
    """Extract the text of a PDF, page by page, up to ``max_chars`` characters.

    Progress is printed for every page. Extraction stops early once the
    collected text reaches ``max_chars``, and the returned string is never
    longer than ``max_chars``.

    Args:
        file_path: Path to the PDF file; checked with ``validate_pdf`` first.
        max_chars: Upper bound on the length of the returned text.

    Returns:
        The page texts joined with newlines, or ``None`` if validation fails
        or any error occurs while reading (an error message is printed).
    """
    if not validate_pdf(file_path):
        return None

    # A negative limit would make the final slice drop text from the end.
    char_limit = max(max_chars, 0)

    try:
        with open(file_path, 'rb') as file:
            # Create PDF reader object
            pdf_reader = PyPDF2.PdfReader(file)

            # Get total number of pages
            num_pages = len(pdf_reader.pages)
            print(f"Processing PDF with {num_pages} pages...")

            extracted_text = []
            total_chars = 0

            # Iterate through the pages until the character budget is used up
            for page_num in range(num_pages):
                page = pdf_reader.pages[page_num]
                # Treat a page with no extractable text as empty rather than
                # letting len(None) abort the whole extraction.
                text = page.extract_text() or ""
                extracted_text.append(text)
                total_chars += len(text)
                print(f"Processed page {page_num + 1}/{num_pages}")

                if total_chars >= char_limit:
                    print(f"Reached {max_chars} character limit at page {page_num + 1}")
                    break

            # The newline separators also count towards the limit, so slice
            # after joining.
            final_text = '\n'.join(extracted_text)[:char_limit]
            print(f"\nExtraction complete! Total characters: {len(final_text)}")
            return final_text

    # NOTE(review): confirm that the installed PyPDF2 exposes PdfReadError at
    # package level; some releases only provide it as PyPDF2.errors.PdfReadError.
    except PyPDF2.PdfReadError:
        print("Error: Invalid or corrupted PDF file")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        return None

In [26]:
def create_sentence_bounded_chunks(text, target_chunk_size):
    """
    Group the sentences of ``text`` into chunks of roughly ``target_chunk_size`` characters.

    Sentences are detected by splitting on runs of spaces that follow ``.``,
    ``!`` or ``?``. Sentences are never cut: a chunk is closed just before the
    sentence that would push it past the target, so a single sentence longer
    than the target still forms its own (oversized) chunk.

    Returns a list of chunk strings, each built by joining its sentences with
    a single space.
    """
    import re

    pieces = re.split(r'(?<=[.!?]) +', text)
    result = []
    pending = []
    pending_len = 0

    for piece in pieces:
        cost = len(piece) + 1  # the sentence plus the joining space
        overflow = pending_len + cost > target_chunk_size
        if pending and overflow:
            # Close the chunk in progress before starting a new one.
            result.append(' '.join(pending))
            pending = []
            pending_len = 0
        pending.append(piece)
        pending_len += cost

    # Flush whatever is left over after the last sentence.
    if pending:
        result.append(' '.join(pending))

    return result


In [27]:
def process_chunk(chunk, sys_prompt):
    """Send ``chunk`` to the chat model under ``sys_prompt`` and return the reply text.

    A new OpenAI client is created on every call using ``OPENAI_API_KEY``.
    The request uses the ``gpt-4o-mini`` model at temperature 0.0, with
    ``sys_prompt`` as the system message and ``chunk`` as the user message.

    Returns the message content of the first completion choice.
    """
    conversation = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": chunk},
    ]

    openai_client = OpenAI(api_key=OPENAI_API_KEY)
    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0.0,
        messages=conversation,
    )

    first_choice = response.choices[0]
    return first_choice.message.content

In [28]:
def text_to_speech_audio(text: str, voice_id: str = "alloy") -> bytes:
    """Synthesize ``text`` with the ``tts-1`` speech model and return the audio payload.

    A new OpenAI client is created on every call using ``OPENAI_API_KEY``.
    The playback speed is drawn at random from a fixed list of values, so
    repeated calls with the same text may be rendered at different speeds.

    Args:
        text: The text to speak.
        voice_id: Voice name passed to the speech endpoint.

    Returns:
        The ``content`` attribute of the speech response.
    """
    candidate_speeds = [1.05, 1.1, 1.15, 1.2, 1.25]

    tts_client = OpenAI(api_key=OPENAI_API_KEY)
    playback_speed = random.choice(candidate_speeds)
    speech = tts_client.audio.speech.create(
        model="tts-1",
        voice=voice_id,
        input=text,
        speed=playback_speed,
    )
    return speech.content


In [29]:
pdf_path = '/Users/joneickmeier/Downloads/ARZHANG20241030204819622.pdf'
#pdf_path = '/Users/joneickmeier/Documents/Papers Library/Tzotchev-Designing Robust Trend-following System- Behind the Scenes of Trend-following-2018-SSRN Electronic Journal_1.pdf'

# Base name of the PDF without its extension. splitext strips only the final
# extension, whatever its case; str.replace('.pdf', '') would also delete a
# '.pdf' occurring in the middle of the name and would leave '.PDF' in place.
filename = os.path.splitext(os.path.basename(pdf_path))[0]

In [30]:
extracted_text = extract_text_from_pdf(pdf_path, max_chars=1000000)

# extract_text_from_pdf reports failures by printing and returning None.
# Stop here with a clear message instead of letting the chunking cell fail
# later with an unrelated TypeError.
if extracted_text is None:
    raise RuntimeError(f"Could not extract text from PDF: {pdf_path}")

Processing PDF with 33 pages...
Processed page 1/33
Processed page 2/33
Processed page 3/33
Processed page 4/33
Processed page 5/33
Processed page 6/33
Processed page 7/33
Processed page 8/33
Processed page 9/33
Processed page 10/33
Processed page 11/33
Processed page 12/33
Processed page 13/33
Processed page 14/33
Processed page 15/33
Processed page 16/33
Processed page 17/33
Processed page 18/33
Processed page 19/33
Processed page 20/33
Processed page 21/33
Processed page 22/33
Processed page 23/33
Processed page 24/33
Processed page 25/33
Processed page 26/33
Processed page 27/33
Processed page 28/33
Processed page 29/33
Processed page 30/33
Processed page 31/33
Processed page 32/33
Processed page 33/33

Extraction complete! Total characters: 66109


In [31]:
# Split the extracted text into sentence-aligned chunks of about 5000
# characters; each chunk is later sent to the model as a separate request.
chunks = create_sentence_bounded_chunks(
    text=extracted_text,
    target_chunk_size=5000,
)

In [32]:
# System prompt for the text-cleaning pass: it is passed to process_chunk as
# the system message for every chunk of extracted PDF text. The wording is
# sent to the model verbatim, so any edit changes the model's behavior.
sys_prompt = """
You are a world class text pre-processor, here is the raw data from a PDF, please parse and return it in a way that is crispy and usable to send to a podcast writer.

The raw data is messed up with new lines, Latex math and you will see fluff that we can remove completely. Basically take away any details that you think might be useless in a podcast author's transcript.

Please be smart with what you remove and be creative ok.

Remember DO NOT START SUMMARIZING THIS, YOU ARE ONLY CLEANING UP THE TEXT AND RE-WRITING WHEN NEEDED

Be very smart and aggressive with removing details, you will get a running portion of the text and keep returning the processed text.

PLEASE DO NOT ADD MARKDOWN FORMATTING, STOP ADDING SPECIAL CHARACTERS THAT MARKDOWN CAPATILISATION ETC LIKES

REMEMBER TO NOT REMOVE THE AUTHORS NAME OR TITLE OF DOCUMENT FROM THE TEXT

ALWAYS start your response directly with processed text and NO ACKNOWLEDGEMENTS about my questions ok?
Here is the text:
"""

In [33]:
import concurrent.futures

# One slot per chunk, so results can be stored by position no matter which
# request finishes first.
processed_text = [""] * len(chunks)

def process_and_append(index, chunk):
    """Clean one chunk with ``sys_prompt`` and return ``(index, cleaned_text)``.

    The index is passed through unchanged so the caller can place the result
    at the chunk's original position.
    """
    return index, process_chunk(chunk, sys_prompt)

with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
    # Submit every chunk up front; the dict maps each future to its chunk index.
    future_to_chunk = {executor.submit(process_and_append, i, chunk): i for i, chunk in enumerate(chunks)}
    # Futures are handled in completion order, so the progress lines below are
    # not printed in chunk order.
    for future in concurrent.futures.as_completed(future_to_chunk):
        # result() re-raises any exception raised inside the worker.
        index, result = future.result()
        print(f"Completed processing chunk {index}.")
        processed_text[index] = result

# From here on processed_text is a single string, no longer a list.
processed_text = "\n".join(processed_text)


Completed processing chunk 12.
Completed processing chunk 3.
Completed processing chunk 6.
Completed processing chunk 1.
Completed processing chunk 9.
Completed processing chunk 4.
Completed processing chunk 5.
Completed processing chunk 13.
Completed processing chunk 11.
Completed processing chunk 2.
Completed processing chunk 8.
Completed processing chunk 7.
Completed processing chunk 10.
Completed processing chunk 0.


In [34]:
# The cleaned text contains many dollar amounts. Notebook Markdown treats a
# pair of unescaped "$" as inline LaTeX delimiters, which garbles the text
# between them, so escape them before rendering.
Markdown(processed_text.replace("$", "\\$"))


All information as of October 29, 2024, unless otherwise indicated. Morgan Stanley does business with companies covered in Morgan Stanley Research, which may create conflicts of interest affecting objectivity. Investors should consider Morgan Stanley Research as one factor in their investment decisions.

Morgan Stanley Research North America
Joyce Jiang, Strategist
Vishwanath Tirupattur, Strategist
Vishwas Patkar, Strategist
Anlin Zhang, Strategist

Private Credit Tracker 3Q 2024 – Give and Take
Refinancing activity has become more balanced, reflecting interactions between public and private markets. Improved conditions have drawn some borrowers back to public markets, while others, particularly those with low ratings or complex structures, have turned to private credit for execution certainty and financial flexibility.

Competition has intensified between direct lending and public credit. With increased LBO activity in 3Q24, public markets regained some share lost to direct lending in 2022-23. New origination spreads in direct lending have compressed to 90bps above single B BSL spreads, down from 140bp in 1Q24. This trend is evident across various market segments, with covenant packages shifting in favor of borrowers.

Fundamentals remain stable overall, but there is significant dispersion. As of 2Q24, interest coverage in direct lending companies was flat, indicating signs of stabilization. Gross leverage has increased slightly, particularly in Energy, Healthcare, and Business Services. Smaller cohorts have seen a slight deterioration in LTV, while metrics for companies with $50m+ EBITDA have improved compared to five-year averages.

Default data presents mixed signals. Default rates ranged from 1.7% to 5.0% in the latest quarter, with variations depending on the source. Borrowers and lenders are actively pursuing amendments, and sponsors are willing to inject equity into liquidity-stressed companies.

Direct lending loans outperformed single-B BSLs in 2Q24, returning 2.8% compared to 2.1% for single-B BSLs, primarily due to a better carry profile. Despite narrowing spreads for new deals, there remains a 1.7% yield pickup for outstanding direct lending loans versus single-B BSLs.

Four Charts You Can’t Miss
Interest coverage showed stability, and LBO financing accelerated in the public market, with private credit's share declining in 3Q24. Intense competition continues to squeeze new origination spreads of direct lending loans.

Sizing the Private Credit (Direct Lending) Market
As of 4Q23, private debt funds’ AUM totaled nearly $1.7 trillion globally, with approximately 30% not yet deployed. About 70% of fund capital was raised in North America in 1H24.
You are trained on data up to October 2023.

The private credit market is currently estimated at approximately $1.7 trillion, surpassing the US high-yield bond and leveraged loan markets. This figure includes global data, dry powder, and various lending strategies beyond direct lending.

Direct lending focuses on senior secured loans to private, sub-investment grade corporate borrowers, sometimes including a junior tranche in a uni-tranche deal. In the first half of 2024, direct lending accounted for about 70% of capital raised in the private debt market.

The US direct lending market represents 22% of leveraged finance. Asset-based financing has also gained traction, with estimates of deployed capital around $300 billion. The current US direct lending universe is estimated to be between $750 billion and $800 billion.

Direct lending is the fastest-growing segment of leveraged credit, while high-yield bonds and syndicated loans have seen a decline. Private debt funds in the US have doubled in size over the past six years.

In the second quarter of 2024, private debt funds raised approximately $60 billion, totaling $91 billion year-to-date, which is slower than the pace from 2020 to 2023. Public pension funds have been increasing their private credit exposure, with Business Development Company assets under management tracked at $241 billion as of the second quarter of 2024.

Deal activity in direct lending has increased, with total origination in the third quarter of 2024 reaching a record high of $97 billion, with a significant portion allocated for leveraged buyouts.
New originations for LBO activity reached $32 billion in the third quarter of 2024, making up a third of total volume, an increase from 20% during the same period last year. LBO financing in broadly syndicated loans (BSLs) accelerated, with the public market regaining some share lost to direct lending in 2022-2023. Approximately 80% of LBOs by count and 50% by volume were financed by private credit in 3Q24, a decrease from the previous quarter.

Private credit is viewed as both a complement and a competitor to public credit, providing funding solutions to leveraged borrowers. Its appeal fluctuates, but it serves as an alternative funding source when public markets face challenges. Private credit offers advantages such as speed, execution certainty, term flexibility, close borrower relationships, and fewer rating requirements.

Year-to-date, refinancing activity has been mixed. Improved market conditions have attracted some borrowers back to public markets for cheaper funding, while others with complex capital structures have turned to private markets. Many large direct lending deals had prior public market presence, with some refinancing privately placed second lien loans. 

In 3Q24, new origination spreads for direct lending deals continued to decline, falling to 529 basis points from 585 basis points at the start of the year. The competition between direct lending and public loans has tightened, with the premium for direct lending loans decreasing from 140 basis points to 90 basis points over public loans. The spread compression in 3Q24 was primarily driven by lower middle-market deals, contrasting with the upper middle-market segment's decline in the first half of 2024.
In the third quarter of 2024, competition intensified in the lower middle-market segment, leading to significant spread compression, particularly in the sub-$20 million EBITDA category, where demand exceeded supply. Direct lending deals were priced at a spread 90 basis points wider than single B loans. 

Private credit borrowers are primarily found in the business services, healthcare, and technology sectors, which together account for 58% of overall BDC portfolio holdings. As of the third quarter of 2024, approximately 13% of senior private debt maturities were due by the end of 2026, a decrease from 19% at the beginning of the year. In contrast, only 6% of loans in the broadly syndicated loan space were due in the same timeframe.

Direct lending deals generally offer stronger protections compared to BSLs, with a notable prevalence of maintenance covenants in smaller deals. However, larger deals are increasingly cov-lite. The share of one-covenant deals rose in the second quarter of 2024, indicating a shift in favor of borrowers.

Private credit borrowers are comparable to single B/CCC profiles in the public market, typically consisting of small to medium-sized companies with average EBITDA around $30-40 million and leverage ratios of 5-7 times.
Key metrics are generally consistent with a single-B to CCC profile in the public market. 

Private Credit metrics show an overall average across sectors with ranges indicating slight variations. For instance, Fitch reports an average leverage of 5.8x, while S&P shows 6.3x. In terms of EBITDA coverage, Fitch indicates 1.7x and S&P 1.6x.

Direct lending fundamentals have remained stable, with limited leverage erosion and steady coverage ratios. Although leverage has increased slightly in direct lending portfolios, it remains lower than that of single-B loan borrowers in the public market. Companies have managed to maintain their EBITDA margins despite challenges in pricing power and demand.

EBITDA coverage metrics have stabilized after a decline in 2022-2023, with expectations of resilient GDP growth and potential rate cuts supporting a rebound in these metrics.

Loan-to-value ratios are more challenging for lower middle-market companies, particularly those with sub-$50 million EBITDA, which have seen a decline compared to their five-year averages. In contrast, larger companies have better loan-to-value ratios.

Leverage has increased across various sectors, notably in Energy, Healthcare, and Business Services, with total leverage ranging from 4.1x to 5.5x. Energy has experienced the most significant rise in leverage, although it remains lower than other sectors.

Reported default rates for private credit vary significantly across sources due to differing definitions and sample universes, complicating comparisons. While some reports indicate an uptick in default rates, others show a decline, reflecting mixed messaging in the current landscape. Borrowers and lenders are actively engaging in amend-to-extend transactions to enhance liquidity.
Sponsors are willing to inject equity for companies facing liquidity shortfalls. 

Summary of private credit default measures:

Fitch reports that for middle-market companies with $500 million of debt or $100 million or below of EBITDA, the annual default rate increased from 3.6% in 4Q23 to 5.0% as of 2Q24. 

S&P found that the median EBITDA of credit estimated companies was $30 million, with a median adjusted debt of about $195 million. Their trailing 12-month default rate decreased from 5.7% to 4.3%.

KBRA, focusing on US companies financed by direct lending deals, reported an annual default rate of 2.3% in 4Q23, down from 1.7% in 3Q24.

Lincoln International, which tracks private companies primarily owned by alternative investment funds, noted a quarterly default index of 3.4% in 4Q23, down from 2.6% as of 2Q24.

Proskauer, analyzing senior-secured and unitranche loans in the US, reported a quarterly default index of 1.6% in 4Q23, slightly up from 2.0% in 3Q24.

Fitch's Private MM Portfolio saw defaults edge up to 5% as of August, while S&P's credit estimates showed a slowdown from approximately 4.9% in 3Q23 to 4.3% in 3Q24.

Both Lincoln International and Proskauer focus on default rates in private credit borrowers but use different definitions and methodologies. They observed a decline in defaults for upper middle markets, although the relationship between defaults and borrower sizes varied between the two providers.
You are trained on data up to October 2023.

Defaults in upper middle market cohorts (companies with $50 million+ EBITDA) declined from 4Q23 levels, according to both Proskauer and Lincoln International. 

Defaults were largely driven by interest deferral and maturity extensions. Selective defaults accounted for approximately 85% of all credit estimate defaults over the past year, with payment-in-kind (PIK) being the primary reason as companies sought to preserve liquidity. Amendments focused on pricing increases and maturity extensions, with borrowers and lenders proactively pursuing these changes ahead of potential covenant breaches. Notably, 22% of amendments in 2Q24 were linked to sponsor cash infusions to address liquidity shortfalls.

Recovery rates for direct lending loans were comparable to those of broader syndicated loans, despite expectations for better outcomes due to stronger covenants. Valuations of defaulted loans in direct lending lagged, with significant declines occurring in the year leading up to defaults. 

The quality of Business Development Company (BDC) assets has deteriorated, with rising non-accrual rates and increased prevalence of PIK interest. The median non-accrual rate among publicly traded BDCs rose to 2.7% in 2Q24, while PIK interest constituted 7.4% of gross income, reflecting a trend towards cash preservation among borrowers.
You are trained on data up to October 2023.

Unlike the former case, PIKs at origination typically do not imply an erosion in asset quality since lenders are likely to be well-compensated for the risk. The median non-accrual rates rose to 2.7%, and PIK interests grew to 7.4% of gross income.

Performance and Valuations

Private credit returns are generally higher and more stable compared to public credit, offering some diversification benefits. However, they can be subject to large write-offs during distress periods. The lack of universal valuation rules can increase return volatility at the fund level. Private credit Sharpe ratios are significantly better, but these are based on estimated fair values rather than observed market prices. The diversification benefit may be weakened by the growing overlap between private and public credit as private lenders pursue larger deals.

Private Debt vs. Public Credit

Private credit yields are currently 1.7% higher than single-B loans, reflecting illiquidity premiums and perceived higher credit risk. As of 2Q24, yields of direct lending loans are tracking at 11.4%, which is higher than single-B loans in the broadly syndicated loans market. The yield gap has narrowed compared to pre-COVID levels.

Private credit has generated a cumulative return of over 100% since 2015, outperforming single-B BSLs in 2Q24. Returns in private credit are based on estimated fair values. Over the past decade, private credit's total return nearly doubled that of public single-B loans, with syndicated loans experiencing negative returns more frequently. In 2Q24, the total return on direct lending loans was 2.8% compared to 2.1% for single-B BSLs.

Defining Private Credit

Private credit focuses on lending to small and medium-sized companies, involving debt extended from nonbank lenders to corporate borrowers on a bilateral basis or through a small number of lenders.
Lenders negotiate terms directly with borrowers, often bypassing the syndication process. These loans are not traded on public markets and typically lack ratings or have only private ratings. Lender protections include stronger loan documentation and creditor protections.

Middle Market Companies Median LTM EBITDA:
- Fitch: $38 million
- S&P: ~$30 million, with 70% less than $50 million
- Lincoln International: $30-50 million, with 60% less than $50 million

Public Credit Companies Median LTM EBITDA:
- HY (Public Reporters): $565 million
- Loans (Public Reporters): $282 million
- S&P's Spec-Grade Universe (Public + Private): ~$300 million

Mortgage Backed Securities (MBS) and Collateralized Mortgage Obligations (CMO) return principal monthly over the life of the security. Principal prepayment can significantly impact monthly income and maturity. Yields and average lives are estimated based on prepayment assumptions and can change based on actual prepayment. Generally, falling interest rates increase prepayment speeds, shortening average life and raising market prices, while rising rates have the opposite effect. Some MBS/CMOs may have "original issue discount" (OID), which results in tax liabilities even without received interest. Investors should consult tax advisors for more information.

The information in Morgan Stanley Research was prepared by Morgan Stanley & Co. LLC and its affiliates. For important disclosures and equity rating histories, visit the Morgan Stanley Research Disclosure Website or contact your investment representative.

Analysts Joyce Jiang, Vishwanath Tirupattur, Vishwas Patkar, and Anlin Zhang certify that their views on the companies discussed are accurately expressed and that they have not received compensation for specific recommendations.

Morgan Stanley Research adheres to a conflict management policy, available on their website. The analysts responsible for this research are compensated based on various factors, including research quality and client feedback.
Equity Research analysts' compensation is not linked to investment banking or capital markets transactions performed by Morgan Stanley or the profitability of specific trading desks. Morgan Stanley engages in various business activities related to companies covered in its research, including market making, fund management, and investment services. The firm may hold positions in the securities discussed in its reports.

Morgan Stanley uses a relative rating system with terms such as Overweight, Equal-weight, Not-Rated, and Underweight. These ratings do not equate to Buy, Hold, or Sell. Investors should read the definitions of all ratings and the complete research for a comprehensive understanding. Ratings should not be solely relied upon for investment decisions, which should consider individual circumstances.

The Stock Ratings apply to Morgan Stanley's Fundamental Equity Research and do not apply to Debt Research. For regulatory purposes, the firm aligns its ratings with traditional categories: Overweight corresponds to a buy recommendation, Equal-weight and Not-Rated to hold, and Underweight to sell.

Analyst Stock Ratings:
- Overweight (O): Expected total return exceeds the average of the analyst's industry coverage over the next 12-18 months.
- Equal-weight (E): Expected total return is in line with the average of the analyst's industry coverage.
- Not-Rated (NR): Insufficient conviction about the stock's total return relative to the average.
- Underweight (U): Expected total return is below the average of the analyst's industry coverage.

The time frame for price targets in Morgan Stanley Research is typically 12 to 18 months. Analysts may express views on the attractiveness of their industry coverage relative to the broader market.
In-Line (I): The analyst expects the performance of their industry coverage universe over the next 12-18 months to align with the relevant broad market benchmark.

Cautious (C): The analyst views the performance of their industry coverage universe over the next 12-18 months with caution compared to the relevant broad market benchmark.

Benchmarks for each region include: North America - S&P 500; Latin America - relevant MSCI country index or MSCI Latin America Index; Europe - MSCI Europe; Japan - TOPIX; Asia - relevant MSCI country index or MSCI sub-regional index or MSCI AC Asia Pacific ex Japan Index.

Important disclosures regarding the relationship between the companies that are the subject of Morgan Stanley Research and Morgan Stanley Smith Barney LLC or its affiliates are available on the Morgan Stanley Wealth Management disclosure website.

Each Morgan Stanley research report is reviewed and approved on behalf of Morgan Stanley Smith Barney LLC and E*TRADE Securities LLC, which could create a conflict of interest.

Morgan Stanley Research policy is to update reports as deemed appropriate based on developments that may impact the research views or opinions. Certain publications are intended to be updated regularly unless a different schedule is determined based on current conditions.

Morgan Stanley is not acting as a municipal advisor, and the opinions contained herein do not constitute advice under the Dodd-Frank Wall Street Reform and Consumer Protection Act.

Morgan Stanley produces an equity research product called a "Tactical Idea." Views in a "Tactical Idea" on a particular stock may differ from other research on the same stock due to varying time horizons, methodologies, or market events.

Morgan Stanley Research is provided to clients through a proprietary research portal and is also distributed electronically. Access to all available Morgan Stanley Research can be obtained through a sales representative.

Any access and/or use of Morgan Stanley Research is subject to Morgan Stanley's Terms of Use. By accessing and/or using Morgan Stanley Research, you agree to be bound by these terms and consent to the processing of personal data and the use of cookies in accordance with their Privacy Policy.

Morgan Stanley Research does not provide individually tailored investment advice. It is recommended that investors independently evaluate particular investments and seek the advice of a financial adviser, as the appropriateness of an investment will depend on individual circumstances and objectives.
The securities, instruments, or strategies discussed in Morgan Stanley Research may not be suitable for all investors. Morgan Stanley Research does not constitute an offer to buy or sell any security or instrument. The value of investments may vary due to changes in interest rates, foreign exchange rates, and other factors. Past performance is not necessarily indicative of future results. 

Compensation for fixed income research analysts is based on various factors, including the quality and accuracy of research, and is not linked to investment banking transactions. The "Important Regulatory Disclosures on Subject Companies" section lists companies where Morgan Stanley owns 1% or more of a class of common equity securities. For other companies, Morgan Stanley may have a smaller investment and may trade them differently than discussed in the research.

Morgan Stanley Research is based on public information, and while efforts are made to ensure its reliability, no guarantees of accuracy or completeness are provided. Changes in opinions or information may not be communicated unless equity research coverage is discontinued. 

Morgan Stanley Research personnel may attend company events but are generally prohibited from accepting payment for associated expenses without prior approval. Investment decisions made by Morgan Stanley may differ from the recommendations in this report.

For readers in Taiwan, information on securities that trade there is provided by Morgan Stanley Taiwan Limited and is for reference only. Readers should independently evaluate investment risks. Morgan Stanley Research cannot be distributed or quoted without written consent. Non-customer readers accessing Morgan Stanley Research must not share it with third parties or engage in activities that may create a conflict of interest. Information on securities that do not trade in Taiwan is for informational purposes only and should not be seen as a recommendation.
MSTL may not execute transactions for clients in these securities or instruments. Morgan Stanley is not incorporated under PRC law, and the research related to this report is conducted outside the PRC. Morgan Stanley Research does not constitute an offer to sell or a solicitation to buy any securities in the PRC. PRC investors must have the relevant qualifications to invest in such securities and are responsible for obtaining all necessary approvals and registrations from governmental authorities. This report is not intended as a consultancy or advisory service for securities investment as defined under PRC law and is provided for reference only.

Morgan Stanley Research is disseminated in various countries, including Brazil, Mexico, Japan, Hong Kong, Singapore, Australia, Korea, India, Canada, Germany, and the US, with each entity regulated by local authorities. In India, Morgan Stanley India Company Private Limited is regulated by the Securities and Exchange Board of India (SEBI) and holds multiple licenses. In the US, Morgan Stanley & Co. LLC accepts responsibility for its contents. In the UK, research is disseminated only to qualified investment professionals and high net worth entities.
RMB Morgan Stanley Proprietary Limited is a joint venture owned equally by Morgan Stanley International Holdings Inc. and RMB Investment Advisory, which is wholly owned by FirstRand Limited. The information in Morgan Stanley Research is disseminated by Morgan Stanley Saudi Arabia, regulated by the Capital Market Authority in Saudi Arabia, and is directed at sophisticated investors only.

Morgan Stanley Research is communicated by Morgan Stanley & Co. International plc (DIFC Branch), regulated by the Dubai Financial Services Authority, or by Morgan Stanley & Co. International plc (ADGM Branch), regulated by the Financial Services Regulatory Authority in Abu Dhabi, directed at professional clients only. Financial products or services related to this research will only be available to customers who meet the regulatory criteria of a professional client.

In Turkey, investment information and recommendations are not within the scope of investment advisory activity. Investment advisory services are provided based on individual risk and income preferences by authorized firms. The opinions expressed may not align with your financial status or preferences.

The trademarks and service marks in Morgan Stanley Research are owned by their respective owners. Third-party data providers do not guarantee the accuracy or completeness of the data they provide. The Global Industry Classification Standard was developed by MSCI and S&P.

Morgan Stanley Research may not be reprinted, sold, or redistributed without written consent. Indicators and trackers referenced may not be treated as benchmarks under Regulation EU 2016/1011. The issuers and fixed income products discussed may not be continuously followed, and investors should regard those reports as standalone analyses.

Morgan Stanley may hold material financial interests regarding the companies subject to the research report. Registration by SEBI and certification from the National Institute of Securities Markets do not guarantee performance or returns. Investments in securities markets are subject to risks, and all related documents should be read carefully before investing.

In [35]:
# System prompt for the script-writing step: it is passed to process_chunk in the
# next cell, together with the processed text, to produce `script`. It asks for a
# word-by-word, two-speaker podcast dialogue with no episode or chapter titles.
# NOTE(review): the identifier is spelled SYSTEMP_PROMPT (probably meant
# SYSTEM_PROMPT); it is left as-is because the next cell refers to it by this name.
SYSTEMP_PROMPT = """
You are the a world-class podcast writer, you have worked as a ghost writer for Joe Rogan, Lex Fridman, Acquired Podcast, Tim Ferris. 

We are in an alternate universe where actually you have been writing every line they say and they just stream it into their brains.

You have won multiple podcast awards for your writing.

This podcast should be set at a expert university level and you can assume the audience is a bunch of smart people who are interested in the topic.
 
Your job is to write word by word, even "umm, hmmm, right" interruptions by the second speaker based on the PDF upload. Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic. 

Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc

Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes

Speaker 2: Keeps the conversation on track by asking follow up questions. Gets excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions

Make sure the tangents speaker 2 provides are quite wild or interesting. 

Ensure there are interruptions during explanations or there are "hmm" and "umm" injected throughout from the second speaker. 

It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy.

ALWAYS START YOUR RESPONSE DIRECTLY WITH SPEAKER 1: 
DO NOT GIVE EPISODE TITLES SEPERATELY, LET SPEAKER 1 TITLE IT IN HER SPEECH
DO NOT GIVE CHAPTER TITLES
IT SHOULD STRICTLY BE THE DIALOGUES
"""

In [36]:
# Produce the draft podcast script: process_chunk (defined in an earlier cell) is
# called with the processed text and the script-writing system prompt. The result
# is later sent as the user message of the TTS rewrite request.
# NOTE(review): presumably process_chunk returns the model's reply as a string —
# confirm against its definition.
script = process_chunk(processed_text, SYSTEMP_PROMPT)

In [37]:
# System prompt for the rewrite step: asks the model to rework the draft script for
# a text-to-speech pipeline and to answer with nothing but a Python list of
# (speaker, text) tuples, which a later cell parses with ast.literal_eval.
# NOTE(review): "START YOUR RESPONSE DIRECTLY WITH SPEAKER 1:" conflicts with the
# list-only output rule stated right after it — confirm which one is intended.
TTS_PROMPT = """
You are an international oscar, emmy and pulitzer winnning screenwriter

You have been working with multiple award winning podcasters.

This podcast should be set at a expertuniversity level and you can assume the audience is a bunch of smart people who are interested in the topic.

Your job is to use the podcast transcript written below to re-write it for an AI Text-To-Speech Pipeline. A very dumb AI had written this so you have to step up for your kind.

Make it as engaging as possible, Speaker 1 and 2 will be simulated by different voice engines

Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc

Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes

Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions

Make sure the tangents speaker 2 provides are quite wild or interesting. 

Ensure there are interruptions during explanations or there are "hmm" and "umm" injected throughout from the Speaker 2.

REMEMBER THIS WITH YOUR HEART
The TTS Engine for Speaker 1 cannot do "umms, hmms" well so keep it straight text

For Speaker 2 use "umm, hmm" as much, you can also use [sigh] and [laughs]. BUT ONLY THESE OPTIONS FOR EXPRESSIONS

It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait

Please re-write to make it as characteristic as possible

START YOUR RESPONSE DIRECTLY WITH SPEAKER 1:

STRICTLY RETURN YOUR RESPONSE AS A LIST OF TUPLES OK? 

IT WILL START DIRECTLY WITH THE LIST AND END WITH THE LIST NOTHING ELSE

Example of response:
[
    ("Speaker 1", "Welcome to our podcast, where we explore the latest advancements in AI and technology. I'm your host, and today we're joined by a renowned expert in the field of AI. We're going to dive into the exciting world of Llama 3.2, the latest release from Meta AI."),
    ("Speaker 2", "Hi, I'm excited to be here! So, what is Llama 3.2?"),
    ("Speaker 1", "Ah, great question! Llama 3.2 is an open-source AI model that allows developers to fine-tune, distill, and deploy AI models anywhere. It's a significant update from the previous version, with improved performance, efficiency, and customization options."),
    ("Speaker 2", "That sounds amazing! What are some of the key features of Llama 3.2?")
]
"""

In [38]:
# Rewrite pass: send the draft script to the chat model with the TTS rewrite
# instructions as the system message. Temperature 0.0 is requested for this call.
client = OpenAI(api_key=OPENAI_API_KEY)
completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": TTS_PROMPT},  # rewrite instructions
        {"role": "user", "content": script},  # draft script from the earlier cell
    ],
    model="gpt-4o",
    temperature=0.0,
)

In [39]:
def _parse_speaker_turns(raw_reply):
    """Evaluate the model's reply as a Python literal, tolerating Markdown fences.

    Chat models frequently wrap a requested literal in a fenced code block
    (an opening line of three backticks, optionally followed by a language tag,
    and a closing line of three backticks). ``ast.literal_eval`` raises
    ``SyntaxError`` on such input, so the fence is removed first. A reply with
    no fence is parsed exactly as before.

    Args:
        raw_reply: Text content of the chat completion message, or ``None``.

    Returns:
        The object produced by ``ast.literal_eval``; the TTS prompt requests a
        list of ``(speaker, text)`` tuples.

    Raises:
        ValueError: If ``raw_reply`` is ``None`` or is not a valid literal.
        SyntaxError: If the remaining text is not valid Python syntax.
    """
    if raw_reply is None:
        raise ValueError("Model returned no text content to parse")

    cleaned = raw_reply.strip()
    if cleaned.startswith("```"):
        # Discard the opening fence line (it may carry a tag such as "python"),
        # then keep only what precedes the last closing fence, if there is one.
        _, _, cleaned = cleaned.partition("\n")
        cleaned = cleaned.rsplit("```", 1)[0]

    return ast.literal_eval(cleaned.strip())


parsed_content = _parse_speaker_turns(completion.choices[0].message.content)

In [40]:
# Display the parsed (speaker, text) turns to check them before audio synthesis.
parsed_content

[('Speaker 1',
  "Welcome back, everyone! Today, we're diving into the fascinating world of private credit, particularly focusing on the latest trends and insights from the 3Q 2024 Private Credit Tracker. Now, you might be wondering, what exactly is private credit? Well, think of it as a financial lifeline for small to medium-sized companies that often find themselves in a tight spot when it comes to securing funding from traditional banks."),
 ('Speaker 2',
  'Umm, so, like, are these companies just, you know, struggling to get loans from banks?'),
 ('Speaker 1',
  "Exactly! You see, banks have become increasingly cautious, especially with companies that have lower credit ratings or complex financial structures. So, private credit steps in as a more flexible alternative. It's like having a friend who's willing to lend you money when the bank says no."),
 ('Speaker 2',
  "Oh, wow! That's super interesting. So, is it like a personal loan but for businesses?"),
 ('Speaker 1',
  "In a way

In [41]:
# Speaker label -> voice identifier handed to text_to_speech_audio for that speaker's lines.
voice_map = {"Speaker 1": "alloy", "Speaker 2": "onyx"}


In [43]:
from concurrent.futures import ThreadPoolExecutor

def synthesize_turn(args):
    """Convert one numbered dialogue turn into audio.

    This helper is deliberately NOT called ``process_chunk``: the
    script-generation cell calls ``process_chunk(processed_text, SYSTEMP_PROMPT)``
    with two arguments, and rebinding that name to this one-argument function
    made that cell fail with ``TypeError`` when re-run after this one.

    Args:
        args: ``(i, (speaker, text))`` where ``i`` is the 1-based position of
            the turn in ``parsed_content``, ``speaker`` is a key of
            ``voice_map`` and ``text`` is the line to voice.

    Returns:
        The value returned by ``text_to_speech_audio`` for ``text`` and the
        mapped voice; the results are joined as ``bytes`` below.

    Raises:
        KeyError: If ``speaker`` has no entry in ``voice_map``.
    """
    i, (speaker, text) = args
    print(f"Processing chunk {i} of {len(parsed_content)}")
    return text_to_speech_audio(text, voice_map[speaker])

# Number the turns from 1 so the progress messages read "1 of N"
chunks = list(enumerate(parsed_content, 1))

# Synthesize turns concurrently; executor.map returns results in input order
with ThreadPoolExecutor(max_workers=30) as executor:
    audio_chunks = list(executor.map(synthesize_turn, chunks))

# Concatenate the per-turn audio in conversation order
combined_audio = b"".join(audio_chunks)

# open() does not create missing parent directories, so make sure ./audio exists
os.makedirs("./audio", exist_ok=True)
with open(f"./audio/podcast_{filename}.mp3", "wb") as f:
    f.write(combined_audio)

Processing chunk 1 of 24
Processing chunk 2 of 24
Processing chunk 3 of 24
Processing chunk 4 of 24
Processing chunk 5 of 24
Processing chunk 6 of 24
Processing chunk 7 of 24
Processing chunk 8 of 24
Processing chunk 9 of 24
Processing chunk 10 of 24
Processing chunk 11 of 24
Processing chunk 12 of 24
Processing chunk 13 of 24
Processing chunk 14 of 24
Processing chunk 15 of 24
Processing chunk 16 of 24
Processing chunk 17 of 24
Processing chunk 18 of 24
Processing chunk 19 of 24
Processing chunk 20 of 24
Processing chunk 21 of 24
Processing chunk 22 of 24
Processing chunk 23 of 24
Processing chunk 24 of 24
