In [None]:
import requests
import os
import base64
from IPython.display import Audio, display
import ast

# pip install openai==0.27.0
import openai

# Prefer a key supplied through the OPENAI_API_KEY environment variable so the
# secret is not stored in the notebook; the original placeholder literal is
# kept as the fallback so the cell behaves as before when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "key")

# Transcript Cleaning

In [1]:
# Load the WebVTT transcript as a list of raw lines (newlines preserved).
file_path = 'GENSUP04-P1.vtt'

# 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also strips a
# leading byte-order mark. With plain 'utf-8' a BOM would stay attached to the
# first line ('\ufeffWEBVTT'), which defeats exact-match checks on the header.
with open(file_path, 'r', encoding='utf-8-sig') as file:
    content = file.readlines()

content[:20]  # Display the first 20 lines to get an idea of the structure.


['WEBVTT\n',
 '\n',
 '1\n',
 '00:00:02.300 --> 00:00:08.590 \n',
 'Hello, my name is Tomas, I am flight test\n',
 'instrumentation engineer and instructor\n',
 '\n',
 '2\n',
 '00:00:08.600 --> 00:00:10.210 \n',
 'of ITPS.\n',
 '\n',
 '3\n',
 '00:00:11.280 --> 00:00:13.490 \n',
 'Today I am going to talk about\n',
 '\n',
 '4\n',
 '00:00:13.500 --> 00:00:16.510 \n',
 'introduction of flight test instrumentation.\n',
 '\n',
 '5\n']

In [117]:
# Function to clean and group VTT file content into paragraphs
def clean_and_group_vtt(file_lines):
    """Strip cue scaffolding from VTT lines and group caption text into paragraphs.

    Args:
        file_lines: Iterable of raw lines (strings) from a .vtt file. Each line
            is stripped of surrounding whitespace before being examined.

    Returns:
        A list of strings. Each string is the text lines of one blank-line
        delimited block joined with single spaces. Timestamp lines (containing
        "-->") and numeric cue identifiers are removed. Any other non-blank
        line, including a "WEBVTT" header, is kept as text.
    """
    # Materialise the stripped lines so each line can look at its successor.
    lines = [raw_line.strip() for raw_line in file_lines]

    paragraphs = []
    current_paragraph = []

    for index, line in enumerate(lines):
        if "-->" in line:
            # Timestamp line
            continue

        next_line = lines[index + 1] if index + 1 < len(lines) else ""
        if line.isdigit() and "-->" in next_line:
            # A numeric line is a cue identifier only when a timestamp line
            # follows it; a digits-only caption line (e.g. a year) is spoken
            # text and must not be dropped.
            continue

        if line:
            # Add non-blank lines to the current paragraph
            current_paragraph.append(line)
        elif current_paragraph:
            # A blank line closes the paragraph collected so far
            paragraphs.append(" ".join(current_paragraph))
            current_paragraph = []

    # Add the last paragraph if the input did not end with a blank line
    if current_paragraph:
        paragraphs.append(" ".join(current_paragraph))

    return paragraphs

# Run the cleaner over the raw transcript lines and, in the same pass, drop any
# paragraph that is exactly the "WEBVTT" header (compared case-insensitively).
paragraphs = [
    para
    for para in clean_and_group_vtt(content)
    if para.lower() != "webvtt"
]

# Preview the first five paragraphs.
paragraphs[:5]


['Hello, my name is Tomas, I am flight test instrumentation engineer and instructor',
 'of ITPS.',
 'Today I am going to talk about',
 'introduction of flight test instrumentation.',
 'So the main objectives of this lecture']

# LLM Slide Text Grouping

In [138]:
# System message for the slide-grouping request: sets the model's role.
system_slide_grouping = "You are a highly skilled text processor and presentation designer. Your task is to analyze and group large text inputs into logical, speech-friendly sections for presentation slides and text-to-speech conversion. You excel at handling long documents efficiently."

# User message for the slide-grouping request. This is an f-string: the
# `paragraphs` list is interpolated at the end using its default string form
# (the Python list repr), so this cell must run after `paragraphs` is built.
# The prompt asks for a "slide N": "Title: text" mapping; the parsing cells
# further down rely on that shape.
context_slide_grouping = f"""
The task is to group a large input script into logical sections suitable for text-to-speech transcripts and presentation. The input script contains over 500 lines, and it is crucial to process the entire input without missing any details.

--------
Your Task:
1. Analyze and segment the input text:
    - Break the script into 30–50 logical slides.
    - Each slide should represent a single idea, subtopic, or group of closely related ideas that can be naturally spoken in **1–2 minutes**.
    - Summarize or paraphrase where appropriate, but do NOT omit critical details. Use transitions or additional context as needed for smoother delivery.

2. Optimize for text-to-speech tools:
    - Append **....** at logical pauses to guide pacing.
    - Use **CAPITAL LETTERS** to emphasize important words or phrases.

3. Maintain clarity and flow:
    - Ensure the text on each slide is **speech-friendly**, clear, and concise.
    - Write 2–8 sentences per slide, depending on the complexity of the content.
    - Keep the overall length under an hour while preserving all details.

4. Logical grouping and progression:
    - Begin with an introduction, followed by main sections, subtopics, and a conclusion.
    - Match the flow to common presentation structures: introduction, key ideas, examples, technical details, and summary.

5. Strict slide limit:
    - Ensure the total output is no more than 50 slides.
    - Avoid overly long slides by splitting dense content into multiple parts if needed.

--------
Return the processed content as a dictionary with the following format:
    "slide 1": "Introduction: content with pauses and emphasis here ....",
    "slide 2": "Main topic or subtopic: content with pauses and emphasis here ....",
    "slide 3": "Sub-details or examples: content with pauses and emphasis here ....",
    ...
    "slide 50": "Conclusion: content with pauses and emphasis here ...."
--------
Here is the input text to process:
{paragraphs}

"""


# Send the grouping prompt (system message + user message) to the chat model.
responses_slide_grouping = openai.ChatCompletion.create(
    messages=[
        {"role": "system", "content": system_slide_grouping},
        {"role": "user", "content": context_slide_grouping},
    ],
    model="gpt-4-turbo",  # gpt-4-turbo (expensive), gpt-3.5-turbo (cheap)
    temperature=0.1,
)

# Keep the text of the first returned choice and print it for inspection.
slide_content = responses_slide_grouping.choices[0].message.content
print(slide_content)


```python
{
    "slide 1": "Introduction: Hello, my name is Tomas, I am a flight test instrumentation engineer and instructor at ITPS. Today, I will introduce the topic of FLIGHT TEST INSTRUMENTATION ....",
    "slide 2": "Objectives: The main objectives of this lecture are to provide an overview of Flight Test Instrumentation (FTI) as an airborne system, summarize its functions, and discuss its components like signal sources, transducers, and data acquisition systems ....",
    "slide 3": "FTI Motivation: Why do we use instrumentation in prototype aircraft? Imagine the early days of aviation, where engineers and pilots had limited means to gather flight data. The need for detailed and reliable data is what drives the use of FTI ....",
    "slide 4": "Instrumentation Overview: Instrumentation involves technology for measurement and control. In flight tests, it's used to monitor measurement points on the aircraft, providing crucial data without controlling other systems ....",
    "slid

## Organize output

In [139]:
# Show the raw reply string as returned (escapes and any Markdown code fence
# included), to see what has to be stripped before parsing.
slide_content

'```python\n{\n    "slide 1": "Introduction: Hello, my name is Tomas, I am a flight test instrumentation engineer and instructor at ITPS. Today, I will introduce the topic of FLIGHT TEST INSTRUMENTATION ....",\n    "slide 2": "Objectives: The main objectives of this lecture are to provide an overview of Flight Test Instrumentation (FTI) as an airborne system, summarize its functions, and discuss its components like signal sources, transducers, and data acquisition systems ....",\n    "slide 3": "FTI Motivation: Why do we use instrumentation in prototype aircraft? Imagine the early days of aviation, where engineers and pilots had limited means to gather flight data. The need for detailed and reliable data is what drives the use of FTI ....",\n    "slide 4": "Instrumentation Overview: Instrumentation involves technology for measurement and control. In flight tests, it\'s used to monitor measurement points on the aircraft, providing crucial data without controlling other systems ....",\n 

In [153]:
# Strip everything around the outermost `{...}` in the model reply (e.g. a
# Markdown code fence) and parse the remainder as a Python dictionary literal.
content_start = slide_content.find("{")
content_end = slide_content.rfind("}") + 1  # rfind gives -1 when absent, so this becomes 0

# Without this guard a reply with no braces would be sliced with index -1 and
# fail later with an unrelated-looking error.
if content_start == -1 or content_end <= content_start:
    raise ValueError(
        "No '{...}' dictionary found in the model response; "
        "the reply may be empty or truncated."
    )

try:
    # literal_eval only evaluates literals, so text in the reply is never executed.
    slide_content_dict = ast.literal_eval(slide_content[content_start:content_end])
except (SyntaxError, ValueError) as exc:
    raise ValueError(
        f"Could not parse the model response as a dictionary: {exc}"
    ) from exc

if not isinstance(slide_content_dict, dict):
    raise ValueError(
        f"Expected a dictionary of slides, got {type(slide_content_dict).__name__}."
    )

# Verify the output
for key, value in slide_content_dict.items():
    print(f"{key}: {value}")

slide 1: Introduction: Hello, my name is Tomas, I am a flight test instrumentation engineer and instructor at ITPS. Today, I will introduce the topic of FLIGHT TEST INSTRUMENTATION ....
slide 2: Objectives: The main objectives of this lecture are to provide an overview of Flight Test Instrumentation (FTI) as an airborne system, summarize its functions, and discuss its components like signal sources, transducers, and data acquisition systems ....
slide 3: FTI Motivation: Why do we use instrumentation in prototype aircraft? Imagine the early days of aviation, where engineers and pilots had limited means to gather flight data. The need for detailed and reliable data is what drives the use of FTI ....
slide 4: Instrumentation Overview: Instrumentation involves technology for measurement and control. In flight tests, it's used to monitor measurement points on the aircraft, providing crucial data without controlling other systems ....
slide 5: Metrology and Reliability: Good instrumentation 

In [158]:
# Inspect only the slide texts, without their "slide N" keys.
slide_content_dict.values()

dict_values(['Introduction: Hello, my name is Tomas, I am a flight test instrumentation engineer and instructor at ITPS. Today, I will introduce the topic of FLIGHT TEST INSTRUMENTATION ....', 'Objectives: The main objectives of this lecture are to provide an overview of Flight Test Instrumentation (FTI) as an airborne system, summarize its functions, and discuss its components like signal sources, transducers, and data acquisition systems ....', 'FTI Motivation: Why do we use instrumentation in prototype aircraft? Imagine the early days of aviation, where engineers and pilots had limited means to gather flight data. The need for detailed and reliable data is what drives the use of FTI ....', "Instrumentation Overview: Instrumentation involves technology for measurement and control. In flight tests, it's used to monitor measurement points on the aircraft, providing crucial data without controlling other systems ....", 'Metrology and Reliability: Good instrumentation adheres to metrolog

In [159]:
# Split each slide text into a title and its body, keyed as "slide N".
# The slide text is expected to look like "Title: body"; only the first ": "
# is used as the separator so colons inside the body are preserved.
processed_content = {}
for idx, text in enumerate(slide_content_dict.values(), 1):
    # NOTE: the body is deliberately not named `content`; that module-level
    # name holds the raw transcript lines read earlier, and overwriting it here
    # would break re-running the transcript-cleaning cell.
    if ": " in text:
        title, body = text.split(": ", 1)  # Split into title and body
    else:
        title, body = f"Slide {idx}", text  # Default title if no colon exists
    processed_content[f"slide {idx}"] = {"title": title, "content": body}

# Print the processed dictionary
for slide, details in processed_content.items():
    print(f"{slide}: Title: {details['title']} | Content: {details['content']}")


slide 1: Title: Introduction | Content: Hello, my name is Tomas, I am a flight test instrumentation engineer and instructor at ITPS. Today, I will introduce the topic of FLIGHT TEST INSTRUMENTATION ....
slide 2: Title: Objectives | Content: The main objectives of this lecture are to provide an overview of Flight Test Instrumentation (FTI) as an airborne system, summarize its functions, and discuss its components like signal sources, transducers, and data acquisition systems ....
slide 3: Title: FTI Motivation | Content: Why do we use instrumentation in prototype aircraft? Imagine the early days of aviation, where engineers and pilots had limited means to gather flight data. The need for detailed and reliable data is what drives the use of FTI ....
slide 4: Title: Instrumentation Overview | Content: Instrumentation involves technology for measurement and control. In flight tests, it's used to monitor measurement points on the aircraft, providing crucial data without controlling other sy

In [161]:
# Collect the body text of every slide, in slide order, leaving the titles out.
slide_contents = [
    processed_content[slide_key]["content"]
    for slide_key in processed_content
]
slide_contents

['Hello, my name is Tomas, I am a flight test instrumentation engineer and instructor at ITPS. Today, I will introduce the topic of FLIGHT TEST INSTRUMENTATION ....',
 'The main objectives of this lecture are to provide an overview of Flight Test Instrumentation (FTI) as an airborne system, summarize its functions, and discuss its components like signal sources, transducers, and data acquisition systems ....',
 'Why do we use instrumentation in prototype aircraft? Imagine the early days of aviation, where engineers and pilots had limited means to gather flight data. The need for detailed and reliable data is what drives the use of FTI ....',
 "Instrumentation involves technology for measurement and control. In flight tests, it's used to monitor measurement points on the aircraft, providing crucial data without controlling other systems ....",
 'Good instrumentation adheres to metrology standards to ensure reliable data, which is essential for evaluating aircraft prototypes ....',
 "An 

# LLM PowerPoint Bullet Points

In [191]:
# Inspect the "slide N" -> {title, content} mapping before it is used as prompt input.
processed_content

{'slide 1': {'title': 'Introduction',
  'content': 'Hello, my name is Tomas, I am a flight test instrumentation engineer and instructor at ITPS. Today, I will introduce the topic of FLIGHT TEST INSTRUMENTATION ....'},
 'slide 2': {'title': 'Objectives',
  'content': 'The main objectives of this lecture are to provide an overview of Flight Test Instrumentation (FTI) as an airborne system, summarize its functions, and discuss its components like signal sources, transducers, and data acquisition systems ....'},
 'slide 3': {'title': 'FTI Motivation',
  'content': 'Why do we use instrumentation in prototype aircraft? Imagine the early days of aviation, where engineers and pilots had limited means to gather flight data. The need for detailed and reliable data is what drives the use of FTI ....'},
 'slide 4': {'title': 'Instrumentation Overview',
  'content': "Instrumentation involves technology for measurement and control. In flight tests, it's used to monitor measurement points on the airc

In [197]:
# Bullet-point stage. The prompt and response variables get their own names
# here instead of reusing the slide-grouping ones, so that re-running the
# slide-grouping request cell afterwards still sends the grouping prompt.

# System message: sets the model's role and states the task.
system_bullet_points = """
You are a highly skilled text processor and presentation designer.
Your task is to take slide data with titles and content in a structured format
and rewrite it as concise bullet points for each slide.
You excel at organizing text clearly and concisely while maintaining logical flow and essential details.
"""

# User message (f-string): `processed_content` is interpolated at the end using
# its default string form (the dict repr).
context_bullet_points = f"""
The task is to break down the input into bullet points for each PowerPoint slide.
Each slide should include:
1. Slide Title: Begin with the title (e.g., 'Slide 1: Introduction').
2. Content: Rewrite the slide's content as concise bullet points in the output. Ensure:
    - Each idea is presented on a new line.
    - Important concepts or terms are capitalized for emphasis.
    - Logical grouping of related ideas within each slide.

3. Output Format: Return the result as text, formatted like this:
    ```
    Slide 1: Title
    point 1.
    point 2.
    point 3.
    
    Slide 2: Title
    point 1.
    point 2.
    point 3.
    ```

Here is the input data to process:
{processed_content}
"""

responses_bullet_points = openai.ChatCompletion.create(
  model="gpt-4-turbo",  # Switch between gpt-4-turbo and gpt-3.5-turbo as needed
  messages=[
      {"role": "system", "content": system_bullet_points},
      {"role": "user", "content": context_bullet_points}
  ],
  temperature=0.1,
)

# Keep the text of the first returned choice and print it for inspection.
slide_bulletpoints = responses_bullet_points.choices[0].message.content
print(slide_bulletpoints)


```
Slide 1: Introduction
- Hello, my name is Tomas.
- I am a Flight Test Instrumentation Engineer and Instructor at ITPS.
- Today's topic: FLIGHT TEST INSTRUMENTATION.

Slide 2: Objectives
- Provide an OVERVIEW of Flight Test Instrumentation (FTI) as an airborne system.
- Summarize FTI FUNCTIONS.
- Discuss FTI COMPONENTS: signal sources, transducers, data acquisition systems.

Slide 3: FTI Motivation
- Importance of INSTRUMENTATION in prototype aircraft.
- Historical context: Limited data gathering in early aviation.
- Need for DETAILED and RELIABLE data drives FTI use.

Slide 4: Instrumentation Overview
- Instrumentation involves MEASUREMENT and CONTROL technology.
- Used in flight tests to MONITOR measurement points on aircraft.
- Provides CRUCIAL DATA without controlling other systems.

Slide 5: Metrology and Reliability
- Instrumentation adheres to METROLOGY STANDARDS.
- Ensures RELIABLE DATA for evaluating aircraft prototypes.

Slide 6: System Components
- FTI system includes nec

# Text to Speech

In [None]:
# Short sample passage sent as the `input` text of the text-to-speech request below.
test='''
Hello, my name is Tomas, I am a flight test instrumentation engineer and instructor at XXX. 
Today, I will introduce the topic of FLIGHT TEST INSTRUMENTATION ....
'''

In [None]:
# Prefer a key supplied through the SPEECHIFY_API_KEY environment variable so
# the secret is not stored in the notebook; the original placeholder literal is
# kept as the fallback so the cell behaves as before when the variable is unset.
API_KEY = os.environ.get('SPEECHIFY_API_KEY', 'Key')
API_URL = 'https://api.sws.speechify.com/v1/audio/speech'

# Bearer-token auth; the payload is sent as JSON.
headers = {
    'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json'
}

# Request payload: the text to speak plus voice/format settings.
data = {
    'input': test,
    'voice_id': 'henry',  # Replace with desired voice ID
    'audio_format': 'mp3',
    "options": {
    "loudness_normalization": True,
    'pitch': '+2st',                 # Adjust pitch (e.g., +2 semitones)
    'rate': 'medium',                # Adjust speed (e.g., 'medium', 'fast')
    'emotion': 'cheerful',
    },
     "model": "simba-english"
}


# requests has no default timeout; without one a stalled connection would hang
# the kernel indefinitely.
response = requests.post(API_URL, headers=headers, json=data, timeout=60)


In [None]:
# Fail with the HTTP status if the request was rejected, instead of a
# confusing JSON-decode error or KeyError on 'audio_data' further down.
response.raise_for_status()
response_data = response.json()

# Decode the base64 audio data
audio_data_base64 = response_data['audio_data']
decoded_audio_data = base64.b64decode(audio_data_base64)

# Save the audio to a file (optional)
with open("output.mp3", "wb") as audio_file:
    audio_file.write(decoded_audio_data)

# Play the audio in the notebook
display(Audio(decoded_audio_data, rate=44100, autoplay=True))

In [None]:
# Debug aid: show the HTTP status and raw body of the text-to-speech response.
print(response.status_code)  # Check if it's 200
print(response.text)         # Check the raw response content
