In [66]:
import requests
from IPython.display import JSON
import pandas as pd
import numpy as np 
import time

# Visualization
from wordcloud import WordCloud, STOPWORDS
import plotly.express as px

# Panel/hvplot (holoviz)
import panel as pn
pn.extension()
import param
import hvplot.pandas

# Others
import pickle
from io import StringIO

In [67]:
# Load the AssemblyAI API key from the environment (optionally via a local .env file)
import os
from dotenv import load_dotenv

# Read KEY=value pairs from a .env file, if one is found, into os.environ.
# Without this call only variables already exported in the shell are visible,
# so a key stored in .env would silently come back as None.
load_dotenv()

# None when API_KEY is defined neither in the environment nor in .env
API_KEY = os.getenv('API_KEY')

### Get audio transcription with AssemblyAI

In [68]:
# Submitting Files for Transcription
import requests
endpoint = "https://api.assemblyai.com/v2/transcript"
# Request body: the audio to transcribe plus the optional analyses
# (highlights, sentiment, chapters, topic categories) requested alongside it
json = {
    "audio_url": "https://github.com/thu-vu92/audio_analyzer_assemblyai/blob/main/How_I_Would_Learn_to_Code.mp3?raw=true",
    "auto_highlights": True,
    "sentiment_analysis": True,
    "auto_chapters": True,
    "iab_categories": True,
}
headers = {
    "authorization": API_KEY,
    "content-type": "application/json"
}
response = requests.post(endpoint, json=json, headers=headers)
# Fail here on a non-2xx reply (e.g. a missing or invalid API key) instead of
# with a KeyError later, when the transcript id is read from the response body.
response.raise_for_status()
print(response.json())

{'id': '3280590c-f50b-4b39-88b6-1e28690e10ca', 'language_model': 'assemblyai_default', 'acoustic_model': 'assemblyai_default', 'language_code': 'en_us', 'status': 'queued', 'audio_url': 'https://github.com/thu-vu92/audio_analyzer_assemblyai/blob/main/How_I_Would_Learn_to_Code.mp3?raw=true', 'text': None, 'words': None, 'utterances': None, 'confidence': None, 'audio_duration': None, 'punctuate': True, 'format_text': True, 'dual_channel': None, 'webhook_url': None, 'webhook_status_code': None, 'webhook_auth': False, 'webhook_auth_header_name': None, 'speed_boost': False, 'auto_highlights_result': None, 'auto_highlights': True, 'audio_start_from': None, 'audio_end_at': None, 'word_boost': [], 'boost_param': None, 'filter_profanity': False, 'redact_pii': False, 'redact_pii_audio': False, 'redact_pii_audio_quality': None, 'redact_pii_policies': None, 'redact_pii_sub': None, 'speaker_labels': False, 'content_safety': False, 'iab_categories': True, 'content_safety_labels': {}, 'iab_categories

In [76]:
# Getting the Transcription Result
result_endpoint = endpoint + "/" + response.json()["id"]
headers_auth = {
    "authorization": API_KEY,
}

# Poll the transcript endpoint until the job is finished.
# Each poll is stored in `transcript_response` -- the name the following cells
# read -- so once this cell completes it holds the finished transcript rather
# than whatever state the job was in on the first request.
while True:
    transcript_response = requests.get(result_endpoint, headers=headers_auth)
    transcript_response.raise_for_status()
    transcript_result = transcript_response.json()
    status = transcript_result["status"]
    if status == "completed":
        break
    if status == "error":
        # A failed job never reaches "completed"; stop instead of polling forever
        raise RuntimeError("Transcription failed: " + str(transcript_result.get("error")))
    # Still queued or processing: wait before asking again
    time.sleep(5)

print(transcript_result)

{'id': '3280590c-f50b-4b39-88b6-1e28690e10ca', 'language_model': 'assemblyai_default', 'acoustic_model': 'assemblyai_default', 'language_code': 'en_us', 'status': 'completed', 'audio_url': 'https://github.com/thu-vu92/audio_analyzer_assemblyai/blob/main/How_I_Would_Learn_to_Code.mp3?raw=true', 'text': "I recently got an amazing opportunity. I've been learning jujitsu and a friend offered to coach me in exchange for teaching him how to code. At this point in my life where I'm trying to improve my jujitsu as quickly as possible, this was an incredible trade for me. In my personal opinion. I was all for it until I had to think about how I'd actually go about teaching someone how to code. Starting from nothing, I quickly realized that this was going to be a lot harder than I thought. After all, it's been almost ten years since I wrote my very first line of code. That's a pretty long time. Now, this video is about the very specific and detailed plan that I wrote for my friend to teach me th

In [77]:
# Rich JSON display of the raw transcript payload
transcript_payload = transcript_response.json()
JSON(transcript_payload)

<IPython.core.display.JSON object>

In [78]:
# Save pickle: keep a copy of the transcript payload on disk
transcript_payload = transcript_response.json().copy()
with open('speech_data.pkl', 'wb') as pickle_file:
    pickle.dump(transcript_payload, pickle_file)

### Dashboard

In [79]:
# Load data pickle: read back the payload written to speech_data.pkl
with open('speech_data.pkl', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

In [80]:
# In-memory text stream holding the transcript text
transcript_text = data["text"]
buffer = StringIO()
buffer.write(transcript_text)
# Rewind so that a reader starts from the first character
buffer.seek(0)

0

### Download .txt widget

In [81]:
# Download button that serves the in-memory transcript as transcript.txt
download_options = dict(
    file=buffer,
    filename="transcript.txt",
    button_type='success',
    sizing_mode='stretch_width',
)
transcript_download = pn.widgets.FileDownload(**download_options)

# Dimensions are assigned after construction, as separate attribute updates
transcript_download.width = 300
transcript_download.height = 100

transcript_download

BokehModel(combine_events=True, render_bundle={'docs_json': {'b39cc522-930c-4461-a8b9-e366b20986da': {'version…

### Audio Play Widget

In [82]:
audio_url = "https://github.com/thu-vu92/audio_analyzer_assemblyai/blob/main/How_I_Would_Learn_to_Code.mp3?raw=true"

# Player for the source recording: initial position 360, half volume,
# no looping and no automatic playback
audio_options = dict(name='Audio', time=360, volume=0.5, loop=False, autoplay=False)
audio_play = pn.pane.Audio(audio_url, **audio_options)

audio_play

BokehModel(combine_events=True, render_bundle={'docs_json': {'00215b2f-6ffc-45ac-8f88-bf3f2c23ce9e': {'version…

### Sentiment plot

In [83]:
# Sentiment analysis results stored under the transcript payload
sentiment = data["sentiment_analysis_results"]

In [84]:
# Tabular view of the sentiment results
sentiment_df = pd.DataFrame(data=sentiment)
sentiment_df

Unnamed: 0,text,start,end,sentiment,confidence,speaker
0,I recently got an amazing opportunity.,250,2078,POSITIVE,0.985436,
1,I've been learning jujitsu and a friend offere...,2244,7262,POSITIVE,0.552380,
2,At this point in my life where I'm trying to i...,7396,13294,POSITIVE,0.955428,
3,In my personal opinion.,13332,14282,NEUTRAL,0.809723,
4,I was all for it until I had to think about ho...,14346,18478,NEUTRAL,0.652979,
...,...,...,...,...,...,...
131,"Also, Google the errors and this will save you...",633884,637042,POSITIVE,0.551812,
132,"Finally, the third thing is to take it easy on...",637106,639414,NEUTRAL,0.523670,
133,The only people that can't code are the ones t...,639532,643750,NEGATIVE,0.902655,
134,"Thank you all so much for watching, and good l...",643900,647990,POSITIVE,0.986962,


In [85]:
# Number of rows in each sentiment category
sentiment_column = sentiment_df['sentiment']
sentiment_df_grouped = sentiment_column.value_counts()
sentiment_df_grouped

sentiment
NEUTRAL     71
POSITIVE    50
NEGATIVE    15
Name: count, dtype: int64

In [86]:
# Bar plot using hvplot: one bar per sentiment category
plot_title = "Sentences by Sentiment Category"
sentiment_plot = sentiment_df_grouped.hvplot(kind="bar", title=plot_title)
pn.Row(sentiment_plot)

BokehModel(combine_events=True, render_bundle={'docs_json': {'79e75e13-c244-4ab2-8fa4-0728ca2c36a0': {'version…

In [87]:
def _sentences_with(label):
    """Return the text/sentiment columns of the rows whose sentiment equals `label`."""
    matches = sentiment_df["sentiment"] == label
    return sentiment_df[matches][["text", "sentiment"]]


def _sentence_table(frame):
    """Wrap `frame` in a 700x300 Panel DataFrame widget with fit_columns autosizing."""
    return pn.widgets.DataFrame(frame, autosize_mode='fit_columns', width=700, height=300)


positive_df = _sentences_with("POSITIVE")
negative_df = _sentences_with("NEGATIVE")
neutral_df = _sentences_with("NEUTRAL")

# Overview chart first, then one table tab per sentiment category
sentiment_tabs = pn.Tabs(
    ('Sentiment overview', sentiment_plot),
    ('Positive', _sentence_table(positive_df)),
    ('Negative', _sentence_table(negative_df)),
    ('Neutral', _sentence_table(neutral_df)),
)
sentiment_tabs

BokehModel(combine_events=True, render_bundle={'docs_json': {'b695fc87-5a17-4dff-81a8-483adec708a4': {'version…

### WordCloud

In [88]:
# Stop-word set for the word cloud, and the transcript split on whitespace
# with every token lower-cased
stopwords = set(STOPWORDS)
transcript = data["text"]
transcript_lower = list(map(str.lower, str(transcript).split()))

In [89]:
# Rejoin the lower-cased tokens into a single space-separated string
word_separator = ' '
all_words = word_separator.join(transcript_lower)
all_words

"i recently got an amazing opportunity. i've been learning jujitsu and a friend offered to coach me in exchange for teaching him how to code. at this point in my life where i'm trying to improve my jujitsu as quickly as possible, this was an incredible trade for me. in my personal opinion. i was all for it until i had to think about how i'd actually go about teaching someone how to code. starting from nothing, i quickly realized that this was going to be a lot harder than i thought. after all, it's been almost ten years since i wrote my very first line of code. that's a pretty long time. now, this video is about the very specific and detailed plan that i wrote for my friend to teach me the basics of coding for data science. since i did all the work for him, i figured i might as well share the details with you as well here. there have been plenty of other great videos similar to this about how to start coding, and i've linked many of them in the description. again, those are great, but 

In [90]:
# Word cloud plot
wordcloud_options = dict(
    background_color='black',
    stopwords=stopwords,
    max_words=130,
    colormap='cividis',
    collocations=False,
)
wordcloud = WordCloud(**wordcloud_options).generate(all_words)

# Show the cloud as an image and remove the tick labels on both axes
wordcloud_plot = (
    px.imshow(wordcloud)
    .update_xaxes(showticklabels=False)
    .update_yaxes(showticklabels=False)
)
wordcloud_plot

In [91]:
# Create interactive slider
class Controller(param.Parameterized):
    """Parameter container for the word cloud's word-count control."""

    # Integer parameter: default 30, bounds 5 to 50, declared step of 5
    word_slider = param.Integer(default=30, bounds=(5, 50), step=5)


controller = Controller()

# No watch=True here: this function returns a figure for Panel to display.
# watch=True is meant for side-effect callbacks; on a function like this one it
# only adds an extra invocation per slider change whose result is thrown away.
@pn.depends(controller.param.word_slider)
def update_wordcloud(num_words):
    """Build the word cloud figure limited to `num_words` words.

    Reads the module-level `all_words` text and `stopwords` set.

    Parameters
    ----------
    num_words : int
        Value passed to WordCloud as `max_words`.

    Returns
    -------
    The plotly figure produced by `px.imshow`, with tick labels hidden on
    both axes.
    """
    # Local names differ from the module-level `wordcloud` / `wordcloud_plot`
    # so the static plot built in the previous cell is not confused with this one
    cloud = WordCloud(background_color='black', stopwords=stopwords, max_words=num_words,
                      colormap='viridis', collocations=False).generate(all_words)

    figure = px.imshow(cloud)
    # Remove labels on axes
    figure.update_xaxes(showticklabels=False)
    figure.update_yaxes(showticklabels=False)
    return figure

### Autochapter Summary

In [92]:
# Chapter entries from the transcript payload; the output below shows each entry
# carrying 'summary', 'gist', 'headline', 'start' and 'end' keys
chapters = data["chapters"]
chapters

[{'summary': 'A friend offered to coach me in exchange for teaching him how to code. This video is more focused on coding for data and it uses a very specific real world case study. At the end, I highlight the three most important things a new programmer should know.',
  'gist': 'How to Teach a New Coder How to Code',
  'headline': 'A friend offered to coach me in exchange for teaching me how to code',
  'start': 250,
  'end': 57710},
 {'summary': 'About 48% of respondents use Python, while only about 5% use r. From 2020 to 2021, Python has gained an adoption, while R usage has seen some attrition. What coding skills should my friend learn first?',
  'gist': 'Python vs R: The Language to Start Learning',
  'headline': '48% of respondents use Python, while only 5% use r',
  'start': 57780,
  'end': 272402},
 {'summary': "At the most basic level, beginner coding can be broken down into seven concepts. The first one we start with is data types. After that, learn about functions. Finally, 

In [93]:
# Static text widget showing the summary of the first chapter
first_summary = chapters[0]["summary"]
chapter_summary = pn.widgets.StaticText(value=first_summary, width=1000, height_policy="fit")
chapter_summary

BokehModel(combine_events=True, render_bundle={'docs_json': {'a1fe04c5-f370-4f56-83c6-ed5a1dce1fa5': {'version…

In [94]:
# Button labelled with the first chapter's start value divided by 1000,
# truncated to an integer
first_start = chapters[0]["start"]
button_label = str(int(first_start/1000))
button = pn.widgets.Button(name=button_label, button_type='primary')
button

BokehModel(combine_events=True, render_bundle={'docs_json': {'18c419b8-c98f-4392-afa7-ea9dea2c34a6': {'version…

In [95]:
# Audio pane positioned at the first chapter's start (start / 1000, rounded)
first_chapter_time = round(chapters[0]["start"]/1000)
chapter_audio = pn.pane.Audio(audio_url, name='Audio', time=first_chapter_time)
chapter_audio

BokehModel(combine_events=True, render_bundle={'docs_json': {'74b8e0f6-a548-48b7-bb2b-f41a6a4a1456': {'version…

In [96]:
# Create chapter summary layout: a column that starts with only its heading
layout_heading = pn.pane.Markdown("### Auto Chapter Summary")
chapters_layout = pn.Column(layout_heading)

class ButtonAudio:
    """Pairs a button with an audio pane and links them through a click handler.

    `start_time` is divided by 1000 wherever it is used: truncated for the
    button label, rounded for the pane's initial `time`, and unrounded when
    the button is clicked.
    """

    def __init__(self, start_time):
        self.start_time = start_time
        start_seconds = self.start_time/1000
        self.button = pn.widgets.Button(name=str(int(start_seconds)), button_type='primary', width=60)
        self.chapter_audio = pn.pane.Audio(audio_url, name='Audio', time=round(start_seconds))
        # Clicking the button resets the pane's position to this chapter's start
        self.button.on_click(self.move_audio_head)

    def move_audio_head(self, event):
        """Click handler: set the audio pane's `time` to `start_time / 1000`."""
        self.chapter_audio.time = self.start_time/1000
        
# One row per chapter: seek button, audio player, summary text
for chapter in chapters:
    button_audio = ButtonAudio(chapter["start"])
    chapter_summary = pn.widgets.StaticText(value=chapter["summary"], width=1000, height_policy="fit")
    button, chapter_audio = button_audio.button, button_audio.chapter_audio
    chapter_row = pn.Row(pn.Column(button), pn.Column(chapter_audio), pn.Column(chapter_summary))
    chapters_layout.append(chapter_row)

chapters_layout

BokehModel(combine_events=True, render_bundle={'docs_json': {'0a9c6c46-f1d9-4a1a-8ab8-bf39ace8efd2': {'version…

### Auto highlights

In [97]:
# Auto-highlight results from the transcript payload, shown as a table
highlights = data["auto_highlights_result"]["results"]
highlights_df = pd.DataFrame(data=highlights)
highlights_df

Unnamed: 0,count,rank,text,timestamps
0,1,0.07,Python code,"[{'start': 270152, 'end': 270882}]"
1,7,0.06,data science,"[{'start': 33388, 'end': 34002}, {'start': 679..."
2,1,0.06,data science learning,"[{'start': 88908, 'end': 89782}]"
3,1,0.06,tangible data science skills,"[{'start': 470136, 'end': 471406}]"
4,3,0.06,data scientists,"[{'start': 82508, 'end': 83378}, {'start': 100..."
5,1,0.05,Data science style,"[{'start': 67934, 'end': 69310}]"
6,1,0.05,data frames,"[{'start': 412404, 'end': 412954}]"
7,1,0.05,different data structures,"[{'start': 329480, 'end': 330450}]"
8,1,0.05,data types,"[{'start': 323272, 'end': 323982}]"
9,1,0.05,other great videos,"[{'start': 38748, 'end': 39510}]"
