In [1]:
import os
from time import sleep

import pandas as pd
import requests

In [2]:
# Read the AssemblyAI key from the environment so a real secret never has to be
# typed into (and saved with) the notebook. The placeholder is kept as the
# fallback, so the cell still runs when the variable is not set.
assemblyai_auth_key = os.environ.get("ASSEMBLYAI_API_KEY", "YOUR_API KEY")

In [3]:
# URL that new transcription requests are POSTed to.
transcription_endpoint = "https://api.assemblyai.com/v2/transcript"

# Media file whose audio is submitted for transcription.
input_url = "https://download.ted.com/products/146256.mp4"

# HTTP headers shared by the request helpers below.
headers = {"authorization": assemblyai_auth_key, "content-type": "application/json"}

In [4]:
def post_transcription_request(input_url):
    """Submit an audio URL for transcription with IAB topic detection enabled.

    Args:
        input_url: URL of the audio/video file to transcribe.

    Returns:
        The ``id`` field of the JSON response, used to poll for the result.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status (for
            example an invalid API key), instead of failing later with an
            opaque ``KeyError`` on the missing ``id`` field.
    """
    # Named `payload` rather than `json` to avoid shadowing the stdlib module name.
    payload = {"audio_url": input_url, "iab_categories": True}

    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.post(
        transcription_endpoint, json=payload, headers=headers, timeout=30
    )
    response.raise_for_status()

    return response.json()['id']

In [5]:
def get_transcription_result(transcription_id, poll_interval=10):
    """Poll the transcript endpoint until the job reaches a terminal state.

    Args:
        transcription_id: ID returned when the transcription was requested.
        poll_interval: Seconds to wait between status checks (default 10,
            matching the previous hard-coded value).

    Returns:
        The decoded JSON response once its ``status`` is ``"completed"``
        (or ``"unavailable"``, kept for backward compatibility).

    Raises:
        requests.HTTPError: if a poll request returns a 4xx/5xx status.
        RuntimeError: if the job ends with status ``"error"``. Without this
            check a failed transcription would be polled forever.
    """
    endpoint = f"https://api.assemblyai.com/v2/transcript/{transcription_id}"

    while True:
        response = requests.get(endpoint, headers=headers, timeout=30)
        response.raise_for_status()

        # Decode once and reuse the payload for both the check and the return.
        payload = response.json()
        current_status = payload['status']

        if current_status in ("completed", "unavailable"):
            return payload

        if current_status == "error":
            # Terminal failure: surface the API's message instead of looping.
            raise RuntimeError(
                f"Transcription {transcription_id} failed: {payload.get('error')}"
            )

        sleep(poll_interval)

In [6]:
# Submit the media URL, then wait for the polling helper to return the result.
transcription_id = post_transcription_request(input_url)
results = get_transcription_result(transcription_id)

In [7]:
# Topic label -> relevance score mapping from the topic-detection result.
results["iab_categories_result"]["summary"]

{'EventsAndAttractions>PoliticalEvent': 1.0,
 'NewsAndPolitics>Politics>PoliticalIssues': 0.8536341786384583,
 'NewsAndPolitics>Politics': 0.8001156449317932,
 'NewsAndPolitics>Politics>Elections': 0.26783692836761475,
 'NewsAndPolitics>Crime': 0.2527463436126709,
 'Home&Garden>HomeImprovement': 0.19234398007392883,
 'FamilyAndRelationships>Parenting>AdoptionAndFostering': 0.15178827941417694,
 'NewsAndPolitics>Law': 0.10496598482131958,
 'Television>RealityTV': 0.08490221947431564,
 'PersonalFinance>FinancialAssistance>GovernmentSupportAndWelfare': 0.05182255059480667,
 'FamilyAndRelationships>Parenting': 0.05133328214287758,
 'PopCulture>CelebrityScandal': 0.028794826939702034,
 'NewsAndPolitics>Politics>WarAndConflicts': 0.0171405840665102,
 'Shopping>GroceryShopping': 0.009660312905907631,
 'Education>EarlyChildhoodEducation': 0.008355177007615566,
 'HealthyLiving>FitnessAndExercise>ParticipantSports': 0.006075866520404816,
 'Television>FactualTV': 0.005588793195784092,
 'FamilyAnd

In [8]:
# Full topic-detection payload: a status flag plus text segments with their labels.
results["iab_categories_result"]

{'status': 'success',
 'results': [{'text': "Listen, I'm worried about our democracy. Nowadays we have leaders who use division itself as a political tool. They downplay or even encourage, in some cases a deadly assault to overturn an election. And a bunch of them are working really hard to make it harder to vote. The retreat from these processes of democracy, ballot access or legislative debate, judicial review, they are worrisome enough. But what's even more concerning to me is the retreat from the purposes of democracy.",
   'labels': [{'relevance': 0.9263674020767212,
     'label': 'NewsAndPolitics>Politics>PoliticalIssues'},
    {'relevance': 0.9063184261322021,
     'label': 'EventsAndAttractions>PoliticalEvent'},
    {'relevance': 0.8999685645103455,
     'label': 'NewsAndPolitics>Politics>Elections'},
    {'relevance': 0.7793756723403931, 'label': 'NewsAndPolitics>Politics'},
    {'relevance': 0.0007177300867624581, 'label': 'NewsAndPolitics>Law'},
    {'relevance': 0.000444085

In [8]:
# Full topic-detection payload: a status flag plus text segments with their labels.
# NOTE(review): same expression and execution count as the preceding In [8] cell —
# looks like a duplicated cell; confirm whether it can be removed.
results["iab_categories_result"]

{'status': 'success',
 'results': [{'text': "Listen, I'm worried about our democracy. Nowadays we have leaders who use division itself as a political tool. They downplay or even encourage, in some cases a deadly assault to overturn an election. And a bunch of them are working really hard to make it harder to vote. The retreat from these processes of democracy, ballot access or legislative debate, judicial review, they are worrisome enough. But what's even more concerning to me is the retreat from the purposes of democracy.",
   'labels': [{'relevance': 0.9263674020767212,
     'label': 'NewsAndPolitics>Politics>PoliticalIssues'},
    {'relevance': 0.9063184261322021,
     'label': 'EventsAndAttractions>PoliticalEvent'},
    {'relevance': 0.8999685645103455,
     'label': 'NewsAndPolitics>Politics>Elections'},
    {'relevance': 0.7793756723403931, 'label': 'NewsAndPolitics>Politics'},
    {'relevance': 0.0007177300867624581, 'label': 'NewsAndPolitics>Law'},
    {'relevance': 0.000444085

In [9]:
# Flatten the per-segment topic labels into one row per (segment, label) pair.
# The rows are collected first and the frame is built in a single call: growing
# a DataFrame with `.loc` inside a loop re-allocates on every insert (quadratic).
topic_rows = [
    (text_id, segment['text'], label['label'], round(label['relevance'], 2))
    # Text_ID is 1-based, matching the original idx + 1 numbering.
    for text_id, segment in enumerate(results['iab_categories_result']['results'], start=1)
    for label in segment['labels']
]

result_df = pd.DataFrame(topic_rows, columns=["Text_ID", "Text", "Topic_Label", "Relevance"])

result_df.head()

Unnamed: 0,Text_ID,Text,Topic_Label,Relevance
0,1,"Listen, I'm worried about our democracy. Nowad...",NewsAndPolitics>Politics>PoliticalIssues,0.93
1,1,"Listen, I'm worried about our democracy. Nowad...",EventsAndAttractions>PoliticalEvent,0.91
2,1,"Listen, I'm worried about our democracy. Nowad...",NewsAndPolitics>Politics>Elections,0.9
3,1,"Listen, I'm worried about our democracy. Nowad...",NewsAndPolitics>Politics,0.78
4,1,"Listen, I'm worried about our democracy. Nowad...",NewsAndPolitics>Law,0.0


In [10]:
# One row per entry of the summary mapping, with a 1-based Label_ID.
# Rows are collected first and the frame is built once, instead of appending
# row-by-row with `.loc`, which re-allocates the frame on every insert.
summary_rows = [
    (label_id, topic, round(score, 2))
    for label_id, (topic, score) in enumerate(
        results['iab_categories_result']['summary'].items(), start=1
    )
]

summary_df = pd.DataFrame(summary_rows, columns=["Label_ID", "Topic_Label", "Relevance"])

summary_df.head()

Unnamed: 0,Label_ID,Topic_Label,Relevance
0,1,EventsAndAttractions>PoliticalEvent,1.0
1,2,NewsAndPolitics>Politics>PoliticalIssues,0.85
2,3,NewsAndPolitics>Politics,0.8
3,4,NewsAndPolitics>Politics>Elections,0.27
4,5,NewsAndPolitics>Crime,0.25


In [11]:
# How many rows of result_df carry each topic label.
result_df.value_counts(subset="Topic_Label")

Topic_Label
EventsAndAttractions>PoliticalEvent         18
NewsAndPolitics>Politics>PoliticalIssues    18
NewsAndPolitics>Politics                    16
NewsAndPolitics>Politics>WarAndConflicts    15
NewsAndPolitics>Politics>Elections          13
                                            ..
Automotive>AutoBuyingAndSelling              1
MusicAndAudio>HipHopMusic                    1
MusicAndAudio>TalkRadio>EducationalRadio     1
MusicAndAudio>UrbanContemporaryMusic         1
Movies>FamilyAndChildrenMovies               1
Name: count, Length: 91, dtype: int64

In [12]:
# The three summary rows with the highest relevance score.
print(summary_df.nlargest(n=3, columns="Relevance"))

   Label_ID                               Topic_Label  Relevance
0         1       EventsAndAttractions>PoliticalEvent       1.00
1         2  NewsAndPolitics>Politics>PoliticalIssues       0.85
2         3                  NewsAndPolitics>Politics       0.80
