# Helper Functions

In [44]:
import json

import requests
import time


upload_endpoint = "https://api.assemblyai.com/v2/upload"
transcript_endpoint = "https://api.assemblyai.com/v2/transcript"

API_KEY = "a6f53d3fe7f542be8ce148432f0ff9a0"

## Make Header

In [2]:
def make_header(api_key):
    return {
    'authorization': api_key,
    'content-type': 'application/json'
    }

In [3]:
header = make_header(API_KEY)

In [4]:
header

{'authorization': 'a6f53d3fe7f542be8ce148432f0ff9a0',
 'content-type': 'application/json'}

## Upload File

In [5]:
# Helper for `upload_file()`
def _read_file(filename, chunk_size=5242880):
    with open(filename, "rb") as f:
        while True:
            data = f.read(chunk_size)
            if not data:
                break
            yield data


# Uploads a file to AAI servers
def upload_file(audio_file, header):
    upload_response = requests.post(
        upload_endpoint,
        headers=header, 
        data=_read_file(audio_file)
    )
    return upload_response#.json()

In [6]:
upload_response = upload_file("../s5_gettysburg.wav", header)

In [8]:
upload_response

<Response [200]>

In [12]:
for i in dir(upload_response):
    if not i.startswith("_"):
        print('-'*50)
        print(i, '---', getattr(upload_response, i))

--------------------------------------------------
apparent_encoding --- ascii
--------------------------------------------------
close --- <bound method Response.close of <Response [200]>>
--------------------------------------------------
connection --- <requests.adapters.HTTPAdapter object at 0x0000022703C387C0>
--------------------------------------------------
content --- b'{"upload_url": "https://cdn.assemblyai.com/upload/42bef839-18c7-4252-9fec-0ea53075d633"}'
--------------------------------------------------
cookies --- <RequestsCookieJar[<Cookie AWSALB=u4jxFFVYKbpQaexBa6qR3efyQJFaddP7dzurpb30gCysbRtebDRgYLfpjnP9UdQ8IapoqnVwTcFJB9llSr0KMEuIZnXXDGHcmhqfVze0iPBU/2llPTL5jx5Kow/+ for api.assemblyai.com/>, <Cookie AWSALBCORS=u4jxFFVYKbpQaexBa6qR3efyQJFaddP7dzurpb30gCysbRtebDRgYLfpjnP9UdQ8IapoqnVwTcFJB9llSr0KMEuIZnXXDGHcmhqfVze0iPBU/2llPTL5jx5Kow/+ for api.assemblyai.com/>]>
--------------------------------------------------
elapsed --- 0:00:00.875986
-------------------------------

In [13]:
upload_response.status_code == 200

True

In [14]:
upload_response.json()

{'upload_url': 'https://cdn.assemblyai.com/upload/42bef839-18c7-4252-9fec-0ea53075d633'}

In [16]:
# For restarting so dont have to re run
upload_response = 'https://cdn.assemblyai.com/upload/42bef839-18c7-4252-9fec-0ea53075d633'

## Request Transcript

In [17]:
def request_transcript(upload_url, header, **kwargs):
    # If input is a dict returned from `upload_file` rather than a raw upload_url string
    if type(upload_url) is dict:
        upload_url = upload_url['upload_url']

    # Create request
    transcript_request = {
        'audio_url': upload_url,
        **kwargs
    }

    # POST request
    transcript_response = requests.post(
        transcript_endpoint,
        json=transcript_request,
        headers=header
    )

    return transcript_response.json()

In [18]:
json_dict = {
    'redact_pii_policies': ['drug', 'injury', 'person_name'],
    'auto_chapters': 'true',
    'auto_highlights': 'true',
    'iab_categories': 'true',
    'entity_detection': 'true',
    'sentiment_analysis': 'true',
    'redact_pii': 'true',
    'content_safety': 'true',
}

In [19]:
transcript_request_response = request_transcript(upload_response, header, **json_dict)

In [20]:
transcript_request_response

{'id': 'owke4eku0d-8ac7-4729-bdb9-1a42fcf6f70b',
 'language_model': 'assemblyai_default',
 'acoustic_model': 'assemblyai_default',
 'language_code': 'en_us',
 'status': 'queued',
 'audio_url': 'https://cdn.assemblyai.com/upload/42bef839-18c7-4252-9fec-0ea53075d633',
 'text': None,
 'words': None,
 'utterances': None,
 'confidence': None,
 'audio_duration': None,
 'punctuate': True,
 'format_text': True,
 'dual_channel': None,
 'webhook_url': None,
 'webhook_status_code': None,
 'webhook_auth': False,
 'webhook_auth_header_name': None,
 'speed_boost': False,
 'auto_highlights_result': None,
 'auto_highlights': True,
 'audio_start_from': None,
 'audio_end_at': None,
 'word_boost': [],
 'boost_param': None,
 'filter_profanity': False,
 'redact_pii': True,
 'redact_pii_audio': False,
 'redact_pii_audio_quality': 'mp3',
 'redact_pii_policies': ['drug', 'injury', 'person_name'],
 'redact_pii_sub': 'hash',
 'speaker_labels': False,
 'content_safety': True,
 'iab_categories': True,
 'content_s

In [24]:
for i in dir(transcript_request_response):
    if not i.startswith("_"):
        print('-'*50)
        print(i, '---', getattr(transcript_request_response, i))

--------------------------------------------------
clear --- <built-in method clear of dict object at 0x0000022713DCC300>
--------------------------------------------------
copy --- <built-in method copy of dict object at 0x0000022713DCC300>
--------------------------------------------------
fromkeys --- <built-in method fromkeys of type object at 0x00007FFFCDDBEBB0>
--------------------------------------------------
get --- <built-in method get of dict object at 0x0000022713DCC300>
--------------------------------------------------
items --- <built-in method items of dict object at 0x0000022713DCC300>
--------------------------------------------------
keys --- <built-in method keys of dict object at 0x0000022713DCC300>
--------------------------------------------------
pop --- <built-in method pop of dict object at 0x0000022713DCC300>
--------------------------------------------------
popitem --- <built-in method popitem of dict object at 0x0000022713DCC300>
--------------------------

In [28]:
print(transcript_request_response)

{'id': 'owke4eku0d-8ac7-4729-bdb9-1a42fcf6f70b', 'language_model': 'assemblyai_default', 'acoustic_model': 'assemblyai_default', 'language_code': 'en_us', 'status': 'queued', 'audio_url': 'https://cdn.assemblyai.com/upload/42bef839-18c7-4252-9fec-0ea53075d633', 'text': None, 'words': None, 'utterances': None, 'confidence': None, 'audio_duration': None, 'punctuate': True, 'format_text': True, 'dual_channel': None, 'webhook_url': None, 'webhook_status_code': None, 'webhook_auth': False, 'webhook_auth_header_name': None, 'speed_boost': False, 'auto_highlights_result': None, 'auto_highlights': True, 'audio_start_from': None, 'audio_end_at': None, 'word_boost': [], 'boost_param': None, 'filter_profanity': False, 'redact_pii': True, 'redact_pii_audio': False, 'redact_pii_audio_quality': 'mp3', 'redact_pii_policies': ['drug', 'injury', 'person_name'], 'redact_pii_sub': 'hash', 'speaker_labels': False, 'content_safety': True, 'iab_categories': True, 'content_safety_labels': {}, 'iab_categories

## Make Polling Endpoint

In [31]:
def make_polling_endpoint(transcript_id):
    # If upload response is input rather than raw upload_url string
    if type(transcript_id) is dict:
        transcript_id = transcript_id['id']
        
    polling_endpoint = "https://api.assemblyai.com/v2/transcript/" + transcript_id
    return polling_endpoint

In [32]:
polling_endpoint = make_polling_endpoint(transcript_request_response)

In [33]:
polling_endpoint

'https://api.assemblyai.com/v2/transcript/owke4eku0d-8ac7-4729-bdb9-1a42fcf6f70b'

In [35]:
# so dont have to rerun transcript if restart notebook
polling_endpoint = "https://api.assemblyai.com/v2/transcript/owke4eku0d-8ac7-4729-bdb9-1a42fcf6f70b"

## Wait for Completion

In [36]:
def wait_for_completion(polling_endpoint, header):
    while True:
        polling_response = requests.get(polling_endpoint, headers=header)
        polling_response = polling_response.json()

        if polling_response['status'] == 'completed':
            break
        elif polling_response['status'] == 'error':
            raise Exception(f"Error: {polling_response['error']}")

        time.sleep(5)

In [37]:
wait_for_completion(polling_endpoint, header)

In [38]:
polling_response = requests.get(polling_endpoint, headers=header)
polling_response.json()['status']

'completed'

## Fetch Results

In [45]:
with open('response.json', 'w') as f:
    f.write(json.dumps(requests.get(polling_endpoint, headers=header).json()))

In [14]:
# Get the paragraphs of the transcript
def get_paragraphs(polling_endpoint, header):
    paragraphs_response = requests.get(polling_endpoint + "/paragraphs", headers=header)
    paragraphs_response = paragraphs_response.json()

    paragraphs = []
    for para in paragraphs_response['paragraphs']:
        paragraphs.append(para)

    return paragraphs

get_paragraphs(polling_endpoint, header)

In [16]:
# Just to see what else is here
paragraphs_response = requests.get(polling_endpoint + "/paragraphs", headers=header)
paragraphs_response = paragraphs_response.json()

# Testing Stuff

## IAB Categories response

In [46]:
j = json.loads("response.json")

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [130]:
dic = {
      "Travel>TravelLocations>AfricaTravel": 1.0,
      "Travel>TravelLocations>EuropeTravel": 0.7229765057563782,
      "NewsAndPolitics>Politics": 0.22560375928878784,
      "Religion&Spirituality>Christianity": 0.17373540997505188,
      "NewsAndPolitics>Law": 0.10182195901870728,
      "EventsAndAttractions>PoliticalEvent": 0.09067431092262268,
      "FamilyAndRelationships>MarriageAndCivilUnions": 0.05113469809293747,
      "Travel>TravelLocations>NorthAmericaTravel": 0.04764898121356964,
      "NewsAndPolitics>Politics>PoliticalIssues": 0.03767641261219978,
      "NewsAndPolitics>Politics>WarAndConflicts": 0.025518830865621567,
      "Travel>TravelLocations>AustraliaAndOceaniaTravel": 0.023581648245453835,
      "Hobbies&Interests>GenealogyAndAncestry": 0.013975286856293678,
      "Religion&Spirituality>Judaism": 0.012308650650084019,
      "NewsAndPolitics>Crime": 0.012244840152561665,
      "Science>Geography": 0.010910360142588615,
      "BooksAndLiterature>Biographies": 0.010122659616172314,
      "Technology&Computing>Computing>Internet": 0.00969504751265049,
      "PersonalFinance>HomeUtilities>InternetServiceProviders": 0.00960697140544653,
      "Religion&Spirituality": 0.008793771266937256,
      "NewsAndPolitics>Politics>Elections": 0.008565575815737247
    }

c = list(dic.keys())

In [131]:
c.sort()

In [132]:
c

['BooksAndLiterature>Biographies',
 'EventsAndAttractions>PoliticalEvent',
 'FamilyAndRelationships>MarriageAndCivilUnions',
 'Hobbies&Interests>GenealogyAndAncestry',
 'NewsAndPolitics>Crime',
 'NewsAndPolitics>Law',
 'NewsAndPolitics>Politics',
 'NewsAndPolitics>Politics>Elections',
 'NewsAndPolitics>Politics>PoliticalIssues',
 'NewsAndPolitics>Politics>WarAndConflicts',
 'PersonalFinance>HomeUtilities>InternetServiceProviders',
 'Religion&Spirituality',
 'Religion&Spirituality>Christianity',
 'Religion&Spirituality>Judaism',
 'Science>Geography',
 'Technology&Computing>Computing>Internet',
 'Travel>TravelLocations>AfricaTravel',
 'Travel>TravelLocations>AustraliaAndOceaniaTravel',
 'Travel>TravelLocations>EuropeTravel',
 'Travel>TravelLocations>NorthAmericaTravel']

In [291]:
def make_tree(c, ukey=''):
    # Create empty dict for current sublist
    d = dict()
    # If leaf, return None (alternative behavior: get rid of ukey and return None for if c is None)
    if c is None and ukey is None:
        return None
    elif c is None:
        return {None: None}
    else:
        for n, i in enumerate(c):
            # For topics with sublist e.g. if ['NewsAndPolitics' 'Politics'] and ['NewsAndPolitics' 'Politics', 'Elections] are both in list - need way to signify politics itself included
            if i is None:
                d[None] = None
            # If next subtopic not in dict, add it. If the remaining list empty, make value None
            elif i[0] not in d.keys():
                topic = i.pop(0)
                d[topic] = None if i == [] else [i]
            # If subtopic already in dict
            else:
                # If the value for this subtopic is only None (i.e. subject itself is a leaf), then append sublist
                if d[i[0]] is None:
                    d[i[0]] = [None, i[1:]]
                # If value for this subtopic is a list itself, then append the remaining list
                else:
                    d[i[0]].append(i[1:])
        # Recurse on remaining leaves
        for key in d:
            d[key] = make_tree(d[key], key)
    return d

In [135]:
c_orig = c

['BooksAndLiterature>Biographies',
 'EventsAndAttractions>PoliticalEvent',
 'FamilyAndRelationships>MarriageAndCivilUnions',
 'Hobbies&Interests>GenealogyAndAncestry',
 'NewsAndPolitics>Crime',
 'NewsAndPolitics>Law',
 'NewsAndPolitics>Politics',
 'NewsAndPolitics>Politics>Elections',
 'NewsAndPolitics>Politics>PoliticalIssues',
 'NewsAndPolitics>Politics>WarAndConflicts',
 'PersonalFinance>HomeUtilities>InternetServiceProviders',
 'Religion&Spirituality',
 'Religion&Spirituality>Christianity',
 'Religion&Spirituality>Judaism',
 'Science>Geography',
 'Technology&Computing>Computing>Internet',
 'Travel>TravelLocations>AfricaTravel',
 'Travel>TravelLocations>AustraliaAndOceaniaTravel',
 'Travel>TravelLocations>EuropeTravel',
 'Travel>TravelLocations>NorthAmericaTravel']

In [300]:
c = [i.split(">") for i in c_orig]

In [301]:
c

[['BooksAndLiterature', 'Biographies'],
 ['EventsAndAttractions', 'PoliticalEvent'],
 ['FamilyAndRelationships', 'MarriageAndCivilUnions'],
 ['Hobbies&Interests', 'GenealogyAndAncestry'],
 ['NewsAndPolitics', 'Crime'],
 ['NewsAndPolitics', 'Law'],
 ['NewsAndPolitics', 'Politics'],
 ['NewsAndPolitics', 'Politics', 'Elections'],
 ['NewsAndPolitics', 'Politics', 'PoliticalIssues'],
 ['NewsAndPolitics', 'Politics', 'WarAndConflicts'],
 ['PersonalFinance', 'HomeUtilities', 'InternetServiceProviders'],
 ['Religion&Spirituality'],
 ['Religion&Spirituality', 'Christianity'],
 ['Religion&Spirituality', 'Judaism'],
 ['Science', 'Geography'],
 ['Technology&Computing', 'Computing', 'Internet'],
 ['Travel', 'TravelLocations', 'AfricaTravel'],
 ['Travel', 'TravelLocations', 'AustraliaAndOceaniaTravel'],
 ['Travel', 'TravelLocations', 'EuropeTravel'],
 ['Travel', 'TravelLocations', 'NorthAmericaTravel']]

In [302]:
m = make_tree(c, '')

In [303]:
m

{'BooksAndLiterature': {'Biographies': {None: None}},
 'EventsAndAttractions': {'PoliticalEvent': {None: None}},
 'FamilyAndRelationships': {'MarriageAndCivilUnions': {None: None}},
 'Hobbies&Interests': {'GenealogyAndAncestry': {None: None}},
 'NewsAndPolitics': {'Crime': {None: None},
  'Law': {None: None},
  'Politics': {None: None,
   'Elections': {None: None},
   'PoliticalIssues': {None: None},
   'WarAndConflicts': {None: None}}},
 'PersonalFinance': {'HomeUtilities': {'InternetServiceProviders': {None: None}}},
 'Religion&Spirituality': {None: None,
  'Christianity': {None: None},
  'Judaism': {None: None}},
 'Science': {'Geography': {None: None}},
 'Technology&Computing': {'Computing': {'Internet': {None: None}}},
 'Travel': {'TravelLocations': {'AfricaTravel': {None: None},
   'AustraliaAndOceaniaTravel': {None: None},
   'EuropeTravel': {None: None},
   'NorthAmericaTravel': {None: None}}}}

In [205]:
print(json.dumps(m, indent=4))

{
    "BooksAndLiterature": {
        "Biographies": {
            "null": null
        }
    },
    "EventsAndAttractions": {
        "PoliticalEvent": {
            "null": null
        }
    },
    "FamilyAndRelationships": {
        "MarriageAndCivilUnions": {
            "null": null
        }
    },
    "Hobbies&Interests": {
        "GenealogyAndAncestry": {
            "null": null
        }
    },
    "NewsAndPolitics": {
        "Crime": {
            "null": null
        },
        "Law": {
            "null": null
        },
        "Politics": {
            "null": null,
            "Elections": {
                "null": null
            },
            "PoliticalIssues": {
                "null": null
            },
            "WarAndConflicts": {
                "null": null
            }
        }
    },
    "PersonalFinance": {
        "HomeUtilities": {
            "InternetServiceProviders": {
                "null": null
            }
        }
    },
    "Religion&

In [304]:
import re


def split_on_capital(string):
    """Adds spaces between capitalized words of a string"""
    return ' '.join(re.findall("[A-Z][^A-Z]*", string))

def make_html_tree(dic, level=0, HTML = ''):
    for key in dic:
        # Add the topic to HTML, specifying the current level and whether or not it is a topic
        if type(dic[key]) == dict:
            # If the topic itself is a detected topic, delete None from dict and write html
            if None in dic[key].keys():
                del dic[key][None]
                HTML += f'<p class="topic-L{level} istopic">{split_on_capital(key)}</p>'
            else:
                HTML += f'<p class="topic-L{level}">{split_on_capital(key)}</p>'

            HTML = make_html_tree(dic[key], level=level+1, HTML=HTML)
        # If leaf, add HTML and do not recurse
        else:
            HTML += f'<p class="topic-L{level} istopic">{split_on_capital(key)}</p>'
    return HTML

def make_html_body(dic):
    HTML = '<body>'
    HTML += '<p class="detected-topics">Detected Topics</p>'
    HTML += make_html_tree(dic)
    HTML += "</body>"
    return HTML


def make_html(dic):
    HTML = '<!DOCTYPE html>' \
       '<html>' \
       '<head>' \
       '<title>Another simple example</title>' \
       '<link rel="stylesheet" type="text/css" href="styles.css"/>' \
       '</head>'
    HTML += make_html_body(dic)
    HTML += "</html>"
    return HTML

with open('topics.html', 'w') as f:
    f.write(make_html(m))

### Making CSS for Tree

In [256]:
css = ".istopic {\n" \
      "color: rgb(255, 0, 0);" \
      "\n}" \
      "\n\n"
starting_fs = 32

for i in range(10):
    css += f".topic-L{i} {{\n" \
           f"font-size: {max(32-i*6, 16)}px;\n" \
           f"text-indent: {8*i}px;\n" \
           f"}}" \
           f"\n\n"

print(css)
with open('styles.css', 'w') as f:
    f.write(css)

.istopic {
color: rgb(255, 0, 0);
}

.topic-L0 {
font-size: 32px;
text-indent: 0px;
}

.topic-L1 {
font-size: 26px;
text-indent: 8px;
}

.topic-L2 {
font-size: 20px;
text-indent: 16px;
}

.topic-L3 {
font-size: 16px;
text-indent: 24px;
}

.topic-L4 {
font-size: 16px;
text-indent: 32px;
}

.topic-L5 {
font-size: 16px;
text-indent: 40px;
}

.topic-L6 {
font-size: 16px;
text-indent: 48px;
}

.topic-L7 {
font-size: 16px;
text-indent: 56px;
}

.topic-L8 {
font-size: 16px;
text-indent: 64px;
}

.topic-L9 {
font-size: 16px;
text-indent: 72px;
}




In [295]:
# Load assembly response from stored json file
with open('response.json', 'r') as f:
    j = json.load(f)

# FULL FUNCTION GIVEN IAB CATEGORIES JSON
def make_html_from_topics(dic, threshold=0.0):
    # Filter low probab items out
    dic = {k:v for k,v in dic.items() if float(v) >= threshold}

    # Get list of remaining topics
    cats = list(dic.keys())

    # Sort remaining topics
    cats.sort()

    # Split items into lists
    cats = [i.split(">") for i in cats]

    tree = make_tree(cats)

    return make_html(tree)

r = make_html_from_topics(j['iab_categories_result']['summary'])

with open('topics.html', 'w') as f:
    f.write(r)

## Highlighted Text

In [394]:
# Load assembly response from stored json file
with open('response.json', 'r') as f:
    j = json.load(f)

import requests
endpoint = "https://api.assemblyai.com/v2/transcript/owke4eku0d-8ac7-4729-bdb9-1a42fcf6f70b/paragraphs"
headers = {
    "authorization": API_KEY,
}
response = requests.get(endpoint, headers=headers)

def make_paras_string(response):
    paras_list = response.json()['paragraphs']
    paras = [i['text'] for i in paras_list]
    paras = paras + paras
    paras = '\n\n'.join(paras)
    return paras


r = j['auto_highlights_result']['results']
def create_highlighted_list(paragraphs_string, highlights_result, rank=0):
    """Creates list for argument `gr.HighlightedText()`"""
    # Max and min opacities to highlight to
    MAX_HIGHLIGHT = 1
    MIN_HIGHLIGHT = 0.25

    # Filter list for everything above the input rank
    highlights_result = [i for i in highlights_result if i['rank'] >= rank]

    # Get max/min ranks and find scale/shift we'll need so ranks are mapped to [MIN_HIGHLIGHT, MAX_HIGHLIGHT]
    max_rank = max([i['rank'] for i in highlights_result])
    min_rank = min([i['rank'] for i in highlights_result])
    scale = (MAX_HIGHLIGHT-MIN_HIGHLIGHT)/(max_rank-min_rank)
    shift = (MAX_HIGHLIGHT-max_rank*scale)

    # Isolate only highlight text and rank
    highlights_result = [(i['text'], i['rank']) for i in highlights_result]

    entities = []
    for highlight, rank in highlights_result:
        # For each highlight, find all starting character instances
        starts = [c.start() for c in re.finditer(highlight, paragraphs_string)]
        # Create list of locations for this highlight with entity value (highlight opacity) scaled properly
        # TODO: REPLACE WITH LIST COMPREHENSION
        e = [{"entity": rank*scale+shift,
              "start": start,
              "end": start + len(highlight)}
              for start in starts]
        entities += e

    # Create dictionary
    highlight_dict = {"text": paragraphs_string, "entities": entities}

    # Sort entities by start char - a bug in Gradio
    highlight_dict['entities'] = sorted(highlight_dict['entities'], key= lambda x: x['start'])

    return highlight_dict

ps = make_paras_string(response)
with open('paras.txt', 'w') as f:
    f.write(ps)
print(ps)
print(create_highlighted_list(ps, r))

Four score and seven years ago our fathers brought forth on this continent a new nation conceived in liberty and dedicated to the proposition that all men are created equal.

Four score and seven years ago our fathers brought forth on this continent a new nation conceived in liberty and dedicated to the proposition that all men are created equal.
{'text': 'Four score and seven years ago our fathers brought forth on this continent a new nation conceived in liberty and dedicated to the proposition that all men are created equal.\n\nFour score and seven years ago our fathers brought forth on this continent a new nation conceived in liberty and dedicated to the proposition that all men are created equal.', 'entities': [{'entity': 0.25, 'start': 15, 'end': 30}, {'entity': 0.4642857142857143, 'start': 35, 'end': 42}, {'entity': 0.25, 'start': 65, 'end': 74}, {'entity': 0.7857142857142858, 'start': 77, 'end': 87}, {'entity': 1.0, 'start': 101, 'end': 108}, {'entity': 0.25, 'start': 130, 'end'

## Summarization

In [395]:
with open('response.json', 'r') as f:
    j = json.load(f)

In [399]:
chapters = j['chapters']

def make_summary(chapters):
    """input = response.json()['chapters']"""
    html = "<div>"
    for chapter in chapters:
        html += "<details>" \
                f"<summary>{chapter['headline']}</summary>" \
                f"{chapter['summary']}" \
                "</details>"
    html += "</div>"
    return html

In [400]:
make_summary(chapters)

'<div><details><summary>Seven years ago, the fathers brought a new nation to this continent.</summary>Seven years ago, the fathers brought a new nation to this continent.</details></div>'

## Sentiment Analysis

In [410]:
with open('response.json', 'r') as f:
    j = json.load(f)

In [411]:
sentiment_analysis_results = j['sentiment_analysis_results']

In [433]:
green = "background-color: #159609"
red = "background-color: #cc0c0c"

def to_hex(num, max_opacity=128):
    return hex(int(max_opacity*num))[2:]

def make_sentiment_output(sentiment_analysis_results):
    p = "<p>"
    for sentiment in sentiment_analysis_results:
        if sentiment['sentiment'] == 'POSITIVE':
            p += f'<mark style="{green+to_hex(sentiment["confidence"])}">' + sentiment['text'] + '</mark>'
        elif sentiment['sentiment'] == "NEGATIVE":
            p += f'<mark style="{red+to_hex(sentiment["confidence"])}">' + sentiment['text'] + '</mark>'
        else:
            p += sentiment['text']
    p += "</p>"
    return p


#import gradio as gr

#with gr.Blocks() as demo:
#    gr.HighlightedText(h, show_label=True).style(color_map={"+": "#11730099", "-": "red", ".": "#FFFFFF99"})

#demo.launch()

<p><mark style="background-color: #cc0c0c46">You will never believe what happened to me last week.</mark><mark style="background-color: #cc0c0c6b">My SUV broke down, so I had to send it to an auto shop to get a new gasket installed.</mark><mark style="background-color: #cc0c0c69">Yesterday I was walking in South Boston to pick the car up and some guy got thrown through the window of a pub right in front of me.</mark>A few guys had been drinking and they got into an argument about the Red Sox, which resulted in a fight.<mark style="background-color: #cc0c0c5b">When I went to break up the fight, one of the guys accidentally hit me with his elbow in the face, so I fell back and ##### ## #####.</mark><mark style="background-color: #cc0c0c7c">I went to the emergency room and had to get surgery, which sucks because I have to wear a cast for two weeks and it cost me almost $#,###.</mark><mark style="background-color: #cc0c0c7a">My wrist still feels like s***, and I've had to take ##### all we

## Entity Detection

In [434]:
entities = j['entities']

In [435]:
entities

[{'entity_type': 'location',
  'text': 'South Boston',
  'start': 12884,
  'end': 13678},
 {'entity_type': 'organization',
  'text': 'Red Sox',
  'start': 24044,
  'end': 24718},
 {'entity_type': 'injury',
  'text': 'broke my wrist',
  'start': 33812,
  'end': 34950},
 {'entity_type': 'organization',
  'text': 'emergency room',
  'start': 36224,
  'end': 36990},
 {'entity_type': 'medical_process',
  'text': 'surgery',
  'start': 37724,
  'end': 38314},
 {'entity_type': 'money_amount',
  'text': '1,000',
  'start': 42656,
  'end': 44000},
 {'entity_type': 'drug', 'text': 'Advil', 'start': 47660, 'end': 48154},
 {'entity_type': 'occupation',
  'text': 'Political Science',
  'start': 53684,
  'end': 54394},
 {'entity_type': 'occupation',
  'text': 'professors',
  'start': 59192,
  'end': 60030},
 {'entity_type': 'occupation', 'text': 'law', 'start': 62924, 'end': 63114},
 {'entity_type': 'medical_process',
  'text': 'physical therapy',
  'start': 80504,
  'end': 81274}]

In [495]:
d = {}
for entity in entities:
    OFFSET = 40
    t = j['text']
    s = t.find(entity['text'])
    len_text = len(j['text'])
    len_entity = len(entity['text'])
    p = t[max(0, s-OFFSET):min(s+len_entity+OFFSET, len_text)]
    p = '... '+ ' '.join(p.split(' ')[1:-1]) + ' ...'
    if entity['entity_type'] in d:
        d[entity['entity_type']] += [[p, entity['text']]]
    else:
        d[entity['entity_type']] = [[p, entity['text']]]

In [504]:
h = "<ul>"
for i in d:
    h += f'<li>{i}'
    h += "<ul>"
    for sent, ent in d[i]:
        h += f'<li>{sent.replace(ent, f"<mark>{ent}</mark>")}</li>'
    h += '</ul>'
    h += '</li>'
h += "</ul>"

In [503]:
h

"<ul><li>location<ul><li>... installed. Yesterday I was walking in <mark>South Boston</mark> to pick the car up and some guy got ...</li></ul></li><li>organization<ul><li>... they got into an argument about the <mark>Red Sox</mark>, which resulted in a fight. When I ...</li><li>... back and ##### ## #####. I went to the <mark>emergency room</mark> and had to get surgery, which sucks ...</li></ul></li><li>injury<ul><li>... will never believe what happened to me last ...</li></ul></li><li>medical_process<ul><li>... to the emergency room and had to get <mark>surgery</mark>, which sucks because I have to wear a ...</li><li>... has a ton of good nutrition and <mark>physical therapy</mark> resources, too. I'm really excited to ...</li></ul></li><li>money_amount<ul><li>... will never believe what happened to me ...</li></ul></li><li>drug<ul><li>... will never believe what happened to me ...</li></ul></li><li>occupation<ul><li>... good. I started my master's degree in <mark>Political Science</

## Content Moderation

In [505]:
import plotly.express as px

In [508]:
j['content_safety_labels']['summary']

{'accidents': 0.2611353717731301,
 'alcohol': 0.46347422408861444,
 'crime_violence': 0.27030136438470115}

In [538]:
import pandas as pd

d = {'label': [], 'severity': []}
for key in j['content_safety_labels']['summary']:
    d['label'] += [key]
    d['severity'] += [j['content_safety_labels']['summary'][key]]

fig = px.bar(d, x='severity',y='label')
fig.update_xaxes(range=[0, 1])
fig.show()

In [510]:
data_canada

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
240,Canada,Americas,1952,68.75,14785584,11367.16112,CAN,124
241,Canada,Americas,1957,69.96,17010154,12489.95006,CAN,124
242,Canada,Americas,1962,71.3,18985849,13462.48555,CAN,124
243,Canada,Americas,1967,72.13,20819767,16076.58803,CAN,124
244,Canada,Americas,1972,72.88,22284500,18970.57086,CAN,124
245,Canada,Americas,1977,74.21,23796400,22090.88306,CAN,124
246,Canada,Americas,1982,75.76,25201900,22898.79214,CAN,124
247,Canada,Americas,1987,76.86,26549700,26626.51503,CAN,124
248,Canada,Americas,1992,77.95,28523502,26342.88426,CAN,124
249,Canada,Americas,1997,78.61,30305843,28954.92589,CAN,124


## SCRIPT:

In [409]:
import requests
# id = owke4eku0d-8ac7-4729-bdb9-1a42fcf6f70b # 4 score
# id = "owp262ksb9-2f7d-4e3e-95c4-c25af6ca01a1" # First new script
# id = "ow2eugxj18-6ff6-4921-b141-b489c1ea9b95"  # revisted new script
# id = "ow27qi17ov-242b-497b-bfba-e6f344db7b13" #PHONE NUMBER PII TEST
id = "ow2seyirvx-f0d2-4a4c-b344-86618d612855"
endpoint = f"https://api.assemblyai.com/v2/transcript/{id}/paragraphs"
headers = {
    "authorization": API_KEY,
}
response = requests.get(endpoint, headers=headers)

def make_paras_string(response):
    paras_list = response.json()['paragraphs']
    paras = [i['text'] for i in paras_list]
    #paras = paras + paras
    paras = '\n\n'.join(paras)
    return paras

ps = make_paras_string(response)
with open('paras.txt', 'w') as f:
    f.write(ps)

endpoint = f"https://api.assemblyai.com/v2/transcript/{id}"
headers = {
    "authorization": API_KEY,
}

with open('response.json', 'w') as f:
    f.write(json.dumps(requests.get(endpoint, headers=headers).json()))

# Gradio App Demo

In [1]:
%%capture

!pip install gradio

In [None]:
# virtualenv venv
# .\venv\Scripts\activate.bat
# pip install jupyter
# ipython kernel install --name "gradio-venv" --user

In [316]:
API_KEY = "a6f53d3fe7f542be8ce148432f0ff9a0"

In [3]:
import gradio as gr

In [6]:
def greet(name):
    return "hello"+name

with gr.Blocks(css="body {background-color: red}") as demo:
    name = gr.Textbox()
    output = gr.Textbox()
    greet_btn = gr.Button()
    greet_btn.click(greet, name, output)
    
demo.launch()

Running on local URL:  http://127.0.0.1:7861/

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x211296a7640>, 'http://127.0.0.1:7861/', None)

In [7]:
import gradio as gr

scores = []

def track_score(score):
    scores.append(score)
    top_scores = sorted(scores, reverse=True)[:3]
    return top_scores

demo = gr.Interface(
    track_score, 
    gr.Number(label="Score"), 
    gr.JSON(label="Top Scores")
)
demo.launch()

Running on local URL:  http://127.0.0.1:7862/

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x2112972f1f0>, 'http://127.0.0.1:7862/', None)

In [8]:
import random
import gradio as gr

def chat(message, history):
    history = history or []
    message = message.lower()
    if message.startswith("how many"):
        response = random.randint(1, 10)
    elif message.startswith("how"):
        response = random.choice(["Great", "Good", "Okay", "Bad"])
    elif message.startswith("where"):
        response = random.choice(["Here", "There", "Somewhere"])
    else:
        response = "I don't know"
    history.append((message, response))
    return history, history

chatbot = gr.Chatbot().style(color_map=("green", "pink"))
demo = gr.Interface(
    chat,
    ["text", "state"],
    [chatbot, "state"],
    allow_flagging="never",
)
demo.launch()


Running on local URL:  http://127.0.0.1:7863/

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x211298b30a0>, 'http://127.0.0.1:7863/', None)

In [82]:
import gradio as gr
import random

secret_word = "gradio"

with gr.Blocks() as demo:    
    used_letters_var = gr.State([])
    with gr.Row() as row:
        with gr.Column():
            input_letter = gr.Textbox(label="Enter letter")
            btn = gr.Button("Guess Letter")
        with gr.Column():
            hangman = gr.Textbox(
                label="Hangman",
                value="_"*len(secret_word)
            )
            used_letters_box = gr.Textbox(label="Used Letters")

    def guess_letter(letter, used_letters):
        used_letters.append(letter)
        answer = "".join([
            (letter if letter in used_letters else "_")
            for letter in secret_word
        ])
        return {
            used_letters_var: used_letters,
            used_letters_box: ", ".join(used_letters),
            hangman: answer
        }
    btn.click(
        guess_letter, 
        [input_letter, used_letters_var],
        [used_letters_var, used_letters_box, hangman]
        )
demo.launch()

AttributeError: module 'gradio' has no attribute 'State'

In [85]:
with gr.Blocks() as demo:
    with gr.Tab("Lion"):
        gr.Image("lion.jpg")
        gr.Button("New Lion")
    with gr.Tab("Tiger"):
        gr.Image("tiger.jpg")
        gr.Button("New Tiger")
        
demo.launch()

AttributeError: module 'gradio' has no attribute 'Tab'

# Audio Intelligence Dashboard

In [30]:
import gradio as gr

def chg_file_type(choice):
    if choice == "Local File":
        return gr.Textbox(label="prompt 1234")
    elif choice == "Remote File":
        return gr.Textbox(label="prompt 1234234")

    def txt_chg():
        return gr.update()

with gr.Blocks() as demo:
    with gr.Row():
        text11 = gr.Textbox(label="prompt 1")
        text12 = gr.Textbox(label="prompt 1")
        
        file_type = gr.Radio(choices=["Local File", "Remote File"], value="Local File", interactive=True)
        file_type.change(fn=chg_file_type, inputs=None, outputs=text11)
        
    text21 = gr.HTML("<p>this is some text</p>")
        
    with gr.Row():
        with gr.Column():
            text1 = gr.Textbox(label="prompt 1")
            text2 = gr.Textbox(label="prompt 2")
        with gr.Column():
            img1 = gr.Image("https://upload.wikimedia.org/wikipedia/commons/thumb/b/b6/Image_created_with_a_mobile_phone.png/800px-Image_created_with_a_mobile_phone.png")
            btn = gr.Button("Go").style(full_width=True)
demo.launch()

Running on local URL:  http://127.0.0.1:7881/

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x2112c82e1a0>, 'http://127.0.0.1:7881/', None)

In [32]:
import gradio as gr

def change_textbox(choice):
    if choice == "short":
        return gr.update(lines=2, visible=True, value="Short story: ")
    elif choice == "long":
        return gr.update(lines=8, visible=True, value="Long story...")
    else:
        return gr.update(visible=False)

with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Hello World!
    Start typing below to see the output.
    """)
    inp = gr.Textbox(placeholder="What is your name?")
    out = gr.Textbox()
    inp.change(welcome, inp, out)

demo.launch()


Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB
Running on local URL:  http://127.0.0.1:7884/

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x2112c9e2140>, 'http://127.0.0.1:7884/', None)

In [79]:
# Blocks Example
counter = 1

class OtherTextbox(gr.Textbox):
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
    

def change_audio_source(val):
    global counter
    counter += 1
    if counter % 2 == 0:
        return [gr.Audio.update(visible=False), gr.Image.update(visible=True)]
    else:
        return [gr.Audio.update(visible=True), gr.Image.update(visible=False)]
    
def fn2(val):
    return val
    

import gradio as gr
with gr.Blocks() as demo:
    radio = gr.Radio([1, 2, 4], label="Set the value of the number")
    with gr.Box():
        audio_file = gr.Audio(interactive=True)
        mic_recording = gr.Audio(source="microphone", visible=False, interactive=True)
        
    number = gr.Textbox()
    #audio_wave = gr.Plot()
    
    
    radio.change(fn=change_audio_source, inputs=radio, outputs=[audio_file, mic_recording])
    for component in [audio_file, mic_recording]:
        getattr(component, 'change')(fn=fn2, inputs=component, outputs=number)
    
    
demo.launch()

Running on local URL:  http://127.0.0.1:7921/

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x211304a6da0>, 'http://127.0.0.1:7921/', None)