In [1]:
%matplotlib inline

# Importing the required libraries
import os
import shutil

import speech_recognition as sr

from pydub import AudioSegment
from pydub.playback import play
from pydub.effects import normalize
from pydub.silence import split_on_silence

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize

from punctuator import Punctuator

import spacy
from spacy.pipeline import EntityRuler

import pandas as pd
from pprint import pprint

# Import text classification packages
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split

# Global variables
# Path to the pre-trained punctuator model file. The default is the location
# used when this notebook was written; set the PUNCTUATOR_MODEL_PATH environment
# variable to point at the model on another machine instead of editing the code.
PUNCTUATOR_MODULE = os.environ.get(
    'PUNCTUATOR_MODEL_PATH',
    r'C:\Anaconda3\PUNCTUATOR_DATA_DIR\INTERSPEECH-T-BRNN.pcl',
)

# 04 Processing text transcribed from spoken language

In this chapter, you'll put everything you've learned together by building a speech processing proof of concept project for a technology company, Acme Studios. You'll start by transcribing audio snippets of customer support phone calls to text. Then you'll perform sentiment analysis using NLTK, named entity recognition using spaCy and text classification using scikit-learn on the transcribed text.

## 04.01 Creating transcription helper functions

See the video.

**Examples from the video - Exploring audio**

In [2]:
# Check the folder of audio files
folder = "./"
os.listdir(folder)

['.ipynb_checkpoints',
 '00 Spoken Language Processing in Python.rtf',
 '01 Introduction to Spoken Language Processing with Python.pdf',
 '01 Introduction to Spoken Language Processing with Python.rtf',
 '01. Introduction to Spoken Language Processing with Python.ipynb',
 '01.01 Introduction to audio data in Python.mp4',
 '01.03 Converting sound wave bytes to integers.mp4',
 '01.08 Visualizing sound waves.mp4',
 '01_Introduction_to_Spoken_Language_Processing_with_Python.py',
 '02 Using the Python SpeechRecognition library.pdf',
 '02. Using the Python SpeechRecognition library.ipynb',
 '02. Using the Python SpeechRecognition library.rtf',
 '02.01 SpeechRecognition Python library.mp4',
 '02.05 Reading audio files with SpeechRecognition.mp4',
 '02.08 Dealing with different kinds of audio.mp4',
 '02_Using_the_Python_SpeechRecognition_library.py',
 '03 Manipulating Audio Files with PyDub.pdf',
 '03. Manipulating Audio Files with PyDub.ipynb',
 '03. Manipulating Audio Files with PyDub.rtf',


**For the following examples, we use <code>ex4_call_1_stereo_mp3.mp3</code> audio file**

In [3]:
file = "acme_studios_audio/ex4_call_1_stereo_mp3.mp3"
wav_file = "acme_studios_audio/ex4_call_1_stereo_mp3.wav"

# Import audio file
audio = AudioSegment.from_file(file)
play(audio)

**Creating a format conversion function**

In [4]:
# Create function to convert audio file to wav
def convert_to_wav(filename):
    """Convert an audio file to .wav format and return the new file's path.

    The audio is loaded with PyDub, boosted by 10 dB, passed through
    ``pydub.effects.normalize`` and exported next to the source file under
    the same base name with a ``.wav`` extension.

    Args:
        filename: Path to the source audio file. Non-wav formats require
            ffmpeg to be available to PyDub.

    Returns:
        The path of the exported ``.wav`` file.

    Note:
        If ``filename`` already ends in ``.wav`` the output path equals the
        input path, so the source file is overwritten by the processed audio.
    """
    # Import audio file
    audio = AudioSegment.from_file(filename)

    # Increase the volume by 10 dB
    audio = audio + 10

    # Improving the quality
    audio = normalize(audio)

    # Create new filename by replacing only the final extension. Splitting on
    # the first "." would truncate paths such as "./calls/a.mp3" (-> ".wav")
    # or "call.v2.mp3" (-> "call.wav").
    new_filename = os.path.splitext(filename)[0] + ".wav"

    # Export file as .wav
    audio.export(new_filename, format="wav")

    print(f"Converting {filename} to {new_filename}...")
    return new_filename

# Using the file format conversion function
_ = convert_to_wav(file)

Converting acme_studios_audio/ex4_call_1_stereo_mp3.mp3 to acme_studios_audio/ex4_call_1_stereo_mp3.wav...


**Creating an attribute showing function**

In [5]:
def show_pydub_stats(filename):
    """Print the main audio attributes of a file and return its AudioSegment.

    Args:
        filename: Path to the audio file to inspect.

    Returns:
        The ``AudioSegment`` loaded from ``filename``, so callers can keep
        working with it without loading the file a second time.
    """
    # Load the file once; every statistic below is read from this instance
    segment = AudioSegment.from_file(filename)

    # Emit one attribute per line
    print(
        f"Channels: {segment.channels}",
        f"Sample width: {segment.sample_width}",
        f"Frame rate (sample rate): {segment.frame_rate}",
        f"Frame width: {segment.frame_width}",
        f"Length (ms): {len(segment)}",
        f"Frame count: {segment.frame_count()}",
        sep="\n",
    )

    return segment
    
# Using the attribute showing function
_ = show_pydub_stats(wav_file)

Channels: 1
Sample width: 2
Frame rate (sample rate): 32000
Frame width: 2
Length (ms): 54888
Frame count: 1756416.0


**Creating a transcribe function**

In [6]:
# Create a function to transcribe audio
def transcribe_audio(filename, language, energy_threshold=300, duration=None, offset=None, show_all=None, noise=0):
    """Transcribe a .wav audio file to text using ``recognize_google``.

    Args:
        filename: Path to the audio file, opened with ``sr.AudioFile``.
        language: Language tag passed to ``recognize_google`` (e.g. 'en-GB').
        energy_threshold: Initial value for ``recognizer.energy_threshold``.
        duration: Number of seconds to record from the start of the audio;
            ``None`` records until the end.
        offset: Number of seconds to skip at the start before recording;
            ``None`` skips nothing.
        show_all: Forwarded unchanged to ``recognize_google``.
        noise: Seconds of audio handed to ``adjust_for_ambient_noise``;
            calibration is skipped when this is 0 or negative.

    Returns:
        The value returned by ``recognizer.recognize_google``.

    Raises:
        Errors raised by ``recognize_google`` (for example
        ``sr.UnknownValueError``) are not caught here and propagate to the
        caller.
    """
    # Setup a recognizer instance
    recognizer = sr.Recognizer()

    # Set the energy threshold before the source is read.
    # adjust_for_ambient_noise() updates energy_threshold, so assigning it
    # afterwards would throw the calibrated value away.
    recognizer.energy_threshold = energy_threshold

    # Import the audio file and convert to audio data
    with sr.AudioFile(filename) as source:
        # Adjust for ambient noise (optional)
        if noise > 0:
            recognizer.adjust_for_ambient_noise(source, duration=noise)

        # Record the audio
        audio_data = recognizer.record(source,
                                       duration=duration,  # listen from the beginning up to `duration` seconds
                                       offset=offset)      # skip this many seconds at the start

    # Return the transcribed text
    return recognizer.recognize_google(audio_data, language=language, show_all=show_all)

# Using the transcribe function
transcribe_audio(wav_file, language='en-GB')

'hello welcome to Acme Studio support lawn my name is Daniel how can I best help you hi Daniel this is John'

In [85]:
# a function that splits the audio file into chunks and applies speech recognition
def transcribe_long_audio(file, language,
                          energy_threshold=300, duration=None, offset=None, show_all=None, noise=0,
                          chunk_folder=os.path.join('acme_studios_audio', 'temp')):
    """Split an audio file on silence and transcribe it chunk by chunk.

    Each chunk is exported as ``chunk<i>.wav`` into ``chunk_folder`` and sent
    to ``recognize_google``; the recognised texts are concatenated in order.

    Args:
        file: Path to the audio file, loaded with ``AudioSegment.from_file``.
        language: Language tag passed to ``recognize_google`` (e.g. 'en-US').
        energy_threshold: Initial value for ``recognizer.energy_threshold``.
        duration: Seconds to record from each chunk; ``None`` records the
            whole chunk.
        offset: Seconds to skip at the start of each chunk; ``None`` skips
            nothing.
        show_all: Accepted so the signature matches ``transcribe_audio``, but
            not used by this function.
        noise: Seconds of each chunk handed to ``adjust_for_ambient_noise``;
            calibration is skipped when this is 0 or negative.
        chunk_folder: Working directory for the exported chunks.
            WARNING: if it already exists it is deleted with all its content
            and recreated on every call, so never point it at a folder that
            holds other files.

    Returns:
        The recognised text of every chunk, each followed by a single space
        (an empty string when nothing was recognised).
    """
    # create a speech recognition object and configure it before any audio is
    # read; adjust_for_ambient_noise() updates energy_threshold, so assigning
    # it afterwards would throw the calibrated value away
    recognizer = sr.Recognizer()
    recognizer.energy_threshold = energy_threshold

    # open the audio file using pydub
    audio_file = AudioSegment.from_file(file)

    # split audio_file where silence lasts 500 milliseconds or more
    chunks = split_on_silence(audio_file,
                              min_silence_len=500,  # experiment with this value for your target audio file
                              silence_thresh=audio_file.dBFS - 14,  # 14 dB below the file's average loudness
                              keep_silence=500,  # keep 500 ms of silence around each chunk, adjustable as well
                              )

    # (re)create the directory that stores the audio chunks; makedirs also
    # creates missing parent folders, which os.mkdir would fail on
    if os.path.isdir(chunk_folder):
        shutil.rmtree(chunk_folder)
    os.makedirs(chunk_folder)

    transcripts = []
    for i, audio_chunk in enumerate(chunks, start=1):
        # export audio chunk and save it in the `chunk_folder` directory
        chunk_filename = os.path.join(chunk_folder, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")

        # read the chunk back as speech_recognition audio data
        with sr.AudioFile(chunk_filename) as source:
            # Adjust for ambient noise (optional)
            if noise > 0:
                recognizer.adjust_for_ambient_noise(source, duration=noise)

            # Record the audio
            audio_listened = recognizer.record(source,
                                               duration=duration,  # listen from the beginning up to `duration` seconds
                                               offset=offset)      # skip this many seconds at the start

        # try converting it to text; chunks the recognizer cannot understand
        # are reported and skipped so one bad chunk does not abort the run
        try:
            text = recognizer.recognize_google(audio_listened, language=language)
        except sr.UnknownValueError as e:
            print(f"Error: {str(e)}")
        else:
            transcripts.append(text)

    # return the text for all chunks detected
    return "".join(f"{text} " for text in transcripts)

# Using the transcribe function
print(transcribe_long_audio(wav_file, language='en-US'))

PayPal just looking to place an order but before I receive I'm just wondering if this offer still stands 


## 04.02 Converting audio to the right format

Acme Studios have asked you to do a proof of concept to find out more about their audio files.

After exploring them briefly, you find there are a few calls but they're in the wrong file format for transcription.

As you'll be interacting with many audio files, you decide to begin by creating some helper functions.

The first one, __convert_to_wav(filename)__ takes a file path and uses __PyDub__ to convert it from a __non-wav__ format to __.wav__ format.

Once it's built, we'll use the function to convert Acme's first call (https://assets.datacamp.com/production/repositories/4637/datasets/83ef1650407e911a0f52f491068e3082661db743/ex4_call_1_stereo_mp3.mp3), __call_1.mp3__, from __.mp3__ format to __.wav__.

__PyDub__'s __AudioSegment__ class has already been imported. Remember, to work with non-wav files, you'll need __ffmpeg__.

**Instructions**

1. Import the filename parameter using AudioSegment's from_file().
2. Set the export format to "wav".
3. Pass the target audio file, call_1.mp3, to the function.

**Results:**<br>
<font color=darkgreen>The first function down! Beautiful. Now to convert any audio file to .wav format, you can pass the filename to convert_to_wav(). Creating functions like this at the start of your projects saves plenty of coding later on.</font>

In [8]:
file = "acme_studios_audio/ex4_call_1_stereo_formatted_mp3.mp3"

# Using the file format conversion function
_ = convert_to_wav(file)

Converting acme_studios_audio/ex4_call_1_stereo_formatted_mp3.mp3 to acme_studios_audio/ex4_call_1_stereo_formatted_mp3.wav...


## 04.03 Finding PyDub stats

You decide it'll be helpful to know the audio attributes of any given file easily. This will be especially helpful for finding out how many channels an audio file has or if the frame rate is adequate for transcription.

In this exercise, we'll create __show_pydub_stats()__ which takes a filename of an audio file as input. It then imports the audio as a __PyDub__ __AudioSegment__ instance and prints attributes such as number of channels, length and more.

It then returns the __AudioSegment__ instance so it can be used later on.

We'll use our function on the newly converted __.wav__ file (https://assets.datacamp.com/production/repositories/4637/datasets/43c5aff8c419d07f8cef70fdf40e4657b78b70be/ex4_call_1_stereo_formatted.wav), __call_1.wav__

__AudioSegment__ has already been imported from __PyDub__.

**Instructions**

1. Create an AudioSegment instance called audio_segment by importing the filename parameter.
2. Print the number of channels using the channels attribute.
3. Return the audio_segment variable.
4. Test the function on "call_1.wav".

**Results:**<br>
<font color=darkgreen>Nice! Now you'll be able to find the PyDub attribute parameters of any audio file in one line! It seems call_1.wav has two channels; potentially they could be split using PyDub's split_to_mono() and transcribed separately.</font>

In [9]:
wav_file = "acme_studios_audio/ex4_call_1_stereo_formatted_mp3.wav"

# Using the attribute showing function
_ = show_pydub_stats(wav_file)

Channels: 2
Sample width: 2
Frame rate (sample rate): 32000
Frame width: 4
Length (ms): 54888
Frame count: 1756416.0


## 04.04 Transcribing audio with one line

Alright, now you've got functions to convert audio files and find out their attributes, it's time to build one to transcribe them.

In this exercise, you'll build __transcribe_audio()__ which takes a __filename__ as input, imports the __filename__ using __speech_recognition__'s __AudioFile__ class and then transcribes it using __recognize_google()__.

You've seen these functions before but now we'll put them together so they're accessible in a function.

To test it out, we'll transcribe Acme's first call (https://assets.datacamp.com/production/repositories/4637/datasets/43c5aff8c419d07f8cef70fdf40e4657b78b70be/ex4_call_1_stereo_formatted.wav), "__call_1.wav__".

__speech_recognition__ has been imported as __sr__.

**Instructions**

1. Define a function called transcribe_audio which takes filename as an input parameter.
2. Setup a Recognizer() instance as recognizer.
3. Use recognize_google() to transcribe the audio data.
4. Pass the target call to the function.

**Results:**<br>
<font color=darkgreen>Massive! You'll notice the recognizer didn't transcribe the words 'fast as' adequately on the last line, starring them out as a potential expletive; this is a reminder that speech recognition still isn't perfect. But now you've got a function which can transcribe the audio of a .wav file with one line of code. They're a bit of effort to set up but once you've got them, helper functions like transcribe_audio() save time and prevent errors later on.</font>

In [10]:
wav_file = "acme_studios_audio/ex4_call_1_stereo_formatted_mp3.wav"

# Using the transcribe function
print(transcribe_long_audio(wav_file, language='en-US'))

hello welcome to Acme Studio support lawn my name is Daniel how can I best help you hey Daniel this is John I've recently bought a small fire from you last 3 weeks ago and I'm already having issues with it oh no that's not good to hear John let's let's get your serial number and then we can we can set up a way to fix it for you just one second and grab my serial number it is full 175 7 I'm very displeased how long do you reckon this is going to take Abby on hold for about an hour now well John we're going to try out back there I will get the steel number will start up a support case we're just really really really really just ways of this talk. I've been trying to contact support pause three four days now and has been coronavirus in the morning and then Erin Haas I'm not really happy I kinda want to get this issue fix this possible 


**Result of the ex. in Datacamp platform**
>hello welcome to Acme studio support line my name is Daniel how can I best help you hey Daniel this is John I've recently bought a smart from you guys 3 weeks ago and I'm already having issues with it I know that's not good to hear John let's let's get your cell number and then we can we can set up a way to fix it for you one number for 17 varies how long do you reckon this is going to try our best to get the steel number will start up this support case I'm just really really really really I've been trying to contact past three 4 days now and I've been put on hold more than an hour and a half so I'm not really happy I kind of wanna get this issue 6 is f***** possible

## 04.05 Using the helper functions you've built

Okay, now we've got some helper functions ready to go, it's time to put them to use!

You'll first use __convert_to_wav()__ to convert Acme's __call_1.mp3__ to __.wav__ format and save it as __call_1.wav__

Using __show_pydub_stats()__ you find __call_1.wav__ has 2 channels so you decide to split them using PyDub's __split_to_mono()__. Acme tells you the customer channel is likely channel 2. So you export channel 2 using __PyDub__'s __.export()__.

Finally, you'll use __transcribe_audio()__ to transcribe channel 2 only.

**Instructions**
1. Convert the .mp3 version of call_1 to .wav and then check the stats of the .wav version.
2. Split call_1 to mono and then export the second channel in .wav format.
3. Transcribe the audio of call 1's channel 2.

**Results:**<br>
<font color=darkgreen>Look at that! Thanks to the helper functions we implemented earlier, you converted an audio file, checked its stats, split it into separate channels and transcribed it all within a few lines of code! Well done. Now we've got some ways to turn our audio files into text, let's use some natural language processing to find out more.</font>

In [11]:
file = "acme_studios_audio/ex4_call_1_stereo_formatted_mp3.mp3"

# Convert mp3 file to wav
print('Converting to wav format...')
wav_file = convert_to_wav(file)

# Check the stats of new file
print('\nChecking its stats...')
call_1 = show_pydub_stats(wav_file)



# Split call_1 to mono
print('\nGetting the client audio (channnel 2)...')
call_1_split = call_1.split_to_mono()

# Export channel 2 (the customer channel, index 1 of the split).
# Strip only the final extension: splitting on the first "." would truncate
# any path that contains another dot (e.g. "./calls/a.mp3").
file_channel2 = os.path.splitext(file)[0] + '_channel_2.wav'
call_1_split[1].export(file_channel2, format='wav')



# Transcribe the single channel
print('\nTranscribing the client audio...')
print(transcribe_long_audio(file_channel2, language='en-US'))

Converting to wav format...
Converting acme_studios_audio/ex4_call_1_stereo_formatted_mp3.mp3 to acme_studios_audio/ex4_call_1_stereo_formatted_mp3.wav...

Checking its stats...
Channels: 2
Sample width: 2
Frame rate (sample rate): 32000
Frame width: 4
Length (ms): 54888
Frame count: 1756416.0

Getting the client audio (channnel 2)...

Transcribing the client audio...
hey Daniel this is John I've recently bought a small fire from you last 3 weeks ago and I'm already having issues with it just one second and grab my serial number it is full 175 7 I'm very displeased how long do you reckon this is going to take Abby on hold for about an hour now we're just really really really really just ways of this talk time I've been trying to contact the pool at Cintas pause three four days now and I've been coronavirus in the morning and then Aaron Haas I'm not really happy I kind of want to get this issue fix this possible 


## 04.06 Sentiment analysis on spoken language text

See the video.

**Installing sentiment analysis libraries**

In [12]:
nltk.download("punkt")
nltk.download("vader_lexicon") #Sentimental analysis in English only.

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\jaces\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\jaces\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

>[nltk_data] Downloading package punkt to <br>
>[nltk_data]     C:\Users\jaces\AppData\Roaming\nltk_data...<br>
>[nltk_data]   Package punkt is already up-to-date!<br>
>[nltk_data] Downloading package vader_lexicon to<br>
>[nltk_data]     C:\Users\jaces\AppData\Roaming\nltk_data...

**Sentiment analysis with VADER**

In [13]:
# Create sentiment analysis instance
sid = SentimentIntensityAnalyzer()

# Test sentiment analysis on negative text
print(sid.polarity_scores("This customer service is terrible."))
#the compound value can be thought of as the overall score with -1 being most negative and positive 1 being most positive.

{'neg': 0.437, 'neu': 0.563, 'pos': 0.0, 'compound': -0.4767}


**Sentiment analysis on transcribed text**

In [14]:
# Transcribe the customer channel (channel 2) that was exported to `file_channel2`.
# NOTE(review): `file_channel2` is derived from the call 1 file, although the
# variable name says call_3 - presumably carried over from the course video.
call_3_channel_2_text = transcribe_long_audio(file_channel2, language='en-US')
print(call_3_channel_2_text)

hey Daniel this is John I've recently bought a small fire from you last 3 weeks ago and I'm already having issues with it just one second and grab my serial number it is full 175 7 I'm very displeased how long do you reckon this is going to take Abby on hold for about an hour now we're just really really really really just ways of this talk time I've been trying to contact the pool at Cintas pause three four days now and I've been coronavirus in the morning and then Aaron Haas I'm not really happy I kind of want to get this issue fix this possible 


In [15]:
# Sentiment analysis on customer channel of call_3
sid.polarity_scores(call_3_channel_2_text)

{'neg': 0.078, 'neu': 0.899, 'pos': 0.023, 'compound': -0.8024}

**Add punctuation to the text (English only)**

In [16]:
# Trying the punctuator module
p = Punctuator(PUNCTUATOR_MODULE)
print(p.punctuate('some text'))

Some text.


In [17]:
p = Punctuator(PUNCTUATOR_MODULE)
call_3_channel_2_text_with_punctuation = p.punctuate(call_3_channel_2_text)
print(call_3_channel_2_text_with_punctuation)

Hey, Daniel, this is John I've recently, bought a small fire from you last 3 weeks ago and I'm already having issues with it just one second and grab my serial number. It is full 175 7, I'm, very displeased. How long do you reckon this is going to take Abby on hold for about an hour. Now, we're, just really really really really just ways of this talk time, I've been trying to contact the pool at Cintas pause, three four days now and I've been coronavirus in the morning and then Aaron Haas I'm, not really happy I kind of want to get this issue. Fix this possible.


**Sentiment analysis Sentence by sentence**

In [18]:
# Create sentiment analysis instance
sid = SentimentIntensityAnalyzer()

# Find sentiment on each sentence
for sentence in sent_tokenize(call_3_channel_2_text_with_punctuation):
    print(sentence)
    print(sid.polarity_scores(sentence))

Hey, Daniel, this is John I've recently, bought a small fire from you last 3 weeks ago and I'm already having issues with it just one second and grab my serial number.
{'neg': 0.076, 'neu': 0.883, 'pos': 0.041, 'compound': -0.2732}
It is full 175 7, I'm, very displeased.
{'neg': 0.313, 'neu': 0.687, 'pos': 0.0, 'compound': -0.4927}
How long do you reckon this is going to take Abby on hold for about an hour.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Now, we're, just really really really really just ways of this talk time, I've been trying to contact the pool at Cintas pause, three four days now and I've been coronavirus in the morning and then Aaron Haas I'm, not really happy I kind of want to get this issue.
{'neg': 0.062, 'neu': 0.912, 'pos': 0.025, 'compound': -0.4432}
Fix this possible.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


## 04.07 Analyzing sentiment of a phone call

Once you've transcribed the text from an audio file, it's possible to perform natural language processing on the text.

In this exercise, we'll use __NLTK__'s VADER (Valence Aware Dictionary and sEntiment Reasoner) to analyze the sentiment of the transcribed text of __call_2.wav__.

To transcribe the text, we'll use the __transcribe_audio()__ function we created earlier.

Once we have the text, we'll use __NLTK__'s __SentimentIntensityAnalyzer()__ class to obtain a sentiment polarity score.

__.polarity_scores(text)__ returns a value for pos (positive), neu (neutral), neg (negative) and compound. Compound is a mixture of the other three values. The higher it is, the more positive the text. Lower means more negative.

**Instructions**

1. Instantiate an instance of SentimentIntensityAnalyzer() and save it to the variable sid.
2. Transcribe the target call and save it to call_2_text.
3. Print the polarity_scores() of call_2_text.

**Results:**<br>
<font color=darkgreen>Consider it analyzed! Reading back the transcribed text and listening to the phone call, a compound score of close to 1 (more positive) makes sense since the customer states they're very happy and enjoying their device. Let's keep going!</font>

In [19]:
file = "acme_studios_audio/ex4_call_2_stereo_native.wav"


# Check the stats of new file
print('Checking its stats...')
audio = show_pydub_stats(file)


# Play audio file
print('\nPlaying the audio file...')
play(audio)


# Transcribe the audio
print('\nTranscribing the audio...')
file_text = transcribe_long_audio(file, language='en-US')
print(file_text)


# Create SentimentIntensityAnalyzer instance
sid = SentimentIntensityAnalyzer()

Checking its stats...
Channels: 1
Sample width: 2
Frame rate (sample rate): 32000
Frame width: 2
Length (ms): 52756
Frame count: 1688192.0

Playing the audio file...

Transcribing the audio...
Error: 
Error: 
hello my name is Daniel thank you for calling Acme Studios how can I best help you hi Daniel my name is belly smartfind from you guys and extremely happy with it I'll just go to East nobody street but I'm just what someone a little bit more about the message prank I have but I'm finding it had I thought you on the corner of Edward and Elizabeth according to Google according to the maps but some would you be able to help me in some way because I think actually walk straight past your shop yeah sure thing well thank you Sally that's good to hear you're enjoying it let me let me find out where the nearest stories for you 


In [20]:
# Display sentiment polarity scores at once
print(sid.polarity_scores(file_text))

{'neg': 0.0, 'neu': 0.753, 'pos': 0.247, 'compound': 0.9858}


In [21]:
# Add punctuation to the text
p = Punctuator(PUNCTUATOR_MODULE)
file_text_with_punctuation = p.punctuate(file_text)
print(file_text_with_punctuation)

Hello, my name is Daniel. Thank you for calling, Acme Studios. How can I best help you? Hi Daniel. My name is belly smartfind from you guys and extremely happy with it I'll just go to East, nobody street, but I'm, just what someone a little bit more about the message prank I have but I'm finding it had I thought you on the corner of Edward and Elizabeth, according to Google, according to the maps, but some would you be able to help me in some way because I think actually walk straight past your shop. Yeah, sure thing. Well, thank you Sally that's, good to hear you're, enjoying it. Let me let me find out where the nearest stories for you.


In [22]:
# Sentiment analysis Sentence by sentence
# Find sentiment on each sentence
for sentence in sent_tokenize(file_text_with_punctuation):
    print(sentence)
    print(sid.polarity_scores(sentence))

Hello, my name is Daniel.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Thank you for calling, Acme Studios.
{'neg': 0.0, 'neu': 0.667, 'pos': 0.333, 'compound': 0.3612}
How can I best help you?
{'neg': 0.0, 'neu': 0.303, 'pos': 0.697, 'compound': 0.7845}
Hi Daniel.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
My name is belly smartfind from you guys and extremely happy with it I'll just go to East, nobody street, but I'm, just what someone a little bit more about the message prank I have but I'm finding it had I thought you on the corner of Edward and Elizabeth, according to Google, according to the maps, but some would you be able to help me in some way because I think actually walk straight past your shop.
{'neg': 0.0, 'neu': 0.894, 'pos': 0.106, 'compound': 0.8124}
Yeah, sure thing.
{'neg': 0.0, 'neu': 0.182, 'pos': 0.818, 'compound': 0.5423}
Well, thank you Sally that's, good to hear you're, enjoying it.
{'neg': 0.0, 'neu': 0.391, 'pos': 0.609, 'compound': 0.872}


## 04.08 Sentiment analysis on formatted text

In this exercise, you'll calculate the sentiment on the customer channel of __call_2.wav__.

You've split the customer channel and saved it to __call_2_channel_2.wav__.

But from your experience with sentiment analysis, you know it can change sentence to sentence.

To calculate it sentence by sentence, you split the text into sentences using __NLTK__'s __sent_tokenize()__ function.

But __transcribe_audio()__ doesn't return sentences. To try sentiment analysis with sentences, you've tried a paid API service to get __call_2_channel_2_paid_api_text__ which has sentences.

**Instructions**
1. Transcribe the audio of call_2_channel_2.wav and find the sentiment scores.
2. Split call_2_channel_2_text into sentences and find the sentiment score of each sentence.
3. Split call_2_channel_2_paid_api_text into sentences and score the sentiment of each.

**Results:**<br>
<font color=darkgreen>That's pretty cool, you can see how the sentiment differs from sentence to sentence in the call 2 channel 2 paid API text. An extension could be to dig deeper into each of the sentences which have the lowest scores. Let's push on!</font>

In [43]:
def split_to_mono(filename, sel_channel):
    """Export one channel of an audio file as a mono .wav file.

    Args:
        filename: Path to the audio file, loaded with
            ``AudioSegment.from_file``.
        sel_channel: Zero-based index of the channel to export
            (0 = first channel, 1 = second channel, ...).

    Returns:
        The path of the exported file, built as
        ``<filename without extension>_channel_<sel_channel + 1>.wav``, or
        ``None`` when the file has no such channel (an error message is
        printed in that case).
    """
    # Create AudioSegment instance
    audio_segment = AudioSegment.from_file(filename)

    # Verify the number of channels. Negative indices are rejected as well:
    # they would otherwise silently select a channel counted from the end.
    number_of_channels = audio_segment.channels
    if not 0 <= sel_channel < number_of_channels:
        print(f'Error: unmatched audio file, selected channel: {sel_channel+1}°, ' + \
              f'number of channels in the audio file: {number_of_channels}.')
        return None

    # Split the audio into one mono segment per channel
    audio_channels = audio_segment.split_to_mono()

    # Name the output after the channel that is actually exported (1-based),
    # replacing only the final extension of the source path
    file_sel_channel = f"{os.path.splitext(filename)[0]}_channel_{sel_channel + 1}.wav"
    audio_channels[sel_channel].export(file_sel_channel, format='wav')
    return file_sel_channel

In [47]:
# Split call to mono
print('\nGetting the client audio (channnel 2)...')
# Forward slashes work on every platform (including Windows) and match the
# paths used in the other cells; a backslash path only resolves on Windows.
wav_file = "acme_studios_audio/ex4_call_2_stereo_native.wav"
file_one_channel = split_to_mono(wav_file, 1)


Getting the client audio (channnel 2)...
Error: unmatched audio file, selected channel: 2°, number of channels in the audio file: 1.


In [48]:
# split_to_mono() reported that the native call 2 file has a single channel,
# so point at the customer-only recording (`..._client.wav`) instead.
# Forward slashes keep the path valid on every platform.
file_one_channel = "acme_studios_audio/ex4_call_2_stereo_native_client.wav"

In [49]:
# Transcribe customer channel of call 2
file_one_channel_text = transcribe_long_audio(file_one_channel, language='en-US')
print(file_one_channel_text)

hi Daniel my name is belly recently purchased a smartphone from you guys and extremely happy with it I'm just going to issue nobody street but I'll just go someone a little bit more about the message prank I have Google location but I'm finding it I thought you on the corner of Edward and Elizabeth according to Google according to the maps but some would you be able to help me in some why because I think actually walk straight past your shop 


In [50]:
# Create SentimentIntensityAnalyzer instance
sid = SentimentIntensityAnalyzer()

# Display text and sentiment polarity scores
print(sid.polarity_scores(file_one_channel_text))

{'neg': 0.0, 'neu': 0.901, 'pos': 0.099, 'compound': 0.8124}


In [51]:
# Add punctuation marks
p = Punctuator(PUNCTUATOR_MODULE)
file_one_channel_text_with_punctuation = p.punctuate(file_one_channel_text)
print(file_one_channel_text_with_punctuation)

Hi Daniel, my name is belly recently purchased a smartphone from you guys and extremely happy with it I'm just going to issue, nobody street. But I'll just go someone a little bit more about the message. Prank I have Google location, but I'm finding it I thought you on the corner of Edward and Elizabeth, according to Google, according to the maps, but some would you be able to help me in some why? Because I think actually walk straight past your shop.


In [52]:
# Find sentiment on each sentence
for sentence in sent_tokenize(file_one_channel_text_with_punctuation):
    print(sentence)
    print(sid.polarity_scores(sentence))

Hi Daniel, my name is belly recently purchased a smartphone from you guys and extremely happy with it I'm just going to issue, nobody street.
{'neg': 0.0, 'neu': 0.852, 'pos': 0.148, 'compound': 0.6115}
But I'll just go someone a little bit more about the message.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Prank I have Google location, but I'm finding it I thought you on the corner of Edward and Elizabeth, according to Google, according to the maps, but some would you be able to help me in some why?
{'neg': 0.0, 'neu': 0.908, 'pos': 0.092, 'compound': 0.5499}
Because I think actually walk straight past your shop.
{'neg': 0.0, 'neu': 0.787, 'pos': 0.213, 'compound': 0.2263}


## 04.09 Named entity recognition on transcribed text

See the video.

**Examples from the video - Using spaCy**

In [56]:
# Load spaCy language model
nlp = spacy.load("en_core_web_sm")

In [57]:
# Create a spaCy doc
doc = nlp("I'd like to talk about a smartphone I ordered on July 31st from your " + \
          "Sydney store, my order number is 40939440. I spoke to Georgia about it last week.")
          
# Show different tokens and positions
for token in doc:
    print(token.text, token.idx)

I 0
'd 1
like 4
to 9
talk 12
about 17
a 23
smartphone 25
I 36
ordered 38
on 46
July 49
31st 54
from 59
your 64
Sydney 69
store 76
, 81
my 83
order 86
number 92
is 99
40939440 102
. 110
I 112
spoke 114
to 120
Georgia 123
about 131
it 137
last 140
week 145
. 149


In [59]:
# Print the doc one detected sentence per line
for sent in doc.sents:
    print(sent)

I'd like to talk about a smartphone I ordered on July 31st from your Sydney store, my order number is 40939440.
I spoke to Georgia about it last week.


In [61]:
# List each named entity found in the doc together with its label
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

July 31st DATE
Sydney GPE
40939440 DATE
Georgia GPE
last week DATE


**Custom named entities**

In [62]:
# Check spaCy pipeline: print the (name, component) pairs currently in nlp
print(nlp.pipeline)

[('tagger', <spacy.pipeline.pipes.Tagger object at 0x000001A3005A1DC0>), ('parser', <spacy.pipeline.pipes.DependencyParser object at 0x000001A300545F40>), ('ner', <spacy.pipeline.pipes.EntityRecognizer object at 0x000001A30090FD60>)]


In [63]:
# Create EntityRuler instance
ruler = EntityRuler(nlp)

# Add token pattern to ruler: tag the text "smartphone" as a PRODUCT entity
ruler.add_patterns([{"label": "PRODUCT", "pattern": "smartphone"}])

# Keep the cell re-runnable: add_pipe() raises ValueError when a component
# named "entity_ruler" is already in the pipeline, so drop the stale one first
if nlp.has_pipe("entity_ruler"):
    nlp.remove_pipe("entity_ruler")

# Add new rule to pipeline before ner
nlp.add_pipe(ruler, before="ner")

# Check updated pipeline
nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x1a3005a1dc0>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x1a300545f40>),
 ('entity_ruler', <spacy.pipeline.entityruler.EntityRuler at 0x1a30378d790>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x1a30090fd60>)]

In [65]:
# Re-process the sample sentence so the doc goes through the updated pipeline
sample_text = (
    "I'd like to talk about a smartphone I ordered on July 31st from your "
    "Sydney store, my order number is 40939440. I spoke to Georgia about it last week."
)
doc = nlp(sample_text)

# Test new entity rule: list every entity with its label
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

smartphone PRODUCT
July 31st DATE
Sydney GPE
40939440 DATE
Georgia GPE
last week DATE


## 04.10 Named entity recognition in spaCy

Named entities are real-world objects which have names, such as cities, people, dates or times. We can use __spaCy__ to find named entities in our transcribed text.

In this exercise, you'll transcribe __call_4_channel_2.wav__ (https://assets.datacamp.com/production/repositories/4637/datasets/2e039462d95117677db6ddfe24377d9cadcdf730/ex4_call_4_channel_2_formatted.wav) using __transcribe_audio()__ and then use __spaCy__'s language model, __en_core_web_sm__ to convert the transcribed text to a __spaCy__ doc.

Transforming text to a __spaCy__ doc allows us to leverage __spaCy__'s built-in features for analyzing text, such as __.text__ for tokens (single words), __.sents__ for sentences and __.ents__ for named entities.

**Instructions**
1. Create a spaCy doc by passing the transcribed call 4 channel 2 text to nlp() and then check its type.
2. Create a spaCy doc with call_4_channel_2_text then print all the token text in it using the .text attribute.
3. Load the "en_core_web_sm" language model and then print the sentences in the doc using the .sents attribute.
4. Access the entities in the doc using .ents and then print the text of each.

**Results:**<br>
<font color=darkgreen>Well done! You've now seen some of spaCy's helpful functions for analyzing text. spaCy's built-in named entities are great to start with but sometimes you'll want to use your own. Let's see how!</font>

In [66]:
# Path of the call 4 / channel 2 recording to transcribe
file = "acme_studios_audio/ex4_call_4_channel_2_formatted.wav"

# Transcribe the audio
# (transcribe_long_audio is not defined in this section -- presumably earlier in the notebook)
print('\nTranscribing the audio...')
file_text = transcribe_long_audio(file, language='en-US')
# Show the raw transcript; file_text is reused by the spaCy cells below
print(file_text)


Transcribing the audio...
hi Daniel my name is an and I've recently just purchased a smart front buying from you and I'm very happy with the product I'd like to order another one from my friends he lives in Sydney and have it delivered I'm pretty sure it's model 315 I can check that for you and I'll give you more details if you would like to take my details and I I will also give you the address thank you excellent 


In [69]:
# Load a fresh "en_core_web_sm" pipeline; this replaces the earlier `nlp`
# object, so the EntityRuler added to that one is not part of this pipeline
nlp = spacy.load("en_core_web_sm")

# Turn the call 4 channel 2 transcript into a spaCy doc
doc = nlp(file_text)

# Confirm what kind of object nlp() returned
print('Type:', type(doc))

# Tokens, each with the character offset at which it starts
print('\nTokens:')
for tok in doc:
    print(f"{tok.text} {tok.idx}")

# Sentences, numbered from 1
print('\nSentences:')
for number, sent in enumerate(doc.sents, 1):
    print(f'({number}) {sent}')

# Named entities, each with its label
print('\nEntities:')
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Type: <class 'spacy.tokens.doc.Doc'>

Tokens:
hi 0
Daniel 3
my 10
name 13
is 18
an 21
and 24
I 28
've 29
recently 33
just 42
purchased 47
a 57
smart 59
front 65
buying 71
from 78
you 83
and 87
I 91
'm 92
very 95
happy 100
with 106
the 111
product 115
I 123
'd 124
like 127
to 132
order 135
another 141
one 149
from 153
my 158
friends 161
he 169
lives 172
in 178
Sydney 181
and 188
have 192
it 197
delivered 200
I 210
'm 211
pretty 214
sure 221
it 226
's 228
model 231
315 237
I 241
can 243
check 247
that 253
for 258
you 262
and 266
I 270
'll 271
give 275
you 280
more 284
details 289
if 297
you 300
would 304
like 310
to 315
take 318
my 323
details 326
and 334
I 338
I 340
will 342
also 347
give 352
you 357
the 361
address 365
thank 373
you 379
excellent 383

Sentences:
(1) hi Daniel my name is an
(2) and I've recently just purchased a smart front buying from you
(3) and I'm very happy with the product I'd like to order another one from my friends
(4) he lives in Sydney and have it delivered
(

## 04.11 Creating a custom named entity in spaCy

If __spaCy__'s built-in named entities aren't enough, you can make your own using __spaCy__'s __EntityRuler()__ class.

__EntityRuler()__ allows you to create your own entities to add to a __spaCy__ pipeline.

You start by creating an instance of __EntityRuler()__ and passing it the current pipeline, __nlp__.

You can then call __add_patterns()__ on the instance and pass it a list of dictionaries, each giving the entity __label__ and the text __pattern__ you'd like to tag with it.

Once you've set up a pattern, you can add it to the __nlp__ pipeline using __add_pipe()__.

Since Acme is a technology company, you decide to tag the pattern __"smartphone"__ with the __"PRODUCT"__ entity tag.

spaCy has been imported and a doc already exists containing the transcribed text from __call_4_channel_2.wav__.

**Instructions**

1. Import EntityRuler from spacy.pipeline.
2. Add "smartphone" as the value for the "pattern" key.
3. Add the EntityRuler() instance, ruler, to the nlp pipeline.
4. Print the entity attributes contained in doc.

**Results:**<br>
<font color=darkgreen>There we go! With custom entities like this, you can start to get even more information out of your transcribed text. Depending on the problem you're working with, you may want to combine a few different patterns together. Let's keep going.</font>

In [70]:
# Create EntityRuler instance
ruler = EntityRuler(nlp)

# Add token pattern to ruler.
# NOTE: the transcript printed above reads "smart front", so the literal
# "smartphone" from the exercise text would not match it; "smart" does.
ruler.add_patterns([{"label": "PRODUCT", "pattern": "smart"}])

# Keep the cell re-runnable: add_pipe() raises ValueError when a component
# named "entity_ruler" is already in the pipeline, so drop the stale one first
if nlp.has_pipe("entity_ruler"):
    nlp.remove_pipe("entity_ruler")

# Add new rule to pipeline before ner
nlp.add_pipe(ruler, before="ner")
print('The new pipeline:')
print(nlp.pipeline)

# Custom named entities: re-process the transcript with the updated pipeline
print('\nApplying the new pipeline...')
doc = nlp(file_text)

# Test new entity rule
for entity in doc.ents:
    print(entity.text, entity.label_)

The new pipeline:
[('tagger', <spacy.pipeline.pipes.Tagger object at 0x000001A303FCCDF0>), ('parser', <spacy.pipeline.pipes.DependencyParser object at 0x000001A30610E040>), ('entity_ruler', <spacy.pipeline.entityruler.EntityRuler object at 0x000001A3023FFE50>), ('ner', <spacy.pipeline.pipes.EntityRecognizer object at 0x000001A30610E9A0>)]

Applying the new pipeline...
Daniel PERSON
smart PRODUCT
Sydney GPE
315 CARDINAL


## 04.12 Classifying transcribed speech with Sklearn

See the video.

**Examples from the video - Converting to wav**

In [73]:
# Look at what is in the purchases audio folder
print('Inspecting the data...')
folder = 'purchases_audio/'

purchase_audio = os.listdir(folder)
print(purchase_audio)


# Convert every listed file that is not already a .wav
print('Converting to wav')
files_to_convert = [name for name in purchase_audio if not name.endswith(".wav")]
for name in files_to_convert:
    # Use previously made function to convert to .wav
    wav_file = convert_to_wav(folder + name)
    

Inspecting the data...
['post_purchases_01.mp3', 'post_purchases_02.mp3', 'pre_purchases_03.mp3', 'pre_purchases_04.mp3']
Converting to wav
Converting purchases_audio/post_purchases_01.mp3 to purchases_audio/post_purchases_01.wav...
Converting purchases_audio/post_purchases_02.mp3 to purchases_audio/post_purchases_02.wav...
Converting purchases_audio/pre_purchases_03.mp3 to purchases_audio/pre_purchases_03.wav...
Converting purchases_audio/pre_purchases_04.mp3 to purchases_audio/pre_purchases_04.wav...


**Transcribing all phone call excerpts**

In [88]:
# Transcribe text from wav files
def create_text_list(folder):
    """Transcribe and punctuate every .wav file found in a folder.

    Args:
        folder: Path of the directory to scan. A trailing path separator
            is optional.

    Returns:
        A list with one punctuated transcription string per .wav file,
        ordered by sorted filename.
    """
    # Build the punctuation model once and reuse it for every file
    punctuator = Punctuator(PUNCTUATOR_MODULE)

    # os.listdir() returns names in arbitrary order; sort them so the output
    # list lines up with the filenames the same way on every run
    wav_names = sorted(name for name in os.listdir(folder) if name.endswith(".wav"))

    text_list = []
    for name in wav_names:
        # os.path.join works whether or not `folder` ends with a separator
        wav_path = os.path.join(folder, name)
        # Transcribe audio
        text = transcribe_long_audio(wav_path, language='en-US')
        # Add punctuation marks, then add the transcribed text to the list
        text_list.append(punctuator.punctuate(text))
    return text_list

# Transcribe every .wav in `folder` (set in the conversion cell above) and show the result
purchase_text = create_text_list(folder)
print(purchase_text)

["How's it going Arthur I just placed an order with you guys and I accidentally sent it to the wrong address? Can you please help me change this.", "Yeah, hello. I'm just wondering if I can speak to someone about an order I received yesterday.", 'Hi David, I just placed an order online and I was wondering if I could make an alteration to that order before you send it off.', "Paypal just looking to place an order, but before I, receive I'm just wondering if this offer still stands."]


**Organizing transcribed text**

In [89]:
# Load the labelled call transcriptions from CSV
# (the printed frame holds both pre_purchase and post_purchase rows, not only post purchase)
df = pd.read_csv('customer_call_transcriptions.csv')
print(df)

             label                                               text
0     pre_purchase  how's it going Arthur I just placed an order w...
1    post_purchase  yeah hello I'm just wondering if I can speak t...
2    post_purchase  hey I receive my order but it's the wrong size...
3     pre_purchase  hi David I just placed an order online and I w...
4    post_purchase  hey I bought something from your website the o...
..             ...                                                ...
97   post_purchase  yeah hello I'm just wondering if I can speak t...
98    pre_purchase  hi I recently ordered a new phone and I'm just...
99    pre_purchase  just looking to get some more information on t...
100   pre_purchase  hi I just realised I ordered the wrong compute...
101  post_purchase  hey mate how you doing I'm just calling in reg...

[102 rows x 2 columns]


**Building a text classifier**

In [96]:
# Hold out 30% of the transcriptions for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["label"],
    test_size=0.3,
    random_state=42,
)

# Text classifier: word counts -> TF-IDF weights -> multinomial Naive Bayes
pipeline_steps = [
    ("vectorizer", CountVectorizer()),
    ("tfidf", TfidfTransformer()),
    ("classifier", MultinomialNB()),
]
text_classifier = Pipeline(pipeline_steps)

# Train every pipeline step on the training split
text_classifier.fit(X_train, y_train)

# Predict the held-out labels and report the share predicted correctly
predictions = text_classifier.predict(X_test)
is_correct = predictions == y_test
accuracy = 100 * np.mean(is_correct)
print(f"The model is {accuracy:.2f}% accurate.")

The model is 96.77% accurate.


## 04.13 Preparing audio files for text classification

Acme are very impressed with your work so far. So they've sent over two more folders of audio files.

One folder is called __pre_purchase__ and contains audio snippets from customers who are pre-purchase, like __pre_purchase_audio_25.mp3__.

And the other is called __post_purchase__ and contains audio snippets from customers who have made a purchase (post-purchase), like __post_purchase_audio_27.mp3__.

Upon inspecting the files you find there's about 50 in each and they're in the __.mp3__ format.

Acme want to know if you can build a classifier to classify future calls. You tell them you sure can.

So in this exercise, you'll go through each folder and convert the audio files to __.wav__ format using __convert_to_wav()__ so you can transcribe them.

**Instructions**

1. Convert the files in pre_purchase to .wav using convert_to_wav().
2. Convert the files in post_purchase to .wav using convert_to_wav().

**Results:**<br>
<font color=darkgreen>Excellent! Now all of the audio files are in .wav format, let's transcribe them.</font>

In [103]:
# All files were already converted earlier, so just list the .wav files
folder = 'purchases_audio/'
post_purchase_audio = os.listdir(folder)

wav_names = [name for name in post_purchase_audio if name.endswith(".wav")]
for name in wav_names:
    print(name)

post-purchase-audio-27.wav
post_purchases_01.wav
post_purchases_02.wav
pre-purchase-audio-25.wav
pre_purchases_03.wav
pre_purchases_04.wav


## 04.14 Transcribing phone call excerpts

In this exercise, we'll transcribe the audio files we converted to __.wav__ format to text using __transcribe_audio()__.

Since there are lots of them and there could be more, we'll build a function __create_text_list()__ which takes a list of filenames of audio files as input and goes through each file transcribing the text.

__create_text_list()__ uses our __transcribe_audio()__ function we created earlier and returns a list of strings containing the transcribed text from each audio file.

__pre_purchase_wav_files__ and __post_purchase_wav_files__ are lists of audio snippet filenames.

**Instructions**

1. Use transcribe_audio() to transcribe the current file to text and add it to the text list.
2. Return the text list.
3. Use create_text_list() to transcribe all post and pre purchase audio snippets.
4. Check the first transcription of the post purchase text list.

**Results:**<br>
<font color=darkgreen>Nice job! We've now got two lists of transcribed audio snippets we can use to start building a text classifier. Let's organize our text data a little bit with a dataframe.</font>

In [105]:
# Pretty-print the transcriptions returned by create_text_list() earlier
pprint(purchase_text)

["How's it going Arthur I just placed an order with you guys and I "
 'accidentally sent it to the wrong address? Can you please help me change '
 'this.',
 "Yeah, hello. I'm just wondering if I can speak to someone about an order I "
 'received yesterday.',
 'Hi David, I just placed an order online and I was wondering if I could make '
 'an alteration to that order before you send it off.',
 "Paypal just looking to place an order, but before I, receive I'm just "
 'wondering if this offer still stands.']


## 04.15 Organizing transcribed phone call data

We're almost ready to build a text classifier. But right now, all of our transcribed text data is in two lists, __pre_purchase_text__ and __post_purchase_text__.

To organize it better for building a text classifier as well as for future use, we'll put it together into a pandas DataFrame.

To start we'll import __pandas__ as __pd__ then we'll create a post purchase dataframe, __post_purchase_df__ using __pd.DataFrame()__.

We'll pass __pd.DataFrame()__ a dictionary containing a __"label"__ key with a value of __"post_purchase"__ and a __"text"__ key with a value of our __post_purchase_text__ list.

We'll do the same for __pre_purchase_df__ except with __pre_purchase_text__.

To have all the data in one place, we'll use __pd.concat()__ and pass it the pre and post purchase DataFrames.

**Instructions**

1. Create post_purchase_df using the post_purchase_text list.
2. Create pre_purchase_df using the pre_purchase_text list.
3. Combine the two DataFrames using pd.concat().

**Results:**<br>
<font color=darkgreen>Excellent! That was the final piece of the puzzle! Having your data in an organized format makes it easier to work with in the future. Let's go and build that text classifier.</font>

In [106]:
# Show the labelled transcription DataFrame loaded from the CSV earlier
print(df)

             label                                               text
0     pre_purchase  how's it going Arthur I just placed an order w...
1    post_purchase  yeah hello I'm just wondering if I can speak t...
2    post_purchase  hey I receive my order but it's the wrong size...
3     pre_purchase  hi David I just placed an order online and I w...
4    post_purchase  hey I bought something from your website the o...
..             ...                                                ...
97   post_purchase  yeah hello I'm just wondering if I can speak t...
98    pre_purchase  hi I recently ordered a new phone and I'm just...
99    pre_purchase  just looking to get some more information on t...
100   pre_purchase  hi I just realised I ordered the wrong compute...
101  post_purchase  hey mate how you doing I'm just calling in reg...

[102 rows x 2 columns]


## 04.16 Create a spoken language text classifier
Now you've transcribed some customer call audio data, we'll build a model to classify whether the text from the customer call is __pre_purchase__ or __post_purchase__.

We've got 45 examples of __pre_purchase__ calls and 57 examples of __post_purchase__ calls.

The data the model will train on is stored in __train_df__ and the data the model will predict on is stored in __test_df__.

Try printing the __.head()__ of each of these to the console.

We'll build an __sklearn pipeline__ using __CountVectorizer()__ and __TfidfTransformer()__ to convert our text samples to numbers and then use a __MultinomialNB()__ classifier to learn what category each sample belongs to.

This model will work well on our small example here but for larger amounts of text, you may want to consider something more sophisticated.

**Instructions**
1. Create text_classifier using CountVectorizer(), TfidfTransformer(), and MultinomialNB().
2. Fit text_classifier on train_df.text and train_df.label.
3. Create predicted by calling predict() on text_classifier and passing it the text column of test_df.
4. Evaluate the model by seeing how predicted compares to the test_df.label.

**Results:**<br>
<font color=darkgreen>Consider it classified! The model was able to classify our test examples with a high level of accuracy. For larger datasets, our pipeline is a good baseline but you might want to look into something like a language model. Now you can start capturing speech, converting it to text and classifying it into different categories. Massive effort!</font>

In [107]:
# Score the classifier fitted earlier on the held-out test split
predictions = text_classifier.predict(X_test)
is_correct = predictions == y_test
accuracy = 100 * np.mean(is_correct)
print(f"The model is {accuracy:.2f}% accurate.")

The model is 96.77% accurate.


# Additional material

- **Online Voice Recorder & Audio Cutter**: https://voice-recorder-online.com/
- **For more details on available language models in "speech_recognition" python module**: https://cloud.google.com/speech-to-text/docs/languages
- **Datacamp course**: https://learn.datacamp.com/courses/spoken-language-processing-in-python
- To work with audio files in formats other than .wav, use ffmpeg (http://ffmpeg.org/), e.g. in the shell: <code>ffmpeg -i test.mp3 test.wav</code>
- Problem with __ffmpeg__ solved: https://www.programmersought.com/article/76562906865/
- Is Vader SentimentIntensityAnalyzer Multilingual? NO, https://stackoverflow.com/questions/45275166/is-vader-sentimentintensityanalyzer-multilingual
- sentiment-analysis-spanish 0.0.25 https://pypi.org/project/sentiment-analysis-spanish/
- Example of Google Cloud Speech API: https://stackoverflow.com/questions/62526560/python-speechrecognition-vs-google-cloud-speech-api
- Adding punctuation: https://pypi.org/project/punctuator/ (For english only)