# GA Users Workshop

Clean and play around with transcripts . Author: A.Pilko <A.Pilko@soton.ac.uk>

In [79]:
# Install deps
!pip3 install nltk pandas stopwordsiso matplotlib
import nltk
import re
import random
import pandas as pd
import matplotlib.pyplot as mpl
import stopwordsiso as swiso
from copy import deepcopy

%matplotlib notebook

# Why is this not default..
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Set a date otherwise conversion to datetime dtype reverts to current date
date_str = '2021-03-26 '



## Ingest

In [2]:
chat_token_split_regex = re.compile(r'\[.*?\].*?\s*?.\d{2}:\d{2}(?::\d{2})?')
trans_token_split_regex = re.compile(r'\d{2}:\d{2}:\d{2}')

msg_regex = re.compile(r'(?<=]).*?(?=\s+.\d{2}:\d{2}(?::\d{2})?)')
ts_regex = re.compile(r'\s*?\d{2}:\d{2}(?::\d{2})?')
author_regex = re.compile(r'\[.*?\]')

### Chat

In [3]:
with open('cleaned_chat.txt', 'r') as f:
    raw_chat = f.read()

Tokenise chat messages by regex and extract author and time

In [4]:
# Get individual chat msgs
chat_tokens = chat_token_split_regex.findall(raw_chat)
chat_tokens = [m.strip() for m in chat_tokens if m and m.strip()] # Filter nulls and strip control chars
print(len(chat_tokens), ' chat messages ')
chat_tokens[-10:] # Print first 10 for sanity check

412  chat messages 


['[GA Pilot 6] Thanks for consulting us - a big step in the right direction.\n\t11:01',
 '[Drone Industry Interest 4] How do we get in touch with the APPG @nouj?\n\t11:01',
 '[GA Pilot 31] Thank you, Jim, panel & Facilitator\n\t11:01',
 '[GA Pilot 3] Engagement t is difficult but worthwhile thank you for the opportunity .\n\t11:01',
 '[GA Pilot 33] Thanks very good discussion\n\t11:01',
 '[Model Aircraft Flyer] Thanks. Well chaired [Facilitator] \n\t11:02',
 '[GA Pilot 17] Keep talking, continuing communication is essential\n\t11:02',
 '[Drone User 8] Thanks for hosting. It was really smooth. Perhaps next time an additional 30mins for the whiteboard sessions\n\t11:02',
 "[GA Pilot 35] As Chair of the Local to Ipswich LAA Club the Great Oakley ACP stimulated a response from our members - thank you for the engagement and more is required!! - this is about 'change' and stakeholder engagement really important\n\t11:02",
 '[GA Pilot 6] Well managed, [Facilitator]. Thanks.\n\t11:02']

In [5]:
chat_authors = [author_regex.match(m).group(0).strip('[]') for m in chat_tokens]

assert len(chat_authors) == len(chat_tokens) # or something has gone very wrong...
print(len(set(chat_authors)), ' unique chat authors ')
chat_authors[:10] # SC

52  unique chat authors 


['Microlight pilot 6',
 'R and D 3',
 'Facilitator',
 'GA Pilot 23',
 'Facilitator',
 'Microlight pilot 7',
 'GA Pilot 18',
 'GA Pilot 30',
 'GA Pilot 3',
 'GA Pilot 18']

In [6]:
chat_timestamps = [date_str + ts_regex.search(m).group(0).strip() for m in chat_tokens]

assert len(chat_timestamps) == len(chat_tokens)
chat_timestamps[:10]

['2021-03-26 09:07',
 '2021-03-26 09:08',
 '2021-03-26 09:09',
 '2021-03-26 09:19',
 '2021-03-26 09:21',
 '2021-03-26 09:22',
 '2021-03-26 09:23',
 '2021-03-26 09:23',
 '2021-03-26 09:25',
 '2021-03-26 09:25']

In [7]:
chat_msgs = [msg_regex.search(m).group(0).strip() for m in chat_tokens]
assert len(chat_msgs) == len(chat_tokens)
chat_msgs[:10]

['Will the slides be shared',
 'Ooo yes please. I would like to share them with my PDRA at the University of Edinburgh who is working on the UKRI TAS Governance node.',
 'Yes, slides will be shared after the workshop',
 'How many transits between Lee-on-Solent and Binstead were completed during the trial?',
 'Thanks Jonathan, weâ€™ll return to that during questions',
 'Statement of needs often downplay alternates. Hovercraft transit about the same time. Importantly what are drone weather limitations? Vans and ferry will operate in all weathers - Skyports Oban trial said they couldnt operate 40% of time due wind so have extended trial. Important issue.',
 'Lots of demand when there is no cost to users and government grants paying providers',
 'where is the "demand" from ? There does not appear to have been a business case published for any TDA examining the drone versus alternates',
 'UTM in 5 years is very ambitious',
 'You need autonomous detect and avoid certified for every BVLOS dro

Pile in all parsed out chat data into dataframe

In [8]:
chat_df = pd.DataFrame({'author': chat_authors, 'time': chat_timestamps, 'message': chat_msgs})
chat_df['author'] = chat_df['author'].astype('string')
chat_df['time'] = pd.to_datetime(chat_df['time'])
chat_df['message'] = chat_df['message'].astype('string')
print(chat_df.dtypes)
chat_df.head()

author             string
time       datetime64[ns]
message            string
dtype: object


Unnamed: 0,author,time,message
0,Microlight pilot 6,2021-03-26 09:07:00,Will the slides be shared
1,R and D 3,2021-03-26 09:08:00,Ooo yes please. I would like to share them wit...
2,Facilitator,2021-03-26 09:09:00,"Yes, slides will be shared after the workshop"
3,GA Pilot 23,2021-03-26 09:19:00,How many transits between Lee-on-Solent and Bi...
4,Facilitator,2021-03-26 09:21:00,"Thanks Jonathan, weâ€™ll return to that during..."


### Presentation Transcript

In [9]:
with open('cleaned_transcript.txt', 'r') as f:
    raw_trans = f.read()

In [10]:
# Get timestamped blocks in transcript
ts_tokens = trans_token_split_regex.split(raw_trans)
ts_tokens = [m.strip() for m in ts_tokens if m and m.strip()] # Filter nulls and strip control chars
print(len(ts_tokens), ' transcript timeblocks ')
_ = [print(t, end='\n--------------------------------------------------------------------\n') for t in ts_tokens] # Print first 10 for sanity check

97  transcript timeblocks 
[Facilitator] Welcome everybody. We are just waiting for some remaining people to join the call as we near nine o'clock. So, we're going to leave a couple of minutes now just to allow any last people to come through and then we'll be making a start within two or three minutes.  

Thank you so much for being here with us. And we will have a proper introduction in just a moment.  And to be clear, this is the workshop on un-crewed aerial vehicles in shared airspace, you are in the right place. Weâ€™ll be making a start up in just a couple of minutes.  We've still got some people arriving, so we'll just allow them to come through and then we'll make a start.

And just a note to everybody, I am using the control to mute all participants just to make sure that we don't simultaneously hear about 60 people who've tapping away at the same time.  If at any point you want to contribute, you will need to unmute yourself, which should be available via the microphone icon,

In [11]:
# ts_timestamps = [ts_regex.search(m).group(0).strip() for m in ts_tokens]
ts_timestamps = [date_str + t.strip() for t in trans_token_split_regex.findall(raw_trans)][:-1]
for t in ts_timestamps:
    if ts_timestamps.count(t) > 1:
        print(t)

print(len(ts_timestamps))
# assert len(ts_timestamps) == len(ts_tokens)
ts_timestamps[:10]

97


['2021-03-26 09:00:40',
 '2021-03-26 09:06:50',
 '2021-03-26 09:07:21',
 '2021-03-26 09:09:46',
 '2021-03-26 09:12:19',
 '2021-03-26 09:13:54',
 '2021-03-26 09:15:30',
 '2021-03-26 09:16:16',
 '2021-03-26 09:16:50',
 '2021-03-26 09:17:53']

In [12]:
# Here not all transcribed messages have an author attached as the timestamps are thrown in the middle of text.
# This presumes the previously tagged author is still talking

ts_authors = []
for m in ts_tokens:
    ssm = author_regex.match(m)
    if ssm:
        ts_authors.append(ssm.group(0).strip('[]'))
    else:
        ts_authors.append(ts_authors[-1])

assert len(ts_authors) == len(ts_tokens)
print(len(set(ts_authors)), ' unique transcript authors ')
set(ts_authors) # SC

10  unique transcript authors 


{'Drone User 2',
 'Facilitator',
 'GA Pilot 1',
 'GA Pilot 36',
 'GM',
 'JS',
 'Microlight Pilot 2',
 'NJ',
 'Other 1',
 'TC'}

In [13]:
ts_msgs = [author_regex.sub('', m) for m in ts_tokens]

assert len(ts_msgs) == len(ts_tokens)
ts_msgs[:10]

[" Welcome everybody. We are just waiting for some remaining people to join the call as we near nine o'clock. So, we're going to leave a couple of minutes now just to allow any last people to come through and then we'll be making a start within two or three minutes.  \n\nThank you so much for being here with us. And we will have a proper introduction in just a moment.  And to be clear, this is the workshop on un-crewed aerial vehicles in shared airspace, you are in the right place. Weâ€™ll be making a start up in just a couple of minutes.  We've still got some people arriving, so we'll just allow them to come through and then we'll make a start.\n\nAnd just a note to everybody, I am using the control to mute all participants just to make sure that we don't simultaneously hear about 60 people who've tapping away at the same time.  If at any point you want to contribute, you will need to unmute yourself, which should be available via the microphone icon, which is on the left most side of

In [14]:
trans_df = pd.DataFrame({'author': ts_authors, 'time': ts_timestamps, 'message': ts_msgs})
trans_df['author'] = trans_df['author'].astype('string')
trans_df['time'] = pd.to_datetime(trans_df['time'])
trans_df['message'] = trans_df['message'].astype('string')
print(trans_df.dtypes)
trans_df.head()

author             string
time       datetime64[ns]
message            string
dtype: object


Unnamed: 0,author,time,message
0,Facilitator,2021-03-26 09:00:40,Welcome everybody. We are just waiting for so...
1,TC,2021-03-26 09:06:50,"Thanks very much, . Good morning, everyone. ..."
2,TC,2021-03-26 09:07:21,"Next slide, please. So, my name is Tom Cherr..."
3,TC,2021-03-26 09:09:46,"Next slide, please. Just to introduce you to t..."
4,TC,2021-03-26 09:12:19,"Next slide, please. Just a little bit of back..."


In [15]:
full_df = pd.concat((chat_df, trans_df)).sort_values('time')
full_df.to_csv('interlaced_transcript.csv')
print(full_df.dtypes)
full_df.head()

author             string
time       datetime64[ns]
message            string
dtype: object


Unnamed: 0,author,time,message
0,Facilitator,2021-03-26 09:00:40,Welcome everybody. We are just waiting for so...
1,TC,2021-03-26 09:06:50,"Thanks very much, . Good morning, everyone. ..."
0,Microlight pilot 6,2021-03-26 09:07:00,Will the slides be shared
2,TC,2021-03-26 09:07:21,"Next slide, please. So, my name is Tom Cherr..."
1,R and D 3,2021-03-26 09:08:00,Ooo yes please. I would like to share them wit...


At this point, the raw text is tagged with relevant metadata and in a form that is easy to work with. It is, however, still verbatim and contains shedloads of extraneous words etc. This is cleaned next.

## Analysis

Strip punctuation, extra whitespace, normalise case. Homogenous text is then tokenised into words, stopwords (eg I, he, she, was, the) are removed then words are lemmatised (taken to root form, eg "going" and "gone" stem from "go")

In [108]:
punc_regex = re.compile('[^0-9A-Za-z ]')
xws_regex = re.compile('\s+', flags=re.MULTILINE)

nltk.download('punkt')
tok = nltk.tokenize.word_tokenize

sws = swiso.stopwords('en')

nltk.download('wordnet')
wn = nltk.WordNetLemmatizer()

def _clean_msg(msg):
    res = xws_regex.sub(' ', msg) # Strip extra whitespace
    res = punc_regex.sub(' ', msg) # Strip Punctuation
    res = res.lower() # Force lowercase
    tokens = [t for t in tok(res) if t not in sws] # Tokenise into words
    return [wn.lemmatize(t) for t in tokens] #Lemmatise and return
    
clean_df = deepcopy(full_df)
clean_df['tokens'] = clean_df['message'].apply(_clean_msg)
clean_df['message'] = [m.replace('\n', '') for m in clean_df['message']]
clean_df.head()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Aliak\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Aliak\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

Unnamed: 0,author,time,message,tokens
0,Facilitator,2021-03-26 09:00:40,Welcome everybody. We are just waiting for so...,"[waiting, remaining, people, clock, leave, cou..."
1,TC,2021-03-26 09:06:50,"Thanks very much, . Good morning, everyone. ...","[morning, janet, share, slide, slide, morning,..."
0,Microlight pilot 6,2021-03-26 09:07:00,Will the slides be shared,"[slide, shared]"
2,TC,2021-03-26 09:07:21,"Next slide, please. So, my name is Tom Cherret...","[slide, tom, cherrett, head, transportation, u..."
1,R and D 3,2021-03-26 09:08:00,Ooo yes please. I would like to share them wit...,"[ooo, share, pdra, university, edinburgh, ukri..."


Split transcript into what we said("internal") and comments from attendees ("external")

In [109]:
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.sentiment import SentimentAnalyzer
nltk.download('vader_lexicon')

sa = SentimentIntensityAnalyzer()

clean_df['compound_sentiment'] = [sa.polarity_scores(m)['compound'] for m in clean_df['message']]
clean_df.head()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Aliak\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

Unnamed: 0,author,time,message,tokens,compound_sentiment
0,Facilitator,2021-03-26 09:00:40,Welcome everybody. We are just waiting for so...,"[waiting, remaining, people, clock, leave, cou...",0.9992
1,TC,2021-03-26 09:06:50,"Thanks very much, . Good morning, everyone. ...","[morning, janet, share, slide, slide, morning,...",0.9732
0,Microlight pilot 6,2021-03-26 09:07:00,Will the slides be shared,"[slide, shared]",0.34
2,TC,2021-03-26 09:07:21,"Next slide, please. So, my name is Tom Cherret...","[slide, tom, cherrett, head, transportation, u...",0.9834
1,R and D 3,2021-03-26 09:08:00,Ooo yes please. I would like to share them wit...,"[ooo, share, pdra, university, edinburgh, ukri...",0.8271


In [160]:
internal_authors = ['Facilitator', 'TC', 'JS', 'NJ', 'GM', 'AP', 'MG']

int_df = clean_df[clean_df['author'].isin(internal_authors)]
ext_df = clean_df[~clean_df['author'].isin(internal_authors)]
int_df.head()
ext_df.head()
int_tokens = nltk.flatten(int_df['tokens'].tolist())
ext_tokens = nltk.flatten(ext_df['tokens'].tolist())
print(len(set(int_tokens)), ' unique internal tokens')
print(len(set(ext_tokens)), ' unique external tokens')

Unnamed: 0,author,time,message,tokens,compound_sentiment
0,Facilitator,2021-03-26 09:00:40,Welcome everybody. We are just waiting for so...,"[waiting, remaining, people, clock, leave, cou...",0.9992
1,TC,2021-03-26 09:06:50,"Thanks very much, . Good morning, everyone. ...","[morning, janet, share, slide, slide, morning,...",0.9732
2,TC,2021-03-26 09:07:21,"Next slide, please. So, my name is Tom Cherret...","[slide, tom, cherrett, head, transportation, u...",0.9834
2,Facilitator,2021-03-26 09:09:00,"Yes, slides will be shared after the workshop","[slide, shared, workshop]",0.6249
3,TC,2021-03-26 09:09:46,"Next slide, please. Just to introduce you to t...","[slide, introduce, team, researcher, professor...",0.9857
...,...,...,...,...,...
93,Facilitator,2021-03-26 10:57:08,Shall we show the results? Yes. Great. So if y...,"[scroll, people, feeling, strong, consensus, n...",0.9682
94,Facilitator,2021-03-26 10:57:21,Are there any wider challenges to shared airsp...,"[wider, challenge, shared, airspace, worth, me...",0.9320
95,JS,2021-03-26 10:58:05,"So I think, and Janet, that perhaps if you c...","[janet, share, slide, stick, yeah, participati...",0.9892
96,JS,2021-03-26 10:59:00,Just a big thank you to everyone. Really gre...,"[hour, life, busy, life, engage, comment, hope...",0.9966


Unnamed: 0,author,time,message,tokens,compound_sentiment
0,Microlight pilot 6,2021-03-26 09:07:00,Will the slides be shared,"[slide, shared]",0.34
1,R and D 3,2021-03-26 09:08:00,Ooo yes please. I would like to share them wit...,"[ooo, share, pdra, university, edinburgh, ukri...",0.8271
3,GA Pilot 23,2021-03-26 09:19:00,How many transits between Lee-on-Solent and Bi...,"[transit, lee, solent, binstead, completed, tr...",0.0
5,Microlight pilot 7,2021-03-26 09:22:00,Statement of needs often downplay alternates. ...,"[statement, downplay, alternate, hovercraft, t...",0.4767
6,GA Pilot 18,2021-03-26 09:23:00,Lots of demand when there is no cost to users ...,"[lot, demand, cost, user, government, grant, p...",-0.2023


1360  unique internal tokens
1357  unique external tokens


In [111]:
int_freqdist = nltk.FreqDist(int_tokens)
ext_freqdist = nltk.FreqDist(ext_tokens)
print('Top 10 Internal:')
int_freqdist.tabulate(10)
print('\nTop 10 External:')
ext_freqdist.tabulate(10)

Top 10 Internal:
   drone question   people airspace     chat    issue    class      lot  comment    slide 
      79       67       53       51       50       41       40       38       35       33 

Top 10 External:
   drone    class    pilot airspace    avoid aircraft      tmz     user      tda     lima 
     107       72       71       67       38       35       33       27       24       24 


In [166]:
ewma_smoothing = 0.16

int_df['cs_ewma'] = int_df['compound_sentiment'].ewm(alpha=ewma_smoothing).mean()
ext_df['cs_ewma'] = ext_df['compound_sentiment'].ewm(alpha=ewma_smoothing).mean()

int_df.tail(60)
ext_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  int_df['cs_ewma'] = int_df['compound_sentiment'].ewm(alpha=ewma_smoothing).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ext_df['cs_ewma'] = ext_df['compound_sentiment'].ewm(alpha=ewma_smoothing).mean()


101

Unnamed: 0,author,time,message,tokens,compound_sentiment,cs_ewma
38,JS,2021-03-26 09:43:35,"Well, this is just an idea. But if I was flyi...","[idea, flying, manned, aircraft, region, liste...",0.9824,0.512175
39,Facilitator,2021-03-26 09:43:58,Thank you. So next one here. What about alter...,"[alternative, detects, avoids, technology, vis...",0.4215,0.497659
40,JS,2021-03-26 09:45:23,"Yes, so we as a university constantly monitor...","[university, constantly, monitor, lot, constan...",0.9586,0.571444
41,Facilitator,2021-03-26 09:45:35,"So last point on this slide, does Class Lima ...","[slide, class, lima, connotation, label, class]",0.4404,0.550469
42,JS,2021-03-26 09:47:13,"Yeah, this has been pointed out. I had a lon...","[yeah, conversation, genuinely, creating, clas...",0.8674,0.601194
43,Facilitator,2021-03-26 09:47:25,So I will invite people to respond to that la...,"[invite, people, respond, labelling, class, li...",0.6296,0.605741
44,JS,2021-03-26 09:47:56,"Oh, that's a great question. So our vision is...","[question, vision, uncontrolled, airspace]",0.7951,0.636045
45,Facilitator,2021-03-26 09:48:23,So I'm going to try and take these through in...,"[chronological, lot, topic, coming, chat, cove...",0.4019,0.598575
46,JS,2021-03-26 09:48:34,OK. I can field that one. So we undertook th...,"[field, undertook, isle, wight, baby, step, fl...",0.347,0.558316
47,Facilitator,2021-03-26 09:48:59,So I'll just mention also there were some rea...,"[mention, contribution, coming, chat, potentia...",0.9315,0.618034


Unnamed: 0,author,time,message,tokens,compound_sentiment,cs_ewma
0,Microlight pilot 6,2021-03-26 09:07:00,Will the slides be shared,"[slide, shared]",0.34,0.34
1,R and D 3,2021-03-26 09:08:00,Ooo yes please. I would like to share them wit...,"[ooo, share, pdra, university, edinburgh, ukri...",0.8271,0.604728
3,GA Pilot 23,2021-03-26 09:19:00,How many transits between Lee-on-Solent and Bi...,"[transit, lee, solent, binstead, completed, tr...",0.0,0.36717
5,Microlight pilot 7,2021-03-26 09:22:00,Statement of needs often downplay alternates. ...,"[statement, downplay, alternate, hovercraft, t...",0.4767,0.402071
6,GA Pilot 18,2021-03-26 09:23:00,Lots of demand when there is no cost to users ...,"[lot, demand, cost, user, government, grant, p...",-0.2023,0.23586


Plot out compound sentiment polarity scores generated by NLTK's pre-trained VADER sentiment analyser model. This score the sentiment of each message on positivity, negativity and neutrality and combines them to create a compound sentiment score $\in [1,-1]$ where +1 is most positive, -1 is most negative and 0 is neutral sentiment.

The VADER model is trained on social media data so does best at shorter sentences (like most of those in the chat) so is sufficient for a first pass.

Raw scores are expectedly noisy so an EWMA pass is used to smooth them out to something readable

In [185]:
from matplotlib.colors import cnames

sentiment_fig, sax = mpl.subplots(1,1, figsize=(9,8))

int_line = sax.scatter(int_df['time'], int_df['compound_sentiment'], label='Internal Sentiment', color='g')
ext_line = sax.scatter(ext_df['time'], ext_df['compound_sentiment'], label='External Sentiment', color='r')

# 148 colours to choose from!
# Keep rerunning the cell until you win the intelligible plot colours lottery
rand_colour = lambda *args: random.choice(list(cnames.keys())) 

author_colours = {} #Cache colours for each author to prevent duplicate legend entries

for idx, (_,row) in enumerate(int_df.iterrows()):
    # Prevent OOB as we look one ahead of the current row on every iteration
    if idx < len(int_df) - 1:
        author = row['author'] # We want the author that is currently speaking in this time period
        author_start = row['time']
        author_stop = int_df.iloc[idx+1]['time']
        label = author
        colour = rand_colour()
        if author not in author_colours:
            author_colours[author] = colour
        else:
            label = '_' #Prevent duplicate legend entries as each axvspan is its own artist
            colour = author_colours[author]
        _ = sax.axvspan(author_start, author_stop, alpha=0.2, color=colour, label=label)
#         print(f'{author} speaking between {author_start} and {author_stop}')

# Prevent these outputting objects by discarding
_ = sax.set_title('Raw Compound Sentiment')
_ = sax.legend()

sentiment_fig.show()

#### Repeat for EWMA values

msentiment_fig, msax = mpl.subplots(1,1, figsize=(9,8))

int_mline = msax.plot(int_df['time'], int_df['cs_ewma'], label='Internal Sentiment', color='g')
ext_mline = msax.plot(ext_df['time'], ext_df['cs_ewma'], label='External Sentiment', color='r')

mauthor_colours = {} #Cache colours for each author to prevent duplicate legend entries

for idx, (_,row) in enumerate(int_df.iterrows()):
    # Prevent OOB as we look one ahead of the current row on every iteration
    if idx < len(int_df) - 1:
        author = row['author'] # We want the author that is currently speaking in this time period
        author_start = row['time']
        author_stop = int_df.iloc[idx+1]['time']
        label = author
        colour = author_colours[author]
        if author not in mauthor_colours:
            mauthor_colours[author] = colour
        else:
            label = '_' #Prevent duplicate legend entries as each axvspan is its own artist
            colour = author_colours[author]
        _ = msax.axvspan(author_start, author_stop, alpha=0.2, color=colour, label=label)
#         print(f'{author} speaking between {author_start} and {author_stop}')

# Prevent these outputting objects by discarding
_ = msax.set_title(f'EWMA Compound Sentiment with smoothing $\\alpha$={ewma_smoothing}')
_ = msax.legend()

msentiment_fig.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>