### Dependencies

In [76]:
import re
import pandas as pd
import numpy as np
import plotly.express as px # pip install plotly
import plotly.io as pio
import plotly.graph_objects as go
pio.renderers.default = 'vscode'

import os

import chart_studio

# Chart Studio credentials are read from the environment so that no secret is
# stored in the notebook. Export CHART_STUDIO_USERNAME and CHART_STUDIO_API_KEY
# before starting the kernel if you intend to upload figures.
# NOTE(review): an API key was previously committed in this cell; it should be
# revoked/rotated in the Chart Studio account settings.
username = os.environ.get('CHART_STUDIO_USERNAME')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if username and api_key:
    # Writes the credentials to the local Chart Studio credentials file.
    chart_studio.tools.set_credentials_file(username=username, api_key=api_key)
import chart_studio.plotly as csp
import chart_studio.tools as cst

# Remove pandas' display truncation (columns, rows and cell width) so frames
# are rendered in full; None means "no limit" for each of these options.
for option_name in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(option_name, None)

# Load the two compiled CSV inputs (paths are relative to the notebook's folder).
# `df` is the frame every plot below is built from; the cells in this notebook
# read its 'Epoch', 'Timestamp', 'X', 'Y', 'LDA Topic', 'Union', 'Engagements',
# 'Hovertext' and 'Breakdown' columns.
df = pd.read_csv("../dataset/_compiled/Clustered.csv")
# `df_topics` is only referenced by the commented-out keyword annotations in the
# Nutshell Plot cell (it reads a 'Keywords' column there).
df_topics = pd.read_csv("../dataset/_compiled/Keywords.csv")

In [77]:
# Add a monthly Period column derived from the 'Epoch' column (interpreted as
# seconds since the Unix epoch), then order the rows by 'Timestamp'.
df = (
    df
    .assign(Date=lambda frame: pd.to_datetime(frame['Epoch'], unit='s').dt.to_period('M'))
    .sort_values(by='Timestamp')
)

In [78]:
# !wget 'https://github.com/openmaptiles/fonts/raw/master/roboto/Roboto-Light.ttf'
# !wget 'https://github.com/openmaptiles/fonts/raw/master/roboto/Roboto-Regular.ttf'
# !wget 'https://github.com/openmaptiles/fonts/raw/master/roboto/Roboto-Medium.ttf'
# !wget 'https://github.com/openmaptiles/fonts/raw/master/roboto/Roboto-Bold.ttf'

### Nutshell Plot

In [79]:
# Plotly Express treats string values as discrete categories, so cast the topic
# ids to str to get one trace (and one legend entry) per topic.
df["LDA Topic"] = df["LDA Topic"].astype(str)

# Bubble chart of the t-SNE coordinates: one marker per row, coloured by LDA
# topic and sized by 'Engagements'. Only 'Breakdown' is kept in the hover box
# below the bold 'Hovertext' title.
fig = px.scatter(df, x='X', y='Y', color='LDA Topic',
                 title='t-SNE Clustering of LDA-extracted Topics',
                 hover_name='Hovertext',
                 size='Engagements',
                 color_discrete_map={
                '1': '#ffb000',
                '2': '#785ef0',
                '3': '#dc267f',
                '4': '#fe6100',
                '5': '#648fff'
              },
                 hover_data={'X':False, 'Y':False, 'LDA Topic':False, 'Hovertext':False, 'Breakdown':True})

# Human-readable names for topics 1..5 (list position i <-> topic id i + 1).
LDA_label = ['Employee Exit Process', 'BPO Work and Culture', 'Job Application', 'Workplace Management', 'Employee Well-being']

# Relabel each trace by the topic id px stored in trace.name, NOT by its position
# in fig.data: px creates traces in the order topic values first appear in `df`,
# which need not be 1, 2, 3, 4, 5, so positional labelling could attach the
# wrong name to a topic. Traces with an unexpected name are left untouched.
label_by_topic = {str(topic_id): label for topic_id, label in enumerate(LDA_label, start=1)}
for trace in fig.data:
    label = label_by_topic.get(trace.name)
    if label is None:
        continue
    trace.name = label
    # Prefix the px-generated hover template with the readable topic name.
    trace.hovertemplate = f"{label} <br><br>" + f"{trace.hovertemplate}"

# fig.add_annotation(
#     x=0,
#     y=-0.2*(1/10)-0.15,
#     text="Top 10 Most Frequent Keywords per Topic",
#     showarrow=False,
#     xref='paper',
#     yref='paper',
#     align='left',
#     font=dict(color='#0e0f11', family='Roboto Regular', size=12)
#   )

# for i, keyword in enumerate(df_topics['Keywords'][:10]):
#   fig.add_annotation(
#     x=0,
#     y=-0.2*(i/5)-0.25,
#     text=f"{LDA_label[i]}: " + f"{keyword.replace(' ', ', ')}",
#     showarrow=False,
#     xref='paper',
#     yref='paper',
#     align='left',
#     font=dict(color=fig.data[i].marker['color'], family='Roboto Regular', size=12)
#   )

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
grid_ink = 'rgba(14, 15, 17, 0.2)'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# Area-scaled markers with a thin white outline. sizeref is derived from the
# largest 'Engagements' value (this looks like Plotly's bubble-chart recipe
# 2 * max(size) / desired_max_size**2 with a target size of 110 -- confirm).
fig.update_traces(
    mode='markers',
    opacity=1,
    marker=dict(
        sizemode='area',
        sizeref=2.*max(df['Engagements'])/(110**2),
        line_color='white',
        line_width=1,
    ),
)

# Canvas size, title/legend fonts and white backgrounds.
fig.update_layout(
    height=1080*0.5,
    width=1920*0.6,
    title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
    legend=dict(title="Topic", font=body_font),
    paper_bgcolor='#ffffff',
    plot_bgcolor='#ffffff',
    margin_autoexpand=True,
)

# Both axes share the same line/grid look and carry no title text; only the
# title font family differs ('Roboto' on x, 'Roboto Regular' on y).
axis_look = dict(
    showline=True,
    automargin=True,
    zeroline=False,
    showgrid=True,
    linewidth=1,
    gridwidth=0.5,
    linecolor=ink,
    gridcolor=grid_ink,
    tickfont=body_font,
)
fig.update_xaxes(title=dict(text="", font=dict(color=ink, family='Roboto', size=12)), **axis_look)
fig.update_yaxes(title=dict(text="", font=body_font), **axis_look)

# If rendering fails for lack of nbformat: `pip install nbformat`, then restart
# the kernel.
fig.show()

### R1 Plot

In [80]:
# The aggregation cell that follows groups on integer label values (its plots
# use the int keys 0 / 1 in their colour maps), so cast both label columns.
for label_column in ('LDA Topic', 'Union'):
    df[label_column] = df[label_column].astype(int)

#### Manual Label

In [81]:
# Running total, month by month, of the number of distinct 'Hovertext' values
# per month (all labels together). Used as the denominator for relative plots.
total_count = df.groupby('Date')['Hovertext'].nunique().cumsum()

# Manually Labeled

# Wide table (rows: month, columns: Union value) of the cumulative per-month
# distinct-'Hovertext' counts; months where a label is absent contribute 0.
union_unique = df.groupby(['Date', 'Union'])[['Hovertext']].nunique()
union_cumulative_wide = union_unique.pivot_table('Hovertext', 'Date', 'Union').fillna(0).cumsum()

# Share of the overall running total held by each label.
union_relative_wide = union_cumulative_wide.div(total_count, axis=0)

# Long format for plotting, with the month rendered as e.g. 'Jun 2023'.
cumulative_count_manual = union_cumulative_wide.stack(0).reset_index()
cumulative_count_manual.columns = ['Date', 'Union', 'Frequency']
cumulative_count_manual['Date'] = cumulative_count_manual['Date'].map(lambda period: period.strftime('%b %Y'))

rel_freq_manual = union_relative_wide.stack(0).reset_index()
rel_freq_manual.columns = ['Date', 'Union', 'Relative Frequency']
rel_freq_manual['Date'] = rel_freq_manual['Date'].map(lambda period: period.strftime('%b %Y'))

# Plain row counts per (month, label). Unlike the tables above this counts rows
# rather than distinct 'Hovertext' values, and has no row for absent pairs.
monthly_count_manual = df.groupby(['Date', 'Union']).size().reset_index(name='Frequency')
monthly_count_manual['Date'] = monthly_count_manual['Date'].map(lambda period: period.strftime('%b %Y'))

##### Monthly Frequency vs Time

In [82]:
# Keep both label columns as strings on `df`; the LDA aggregation cell further
# down groups on 'LDA Topic' and its plots use string colour-map keys.
df['LDA Topic'] = df['LDA Topic'].astype(str)
df['Union'] = df['Union'].astype(str)

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# The x axis is categorical ('Jun 2023'-style strings) and monthly_count_manual
# has no row for a (month, label) pair that never occurs. Without an explicit
# order Plotly lays categories out in order of first appearance across traces,
# so a month missing from the first trace would be pushed to the end of the
# axis. The table is already sorted chronologically, so its de-duplicated
# 'Date' column gives the intended order.
month_order = list(dict.fromkeys(monthly_count_manual['Date']))

fig = px.line(monthly_count_manual, x='Date', y='Frequency', color='Union',
              title='Monthly Frequency vs Time',
              category_orders={'Date': month_order},
              color_discrete_map={
                0: '#fac73c',
                1: '#de6f58'
              },
             )

# Readable names for Union values 0 and 1 (list position == Union value).
manual_label = ['Non-unfair Offering', 'Unfair Offering']

# px names each trace after its Union value; replace that with the readable
# label and a matching hover template. Iterating the traces themselves (rather
# than range(len(manual_label))) also works when a label has no data at all.
label_by_union = {str(code): label for code, label in enumerate(manual_label)}
for trace in fig.data:
    label = label_by_union.get(trace.name)
    if label is None:
        continue
    trace.name = label
    trace.hovertemplate = 'Topic: ' + label + '<br>Date: %{x}<br>Monthly Frequency: %{y}<extra></extra>'

fig.update_layout(height=1080*0.5,
                  width=1920*0.6,
                  margin=dict(l=120, r=200),
                  title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
                  legend=dict(title="Topic", font=body_font),
                  paper_bgcolor='#ffffff',
                  plot_bgcolor='#ffffff',
                 )

# Both axes share the same line/grid look.
axis_look = dict(showline=True,
                 zeroline=False,
                 showgrid=True,
                 linewidth=1,
                 gridwidth=0.5,
                 linecolor=ink,
                 gridcolor='rgba(14, 15, 17, 0.2)',
                 tickfont=body_font)

fig.update_xaxes(nticks=8,
                 title=dict(text="", font=dict(color=ink, family='Roboto', size=12)),
                 **axis_look)

fig.update_yaxes(title=dict(text="Number of Posts", font=body_font),
                 **axis_look)

# Dashed red vertical markers at the highlighted months, drawn as two-point
# line traces from y=0 to y=250 and hidden from the legend.
for month_key, month_name in (('Jun 2023', 'June 2023'), ('Dec 2023', 'December 2023')):
    fig.add_trace(go.Scatter(
      x=[month_key, month_key],
      y=[0, 250],
      mode='lines',
      showlegend=False,
      hoverinfo='text',
      hovertext=month_name,
      line=dict(color='red', width=1, dash='dash')))

# Show the plot
fig.show()

In [83]:
# csp.plot(fig, filename = 'test_plot', auto_open=True)

##### Cumulative Frequency vs Time

In [84]:
# Keep both label columns as strings on `df` (the LDA cells further down group
# on 'LDA Topic' and use string colour-map keys).
for label_column in ('LDA Topic', 'Union'):
    df[label_column] = df[label_column].astype(str)

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# One line per Union value from the cumulative table.
union_palette = {0: '#fac73c', 1: '#de6f58'}
fig = px.line(
    cumulative_count_manual,
    x='Date',
    y='Frequency',
    color='Union',
    title='Cumulative Frequency vs Time',
    color_discrete_map=union_palette,
)

manual_label = ['Non-unfair Offering', 'Unfair Offering']

# px names each trace after its Union value ('0' / '1'); swap in the readable
# label and a matching hover template.
hover_tail = '<br>Date: %{x}<br>Cumulative Frequency: %{y}<extra></extra>'
readable = dict(zip(('0', '1'), manual_label))
for position in range(len(manual_label)):
    trace = fig.data[position]
    if trace.name in readable:
        label = readable[trace.name]
        trace.name = label
        trace.hovertemplate = 'Topic: ' + label + hover_tail

# Canvas size, margins, title/legend fonts and white backgrounds.
fig.update_layout(
    height=1080*0.5,
    width=1920*0.6,
    margin=dict(l=120, r=200),
    title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
    legend=dict(title="Topic", font=body_font),
    paper_bgcolor='#ffffff',
    plot_bgcolor='#ffffff',
)

# Both axes share the same line/grid look.
axis_look = dict(
    showline=True,
    zeroline=False,
    showgrid=True,
    linewidth=1,
    gridwidth=0.5,
    linecolor=ink,
    gridcolor='rgba(14, 15, 17, 0.2)',
    tickfont=body_font,
)
fig.update_xaxes(
    nticks=8,
    title=dict(text="", font=dict(color=ink, family='Roboto', size=12)),
    **axis_look,
)
fig.update_yaxes(title=dict(text="Number of Posts", font=body_font), **axis_look)

# Show the plot
fig.show()

##### Relative Cumulative Frequency vs Time

In [85]:
# Keep both label columns as strings on `df`; the LDA aggregation cell further
# down groups on 'LDA Topic' and its plots use string colour-map keys.
df['LDA Topic'] = df['LDA Topic'].astype(str)
df['Union'] = df['Union'].astype(str)

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# One line per Union value; y is each label's share of the running total.
fig = px.line(rel_freq_manual, x='Date', y='Relative Frequency', color='Union',
              title='Relative Cumulative Frequency vs Time',
              color_discrete_map={
                0: '#fac73c',
                1: '#de6f58'
              },
             )

# Readable names for Union values 0 and 1 (list position == Union value).
manual_label = ['Non-unfair Offering', 'Unfair Offering']

# px names each trace after its Union value; replace that with the readable
# label and a matching hover template. Iterating the traces themselves also
# works when a label has no data at all.
label_by_union = {str(code): label for code, label in enumerate(manual_label)}
for trace in fig.data:
    label = label_by_union.get(trace.name)
    if label is None:
        continue
    trace.name = label
    trace.hovertemplate = 'Topic: ' + label + '<br>Date: %{x}<br>Relative Cumulative Frequency: %{y}<extra></extra>'

fig.update_layout(height=1080*0.5,
                  width=1920*0.6,
                  margin=dict(l=120, r=200),
                  title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
                  legend=dict(title="Topic", font=body_font),
                  paper_bgcolor='#ffffff',
                  plot_bgcolor='#ffffff',
                 )

# Both axes share the same line/grid look.
axis_look = dict(showline=True,
                 zeroline=False,
                 showgrid=True,
                 linewidth=1,
                 gridwidth=0.5,
                 linecolor=ink,
                 gridcolor='rgba(14, 15, 17, 0.2)',
                 tickfont=body_font)

fig.update_xaxes(nticks=8,
                 title=dict(text="", font=dict(color=ink, family='Roboto', size=12)),
                 **axis_look)

# The plotted values are fractions of the running total (0..1), not counts, so
# the axis is titled as a proportion rather than "Number of Posts".
fig.update_yaxes(title=dict(text="Proportion of Posts", font=body_font),
                 **axis_look)

# Dashed red vertical marker at March 2022 spanning the full 0..1 range, drawn
# as a two-point line trace and hidden from the legend.
fig.add_trace(go.Scatter(
  x= ['Mar 2022', 'Mar 2022'],
  y= [0, 1],
  mode='lines',
  showlegend=False,
  hoverinfo='text',
  hovertext='March 2022',
  line=dict(color='red', width=1, dash='dash')))

# Show the plot
fig.show()

#### LDA Label

In [86]:
# LDA Labeled

# Wide table (rows: month, columns: LDA topic) of the cumulative per-month
# distinct-'Hovertext' counts; months where a topic is absent contribute 0.
topic_unique = df.groupby(['Date', 'LDA Topic'])[['Hovertext']].nunique()
topic_cumulative_wide = topic_unique.pivot_table('Hovertext', 'Date', 'LDA Topic').fillna(0).cumsum()

# Share of the overall running total (`total_count`, built in the manual-label
# cell above) held by each topic.
topic_relative_wide = topic_cumulative_wide.div(total_count, axis=0)

# Long format for plotting, with the month rendered as e.g. 'Jun 2023'.
cumulative_count = topic_cumulative_wide.stack(0).reset_index()
cumulative_count.columns = ['Date', 'LDA Topic', 'Frequency']
cumulative_count['Date'] = cumulative_count['Date'].map(lambda period: period.strftime('%b %Y'))

rel_freq = topic_relative_wide.stack(0).reset_index()
rel_freq.columns = ['Date', 'LDA Topic', 'Relative Frequency']
rel_freq['Date'] = rel_freq['Date'].map(lambda period: period.strftime('%b %Y'))

# Plain row counts per (month, topic); there is no row for absent pairs.
monthly_count = df.groupby(['Date', 'LDA Topic']).size().reset_index(name='Frequency')
monthly_count['Date'] = monthly_count['Date'].map(lambda period: period.strftime('%b %Y'))

##### Monthly Frequency vs Time

In [87]:
# Keep both label columns as strings on `df` (no effect on the already-built
# monthly_count table; it only keeps the frame's dtypes consistent).
df['LDA Topic'] = df['LDA Topic'].astype(str)
df['Union'] = df['Union'].astype(str)

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# The x axis is categorical ('Jun 2023'-style strings) and monthly_count has no
# row for a (month, topic) pair that never occurs. Without an explicit order
# Plotly lays categories out in order of first appearance across traces, so a
# month missing from the first topic's trace would be pushed to the end of the
# axis. The table is already sorted chronologically, so its de-duplicated
# 'Date' column gives the intended order.
month_order = list(dict.fromkeys(monthly_count['Date']))

fig = px.line(monthly_count, x='Date', y='Frequency', color='LDA Topic',
              title='Monthly Frequency vs Time',
              category_orders={'Date': month_order},
              color_discrete_map={
                '1': '#ffb000',
                '2': '#785ef0',
                '3': '#dc267f',
                '4': '#fe6100',
                '5': '#648fff'
              },
             )

# Human-readable names for topics 1..5 (list position i <-> topic id i + 1).
LDA_label = ['Employee Exit Process', 'BPO Work and Culture', 'Job Application', 'Workplace Management', 'Employee Well-being']

# px names each trace after its topic id; replace that with the readable label
# and a matching hover template. Iterating the traces themselves (rather than
# range(len(LDA_label))) also works when a topic has no data at all.
label_by_topic = {str(topic_id): label for topic_id, label in enumerate(LDA_label, start=1)}
for trace in fig.data:
    label = label_by_topic.get(trace.name)
    if label is None:
        continue
    trace.name = label
    trace.hovertemplate = 'Topic: ' + label + '<br>Date: %{x}<br>Monthly Frequency: %{y}<extra></extra>'

fig.update_layout(height=1080*0.5,
                  width=1920*0.6,
                  margin=dict(l=120, r=200),
                  title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
                  legend=dict(title="Topic", font=body_font),
                  paper_bgcolor='#ffffff',
                  plot_bgcolor='#ffffff',
                 )

# Both axes share the same line/grid look.
axis_look = dict(showline=True,
                 zeroline=False,
                 showgrid=True,
                 linewidth=1,
                 gridwidth=0.5,
                 linecolor=ink,
                 gridcolor='rgba(14, 15, 17, 0.2)',
                 tickfont=body_font)

fig.update_xaxes(nticks=12,
                 title=dict(text="", font=dict(color=ink, family='Roboto', size=12)),
                 **axis_look)

fig.update_yaxes(title=dict(text="Number of Posts", font=body_font),
                 **axis_look)

# Dashed red vertical markers at the highlighted months, drawn as two-point
# line traces from y=0 to y=70 and hidden from the legend.
highlighted_months = (
    ('Dec 2023', 'December 2023'),
    ('Mar 2023', 'March 2023'),
    ('Mar 2024', 'March 2024'),
)
for month_key, month_name in highlighted_months:
    fig.add_trace(go.Scatter(
      x=[month_key, month_key],
      y=[0, 70],
      mode='lines',
      showlegend=False,
      hoverinfo='text',
      hovertext=month_name,
      line=dict(color='red', width=1, dash='dash')))

# Show the plot
fig.show()

##### Cumulative Frequency vs Time

In [88]:
# Keep both label columns as strings on `df`.
for label_column in ('LDA Topic', 'Union'):
    df[label_column] = df[label_column].astype(str)

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# One line per LDA topic from the cumulative table.
topic_palette = {
    '1': '#ffb000',
    '2': '#785ef0',
    '3': '#dc267f',
    '4': '#fe6100',
    '5': '#648fff',
}
fig = px.line(
    cumulative_count,
    x='Date',
    y='Frequency',
    color='LDA Topic',
    title='Cumulative Frequency vs Time',
    color_discrete_map=topic_palette,
)

LDA_label = ['Employee Exit Process', 'BPO Work and Culture', 'Job Application', 'Workplace Management', 'Employee Well-being']

# px names each trace after its topic id ('1'..'5'); swap in the readable label
# and a matching hover template.
hover_tail = '<br>Date: %{x}<br>Cumulative Frequency: %{y}<extra></extra>'
readable = dict(zip(('1', '2', '3', '4', '5'), LDA_label))
for position in range(len(LDA_label)):
    trace = fig.data[position]
    if trace.name in readable:
        label = readable[trace.name]
        trace.name = label
        trace.hovertemplate = 'Topic: ' + label + hover_tail

# Canvas size, margins, title/legend fonts and white backgrounds.
fig.update_layout(
    height=1080*0.5,
    width=1920*0.6,
    margin=dict(l=120, r=200),
    title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
    legend=dict(title="Topic", font=body_font),
    paper_bgcolor='#ffffff',
    plot_bgcolor='#ffffff',
)

# Both axes share the same line/grid look.
axis_look = dict(
    showline=True,
    zeroline=False,
    showgrid=True,
    linewidth=1,
    gridwidth=0.5,
    linecolor=ink,
    gridcolor='rgba(14, 15, 17, 0.2)',
    tickfont=body_font,
)
fig.update_xaxes(
    nticks=8,
    title=dict(text="", font=dict(color=ink, family='Roboto', size=12)),
    **axis_look,
)
fig.update_yaxes(title=dict(text="Number of Posts", font=body_font), **axis_look)

# Dashed red vertical marker at December 2023 (y from 0 to 450), hidden from
# the legend.
marker_month = 'Dec 2023'
fig.add_trace(
    go.Scatter(
        x=[marker_month, marker_month],
        y=[0, 450],
        mode='lines',
        showlegend=False,
        hoverinfo='text',
        hovertext='December 2023',
        line=dict(color='red', width=1, dash='dash'),
    )
)

# Show the plot
fig.show()

##### Relative Cumulative Frequency vs Time

In [89]:
# Keep both label columns as strings on `df`.
df['LDA Topic'] = df['LDA Topic'].astype(str)
df['Union'] = df['Union'].astype(str)

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# One line per LDA topic; y is each topic's share of the running total.
fig = px.line(rel_freq, x='Date', y='Relative Frequency', color='LDA Topic',
              title='Relative Cumulative Frequency vs Time',
              # category_orders={'LDA Topic':['1', '2', '3', '4', '5']},
              color_discrete_map={
                '1': '#ffb000',
                '2': '#785ef0',
                '3': '#dc267f',
                '4': '#fe6100',
                '5': '#648fff'
              },
             )

# Human-readable names for topics 1..5 (list position i <-> topic id i + 1).
LDA_label = ['Employee Exit Process', 'BPO Work and Culture', 'Job Application', 'Workplace Management', 'Employee Well-being']

# px names each trace after its topic id; replace that with the readable label
# and a matching hover template. Iterating the traces themselves also works
# when a topic has no data at all.
label_by_topic = {str(topic_id): label for topic_id, label in enumerate(LDA_label, start=1)}
for trace in fig.data:
    label = label_by_topic.get(trace.name)
    if label is None:
        continue
    trace.name = label
    trace.hovertemplate = 'Topic: ' + label + '<br>Date: %{x}<br>Relative Cumulative Frequency: %{y}<extra></extra>'

fig.update_layout(height=1080*0.5,
                  width=1920*0.6,
                  margin=dict(l=120, r=200),
                  title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
                  legend=dict(title="Topic", font=body_font),
                  paper_bgcolor='#ffffff',
                  plot_bgcolor='#ffffff',
                 )

# Both axes share the same line/grid look.
axis_look = dict(showline=True,
                 zeroline=False,
                 showgrid=True,
                 linewidth=1,
                 gridwidth=0.5,
                 linecolor=ink,
                 gridcolor='rgba(14, 15, 17, 0.2)',
                 tickfont=body_font)

fig.update_xaxes(nticks=8,
                 title=dict(text="", font=dict(color=ink, family='Roboto', size=12)),
                 **axis_look)

# The plotted values are fractions of the running total, not counts, so the
# axis is titled as a proportion rather than "Number of Posts".
fig.update_yaxes(title=dict(text="Proportion of Posts", font=body_font),
                 **axis_look)

# Show the plot
fig.show()

### R2 Plot

In [90]:
# The engagement aggregation cell that follows groups on integer 'Union' values
# (its plots use the int keys 0 / 1 in their colour maps), so cast both label
# columns back to int.
for label_column in ('LDA Topic', 'Union'):
    df[label_column] = df[label_column].astype(int)

#### Manual Label

In [91]:
# Manually Labeled

# Running total of 'Engagements' over all labels, month by month; used as the
# denominator for the relative plot.
total_engage = df.groupby(['Date'])['Engagements'].sum().cumsum()

# Wide table (rows: month, columns: Union value) of cumulative engagement;
# months where a label is absent contribute 0.
union_engage_sums = df.groupby(['Date', 'Union'])['Engagements'].sum()
union_engage_wide = union_engage_sums.to_frame().pivot_table('Engagements', 'Date', 'Union').fillna(0).cumsum()

# Share of the overall running total held by each label.
union_engage_share_wide = union_engage_wide.div(total_engage, axis=0)

# Long format for plotting, with the month rendered as e.g. 'Jun 2023'.
cumulative_engage_manual = union_engage_wide.stack(0).reset_index()
cumulative_engage_manual.columns = ['Date', 'Union', 'Engagements']
cumulative_engage_manual['Date'] = cumulative_engage_manual['Date'].map(lambda period: period.strftime('%b %Y'))

# Per-month (non-cumulative) engagement per label; no row for absent pairs.
monthly_engage_manual = union_engage_sums.reset_index()
monthly_engage_manual.columns = ['Date', 'Union', 'Engagements']
monthly_engage_manual['Date'] = monthly_engage_manual['Date'].map(lambda period: period.strftime('%b %Y'))

rel_engage_manual = union_engage_share_wide.stack(0).reset_index()
rel_engage_manual.columns = ['Date', 'Union', 'Relative Engagements']
rel_engage_manual['Date'] = rel_engage_manual['Date'].map(lambda period: period.strftime('%b %Y'))

##### Monthly Engagement vs Time

In [92]:
# Keep both label columns as strings on `df`; the LDA engagement cell further
# down groups on 'LDA Topic'.
df['LDA Topic'] = df['LDA Topic'].astype(str)
df['Union'] = df['Union'].astype(str)

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# The x axis is categorical ('Jun 2023'-style strings) and monthly_engage_manual
# has no row for a (month, label) pair that never occurs. Without an explicit
# order Plotly lays categories out in order of first appearance across traces,
# so a month missing from the first trace would be pushed to the end of the
# axis. The table is already sorted chronologically, so its de-duplicated
# 'Date' column gives the intended order.
month_order = list(dict.fromkeys(monthly_engage_manual['Date']))

fig = px.line(monthly_engage_manual, x='Date', y='Engagements', color='Union',
              title='Monthly Engagement vs Time',
              category_orders={'Date': month_order},
              color_discrete_map={
                0: '#fac73c',
                1: '#de6f58'
              },
             )

# Readable names for Union values 0 and 1 (list position == Union value).
manual_label = ['Non-unfair Offering', 'Unfair Offering']

# px names each trace after its Union value; replace that with the readable
# label and a matching hover template. Iterating the traces themselves also
# works when a label has no data at all.
label_by_union = {str(code): label for code, label in enumerate(manual_label)}
for trace in fig.data:
    label = label_by_union.get(trace.name)
    if label is None:
        continue
    trace.name = label
    trace.hovertemplate = 'Topic: ' + label + '<br>Date: %{x}<br>Monthly Engagement: %{y}<extra></extra>'

fig.update_layout(height=1080*0.5,
                  width=1920*0.6,
                  margin=dict(l=120, r=200),
                  title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
                  legend=dict(title="Topic", font=body_font),
                  paper_bgcolor='#ffffff',
                  plot_bgcolor='#ffffff',
                 )

# Both axes share the same line/grid look.
axis_look = dict(showline=True,
                 zeroline=False,
                 showgrid=True,
                 linewidth=1,
                 gridwidth=0.5,
                 linecolor=ink,
                 gridcolor='rgba(14, 15, 17, 0.2)',
                 tickfont=body_font)

fig.update_xaxes(nticks=8,
                 title=dict(text="", font=dict(color=ink, family='Roboto', size=12)),
                 **axis_look)

fig.update_yaxes(title=dict(text="Engagement*", font=body_font),
                 **axis_look)

# Footnote (paper coordinates, below the plot area, right-aligned) defining the
# starred y-axis quantity.
fig.add_annotation(
    x=1,
    y=-0.45*(1/5)-0.08,
    text="*Engagement = (Upvotes + Comments) × Upvote:Downvote Ratio",
    showarrow=False,
    xref='paper',
    yref='paper',
    align='right',
    font=dict(color='rgba(14, 15, 17, 0.7)', family='Roboto Italic', size=12),
  )

# Dashed red vertical marker at June 2023 (y from 0 to 20000), drawn as a
# two-point line trace and hidden from the legend.
fig.add_trace(go.Scatter(
  x= ['Jun 2023', 'Jun 2023'],
  y= [0, 20000],
  mode='lines',
  showlegend=False,
  hoverinfo='text',
  hovertext='June 2023',
  line=dict(color='red', width=1, dash='dash')))

# Show the plot
fig.show()

##### Cumulative Engagement vs Time

In [93]:
# Keep both label columns as strings on `df` (the LDA engagement cell further
# down groups on 'LDA Topic').
for label_column in ('LDA Topic', 'Union'):
    df[label_column] = df[label_column].astype(str)

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# One line per Union value from the cumulative engagement table.
union_palette = {0: '#fac73c', 1: '#de6f58'}
fig = px.line(
    cumulative_engage_manual,
    x='Date',
    y='Engagements',
    color='Union',
    title='Cumulative Engagement vs Time',
    color_discrete_map=union_palette,
)

manual_label = ['Non-unfair Offering', 'Unfair Offering']

# px names each trace after its Union value ('0' / '1'); swap in the readable
# label and a matching hover template.
hover_tail = '<br>Date: %{x}<br>Cumulative Engagement: %{y}<extra></extra>'
readable = dict(zip(('0', '1'), manual_label))
for position in range(len(manual_label)):
    trace = fig.data[position]
    if trace.name in readable:
        label = readable[trace.name]
        trace.name = label
        trace.hovertemplate = 'Topic: ' + label + hover_tail

# Canvas size, margins, title/legend fonts and white backgrounds.
fig.update_layout(
    height=1080*0.5,
    width=1920*0.6,
    margin=dict(l=120, r=200),
    title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
    legend=dict(title="Topic", font=body_font),
    paper_bgcolor='#ffffff',
    plot_bgcolor='#ffffff',
)

# Both axes share the same line/grid look.
axis_look = dict(
    showline=True,
    zeroline=False,
    showgrid=True,
    linewidth=1,
    gridwidth=0.5,
    linecolor=ink,
    gridcolor='rgba(14, 15, 17, 0.2)',
    tickfont=body_font,
)
fig.update_xaxes(
    nticks=8,
    title=dict(text="", font=dict(color=ink, family='Roboto', size=12)),
    **axis_look,
)
fig.update_yaxes(title=dict(text="Engagement*", font=body_font), **axis_look)

# Footnote (paper coordinates, below the plot area, right-aligned) defining the
# starred y-axis quantity.
footnote_font = dict(color='rgba(14, 15, 17, 0.7)', family='Roboto Italic', size=12)
fig.add_annotation(
    x=1,
    y=-0.45*(1/5)-0.08,
    text="*Engagement = (Upvotes + Comments) × Upvote:Downvote Ratio",
    showarrow=False,
    xref='paper',
    yref='paper',
    align='right',
    font=footnote_font,
)

# Show the plot
fig.show()

##### Relative Cumulative Engagement vs Time

In [94]:
# Keep both label columns as strings on `df` (the LDA engagement cell further
# down groups on 'LDA Topic').
for label_column in ('LDA Topic', 'Union'):
    df[label_column] = df[label_column].astype(str)

# Colours and fonts shared by the styling calls below.
ink = '#0e0f11'
body_font = dict(color=ink, family='Roboto Regular', size=12)

# One line per Union value; y is each label's share of cumulative engagement.
union_palette = {0: '#fac73c', 1: '#de6f58'}
fig = px.line(
    rel_engage_manual,
    x='Date',
    y='Relative Engagements',
    color='Union',
    title='Relative Cumulative Engagement vs Time',
    color_discrete_map=union_palette,
)

manual_label = ['Non-unfair Offering', 'Unfair Offering']

# px names each trace after its Union value ('0' / '1'); swap in the readable
# label and a matching hover template.
hover_tail = '<br>Date: %{x}<br>Relative Cumulative Engagement: %{y}<extra></extra>'
readable = dict(zip(('0', '1'), manual_label))
for position in range(len(manual_label)):
    trace = fig.data[position]
    if trace.name in readable:
        label = readable[trace.name]
        trace.name = label
        trace.hovertemplate = 'Topic: ' + label + hover_tail

# Canvas size, margins, title/legend fonts and white backgrounds.
fig.update_layout(
    height=1080*0.5,
    width=1920*0.6,
    margin=dict(l=120, r=200),
    title=dict(font=dict(color=ink, family='Roboto Medium', size=20)),
    legend=dict(title="Topic", font=body_font),
    paper_bgcolor='#ffffff',
    plot_bgcolor='#ffffff',
)

# Both axes share the same line/grid look.
axis_look = dict(
    showline=True,
    zeroline=False,
    showgrid=True,
    linewidth=1,
    gridwidth=0.5,
    linecolor=ink,
    gridcolor='rgba(14, 15, 17, 0.2)',
    tickfont=body_font,
)
fig.update_xaxes(
    nticks=8,
    title=dict(text="", font=dict(color=ink, family='Roboto', size=12)),
    **axis_look,
)
# NOTE(review): the plotted values are shares of the running total, yet the axis
# is titled "Engagement*" -- consider a title that says so.
fig.update_yaxes(title=dict(text="Engagement*", font=body_font), **axis_look)

# Footnote (paper coordinates, below the plot area, right-aligned) defining the
# starred y-axis quantity.
footnote_font = dict(color='rgba(14, 15, 17, 0.7)', family='Roboto Italic', size=12)
fig.add_annotation(
    x=1,
    y=-0.45*(1/5)-0.08,
    text="*Engagement = (Upvotes + Comments) × Upvote:Downvote Ratio",
    showarrow=False,
    xref='paper',
    yref='paper',
    align='right',
    font=footnote_font,
)

# Dashed red vertical marker at March 2022 spanning the 0..1 range, hidden from
# the legend.
marker_month = 'Mar 2022'
fig.add_trace(
    go.Scatter(
        x=[marker_month, marker_month],
        y=[0, 1],
        mode='lines',
        showlegend=False,
        hoverinfo='text',
        hovertext='March 2022',
        line=dict(color='red', width=1, dash='dash'),
    )
)

# Show the plot
fig.show()

#### LDA Label

In [95]:
# LDA Labeled
#
# Builds three long-format tables (columns: Date, LDA Topic, <value>) used by
# the LDA plots below:
#   monthly_engage    - engagement summed per month and topic
#   cumulative_engage - running total of the above per topic
#   rel_engage        - each topic's running total divided by the running
#                       total across all topics
# Dates are rendered as 'Mon YYYY' strings.


def _to_long_table(values, value_column):
    """Flatten a (Date, LDA Topic)-indexed series into a three-column frame."""
    table = values.reset_index()
    table.columns = ['Date', 'LDA Topic', value_column]
    table['Date'] = [period.strftime('%b %Y') for period in table['Date']]
    return table


# Running total of engagement across every topic, indexed by month.
total_engage = df.groupby(['Date'])['Engagements'].sum().cumsum()

# Per-month, per-topic sums; shared by the monthly and the cumulative tables.
_engage_by_month_topic = df.groupby(['Date', 'LDA Topic'])['Engagements'].sum()

# Wide layout (rows = month, columns = topic) so the cumulative sum runs down
# each topic column; months where a topic has no entry count as 0.
_cumulative_wide = (
    pd.DataFrame(_engage_by_month_topic)
    .pivot_table(values='Engagements', index='Date', columns='LDA Topic')
    .fillna(0)
    .cumsum()
)
_relative_wide = _cumulative_wide.div(total_engage, axis=0)

cumulative_engage = _to_long_table(_cumulative_wide.stack(0), 'Engagements')
monthly_engage = _to_long_table(_engage_by_month_topic, 'Engagements')
rel_engage = _to_long_table(_relative_wide.stack(0), 'Relative Engagements')

##### Monthly Engagement vs Time

In [96]:
# Stacked area chart of monthly engagement, one band per LDA topic.
for column in ('LDA Topic', 'Union'):
    df[column] = df[column].astype(str)

# Fixed colour per LDA topic id (ids are strings).
topic_palette = {
    '1': '#ffb000',
    '2': '#785ef0',
    '3': '#dc267f',
    '4': '#fe6100',
    '5': '#648fff',
}

fig = px.area(
    monthly_engage,
    x='Date',
    y='Engagements',
    color='LDA Topic',
    # category_orders={'LDA Topic':['1', '2', '3', '4', '5']},
    title='Monthly Engagement vs Time',
    color_discrete_map=topic_palette,
)

LDA_label = ['Employee Exit Process', 'BPO Work and Culture', 'Job Application', 'Workplace Management', 'Employee Well-being']

# Topic id '1'..'5' -> human-readable label, in list order.
label_by_topic = {str(number): label for number, label in enumerate(LDA_label, start=1)}

# Swap each trace's numeric topic id for its label in the legend and hover box.
for position in range(len(LDA_label)):
    trace = fig.data[position]
    label = label_by_topic.get(trace.name)
    if label is not None:
        trace.name = label
        trace.hovertemplate = 'Topic: ' + label + '<br>Date: %{x}<br>Monthly Engagement: %{y}<extra></extra>'

fig.update_layout(
    width=1920*0.6,
    height=1080*0.5,
    margin={'l': 120, 'r': 200},
    title={'font': {'color': '#0e0f11', 'family': 'Roboto Medium', 'size': 20}},
    legend={'title': "Topic", 'font': {'color': '#0e0f11', 'family': 'Roboto Regular', 'size': 12}},
    paper_bgcolor='#ffffff',
    plot_bgcolor='#ffffff',
)

# Line, grid and tick styling shared by both axes.
axis_frame = {
    'showline': True,
    'zeroline': False,
    'showgrid': True,
    'linewidth': 1,
    'gridwidth': 0.5,
    'linecolor': '#0e0f11',
    'gridcolor': 'rgba(14, 15, 17, 0.2)',
    'tickfont': {'color': '#0e0f11', 'family': 'Roboto Regular', 'size': 12},
}

fig.update_xaxes(
    nticks=12,
    title={'text': "", 'font': {'color': '#0e0f11', 'family': 'Roboto', 'size': 12}},
    **axis_frame,
)
fig.update_yaxes(
    title={'text': "Engagement*", 'font': {'color': '#0e0f11', 'family': 'Roboto Regular', 'size': 12}},
    **axis_frame,
)

# Footnote defining the starred metric, anchored in paper coordinates below
# the plotting area.
fig.add_annotation(
    text="*Engagement = (Upvotes + Comments) × Upvote:Downvote Ratio",
    font={'color': 'rgba(14, 15, 17, 0.7)', 'family': 'Roboto Italic', 'size': 12},
    xref='paper',
    yref='paper',
    x=1,
    y=-0.45*(1/5)-0.08,
    align='right',
    showarrow=False,
)

# Dashed red vertical marker at June 2023 (height is a hard-coded 11000).
marker_month = 'Jun 2023'
fig.add_trace(
    go.Scatter(
        x=[marker_month, marker_month],
        y=[0, 11000],
        mode='lines',
        line={'color': 'red', 'width': 1, 'dash': 'dash'},
        hoverinfo='text',
        hovertext='June 2023',
        showlegend=False,
    )
)

# Show the plot
fig.show()

##### Cumulative Engagement vs Time

In [97]:
# Cumulative engagement over time, one line per LDA topic.
df['LDA Topic'] = df['LDA Topic'].astype(str)
df['Union'] = df['Union'].astype(str)

fig = px.line(cumulative_engage, x='Date', y='Engagements', color='LDA Topic',
              title='Cumulative Engagement vs Time',
              # category_orders={'LDA Topic':['1', '2', '3', '4', '5']},
              color_discrete_map={
                '1': '#ffb000',
                '2': '#785ef0',
                '3': '#dc267f',
                '4': '#fe6100',
                '5': '#648fff'
              },
             )

LDA_label = ['Employee Exit Process', 'BPO Work and Culture', 'Job Application', 'Workplace Management', 'Employee Well-being']

# Topic id '1'..'5' -> human-readable label, in list order.
topic_display_names = {str(number): label for number, label in enumerate(LDA_label, start=1)}

# Rename only the traces that exist (indexing fig.data by range(len(LDA_label))
# raises IndexError when a topic has no data). The hover label now says
# "Cumulative Engagement": this chart plots running totals, not monthly sums.
for trace in fig.data:
    display_name = topic_display_names.get(trace.name)
    if display_name is None:
        continue
    trace.name = display_name
    trace.hovertemplate = (
        'Topic: ' + display_name
        + '<br>Date: %{x}<br>Cumulative Engagement: %{y}<extra></extra>'
    )

fig.update_xaxes(nticks=12)

fig.update_layout(height=1080*0.5,
                  width=1920*0.6,
                  margin=dict(l=120, r=200),
                  title=dict(font=dict(color='#0e0f11', family='Roboto Medium', size=20)),
                  legend=dict(title="Topic", font=dict(color='#0e0f11', family='Roboto Regular', size=12)),
                  paper_bgcolor='#ffffff',
                  plot_bgcolor='#ffffff',
                 )

fig.update_xaxes(showline=True,
                 zeroline=False,
                 showgrid=True,
                 linewidth=1,
                 gridwidth=0.5,
                 linecolor='#0e0f11',
                 gridcolor='rgba(14, 15, 17, 0.2)',
                 tickfont=dict(color='#0e0f11', family='Roboto Regular', size=12),
                 title=dict(text="", font=dict(color='#0e0f11', family='Roboto', size=12)),
                 )

fig.update_yaxes(showline=True,
                 zeroline=False,
                 showgrid=True,
                 linewidth=1,
                 gridwidth=0.5,
                 linecolor='#0e0f11',
                 gridcolor='rgba(14, 15, 17, 0.2)',
                 tickfont=dict(color='#0e0f11', family='Roboto Regular', size=12),
                 title=dict(text="Engagement*", font=dict(color='#0e0f11', family='Roboto Regular', size=12)),
                 )

# Footnote defining the starred metric, anchored in paper coordinates below
# the plotting area.
fig.add_annotation(
    x=1,
    y=-0.45*(1/5)-0.08,
    text="*Engagement = (Upvotes + Comments) × Upvote:Downvote Ratio",
    showarrow=False,
    xref='paper',
    yref='paper',
    align='right',
    font=dict(color='rgba(14, 15, 17, 0.7)', family='Roboto Italic', size=12),
  )

# Dashed red vertical marker at June 2023 (height is a hard-coded 50000).
# TODO: the hover text below is placeholder wording - replace it with the
# actual event once it is identified.
fig.add_trace(go.Scatter(
  x=['Jun 2023', 'Jun 2023'],
  y=[0, 50000],
  mode='lines',
  showlegend=False,
  hoverinfo='text',
  hovertext='June 2023 <br> SOMETHING HUGE HAPPENED',
  line=dict(color='red', width=1, dash='dash')))

# Show the plot
fig.show()

##### Cumulative Relative Engagement vs Time

In [98]:
# Relative cumulative engagement over time, one line per LDA topic: each
# topic's running total as a share of the running total across all topics.
df['LDA Topic'] = df['LDA Topic'].astype(str)
df['Union'] = df['Union'].astype(str)

fig = px.line(rel_engage, x='Date', y='Relative Engagements', color='LDA Topic',
              title='Relative Cumulative Engagement vs Time',
              # category_orders={'LDA Topic':['1', '2', '3', '4', '5']},
              color_discrete_map={
                '1': '#ffb000',
                '2': '#785ef0',
                '3': '#dc267f',
                '4': '#fe6100',
                '5': '#648fff'
              },
             )

LDA_label = ['Employee Exit Process', 'BPO Work and Culture', 'Job Application', 'Workplace Management', 'Employee Well-being']

# Topic id '1'..'5' -> human-readable label, in list order.
topic_display_names = {str(number): label for number, label in enumerate(LDA_label, start=1)}

# Rename only the traces that exist (indexing fig.data by range(len(LDA_label))
# raises IndexError when a topic has no data). The hover label now matches the
# plotted quantity instead of the copy-pasted "Monthly Engagement".
for trace in fig.data:
    display_name = topic_display_names.get(trace.name)
    if display_name is None:
        continue
    trace.name = display_name
    trace.hovertemplate = (
        'Topic: ' + display_name
        + '<br>Date: %{x}<br>Relative Cumulative Engagement: %{y}<extra></extra>'
    )

fig.update_xaxes(nticks=12)

fig.update_layout(height=1080*0.5,
                  width=1920*0.6,
                  margin=dict(l=120, r=200),
                  title=dict(font=dict(color='#0e0f11', family='Roboto Medium', size=20)),
                  legend=dict(title="Topic", font=dict(color='#0e0f11', family='Roboto Regular', size=12)),
                  paper_bgcolor='#ffffff',
                  plot_bgcolor='#ffffff',
                 )

fig.update_xaxes(showline=True,
                 zeroline=False,
                 showgrid=True,
                 linewidth=1,
                 gridwidth=0.5,
                 linecolor='#0e0f11',
                 gridcolor='rgba(14, 15, 17, 0.2)',
                 tickfont=dict(color='#0e0f11', family='Roboto Regular', size=12),
                 title=dict(text="", font=dict(color='#0e0f11', family='Roboto', size=12)),
                 )

fig.update_yaxes(showline=True,
                 zeroline=False,
                 showgrid=True,
                 linewidth=1,
                 gridwidth=0.5,
                 linecolor='#0e0f11',
                 gridcolor='rgba(14, 15, 17, 0.2)',
                 tickfont=dict(color='#0e0f11', family='Roboto Regular', size=12),
                 title=dict(text="Engagement*", font=dict(color='#0e0f11', family='Roboto Regular', size=12)),
                 )

# Footnote defining the starred metric, anchored in paper coordinates below
# the plotting area.
fig.add_annotation(
    x=1,
    y=-0.45*(1/5)-0.08,
    text="*Engagement = (Upvotes + Comments) × Upvote:Downvote Ratio",
    showarrow=False,
    xref='paper',
    yref='paper',
    align='right',
    font=dict(color='rgba(14, 15, 17, 0.7)', family='Roboto Italic', size=12),
  )

# Dashed red vertical marker at April 2022; y spans 0..1, the range of a
# relative (fractional) engagement share. The hover text previously read
# 'April 2023', contradicting the marker's x position.
# NOTE(review): assumes the x position (2022) is the intended month - confirm.
fig.add_trace(go.Scatter(
  x=['Apr 2022', 'Apr 2022'],
  y=[0, 1],
  mode='lines',
  showlegend=False,
  hoverinfo='text',
  hovertext='April 2022',
  line=dict(color='red', width=1, dash='dash')))

# Show the plot
fig.show()