In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from IPython.display import Image

import os, sys, re, datetime, time, copy
from pathlib import Path

# Project root is two levels above the notebook's working directory;
# the standard sub-folders hang directly off it.
pj_dir = Path.cwd().parents[1]
data_dir, img_dir, src_dir = (pj_dir/sub for sub in ('data', 'images', 'src'))
# Make project-local modules under src/ importable from this notebook.
sys.path.append(str(src_dir))

from matplotlib import pyplot as plt
import jpholiday
from tqdm import tqdm_notebook
from dotenv import load_dotenv

import seaborn as sns
plt.style.use("bmh")
import numpy as np
import pandas as pd
import dask.dataframe as dd
import requests

import MeCab
from sklearn.manifold import TSNE
from wordcloud import WordCloud

from gensim import models
from gensim.models.doc2vec import TaggedDocument

In [None]:
import logging
import matplotlib

# Notebook-wide plotting defaults: wide figures and a font that has Japanese glyphs.
matplotlib.rcParams.update({
    "figure.figsize": (16, 4),
    "font.family": "IPAexGothic",
})
logging.basicConfig(level=logging.INFO)

In [None]:
# Show up to 100 rows when a DataFrame is displayed.
pd.options.display.max_rows = 100
# Load environment variables (e.g. SLACK_TOKEN, read below) from the project's .env file.
load_dotenv(pj_dir/'.env')

# Slackデータの取得

In [None]:
# Slack API token taken from the environment; None when SLACK_TOKEN is not set.
token = os.getenv('SLACK_TOKEN')

In [None]:
# HTTP headers sent with every Slack Web API request in this notebook.
# NOTE: if SLACK_TOKEN was not set, `token` is None and the header becomes "Bearer None".
headers = {
    "Content-type": "application/json",
    "Authorization": f"Bearer {token}"
}

In [None]:
def fetch_messages_by_channel(channe_id):
    """Download the message history of one Slack channel.

    Pages backwards through ``channels.history`` (1000 messages per request,
    one second pause between requests) until Slack reports no more pages or
    the oldest message fetched is older than 2017-12-31.

    Args:
        channe_id: Slack channel id. The misspelled parameter name is kept so
            that existing callers keep working.

    Returns:
        pandas.DataFrame with one row per message and an extra ``channel_id``
        column.

    Raises:
        RuntimeError: if the API response carries no ``messages`` field.
    """
    # Bug fix: the body used to read the *global* ``channel_id`` instead of the
    # argument, so the function only worked when the caller happened to set it.
    channel_id = channe_id
    endpoint = 'https://slack.com/api/channels.history'
    one_year_ago = pd.to_datetime('2017-12-31')
    # No upper bound on the first request (requests omits params whose value is None).
    oldest_ts = None

    ls_messages = []
    while True:
        payload = {
            'channel': channel_id,
            'latest': oldest_ts,
            'count': 1000
        }

        data = requests.get(endpoint, headers=headers, params=payload).json()
        if 'messages' not in data:
            # Surface the API's own error code instead of an opaque KeyError.
            raise RuntimeError(f"Slack API error for channel {channel_id}: {data.get('error')}")
        messages = data['messages']
        ls_messages.extend(messages)

        # Stop on the last page; also guards messages[-1] against an empty page.
        if not messages or not data.get('has_more'):
            break

        time.sleep(1)
        # The next request asks for messages older than the oldest one seen so far.
        oldest_ts = messages[-1]['ts']
        # ts is taken from the JSON payload; cast to float so it is read as epoch seconds.
        oldest_datetime = pd.to_datetime(float(oldest_ts), unit='s')
        sys.stdout.write(f"\r{oldest_datetime}")
        sys.stdout.flush()
        if oldest_datetime < one_year_ago:
            sys.stdout.write(f"\rfinish!" + ' '*50)
            break

    df = pd.DataFrame(ls_messages)
    df['channel_id'] = channel_id
    return df

In [None]:
# Download every channel's history, collecting the frames and the ids that failed.
# NOTE: df_channel is created by the channels.list cell further down; run that cell first.
ls_df = []
ls_err_channel_id = []
for i, row in tqdm_notebook(df_channel.iterrows(), total=len(df_channel)):
    channel_id = row['id']
    try:
        df = fetch_messages_by_channel(channel_id)
    except Exception as err:
        # Best effort: keep going with the other channels, but report why this one
        # failed. Unlike a bare `except:`, this lets KeyboardInterrupt stop the loop.
        print(f"Error on {row['name']}: {err!r}")
        ls_err_channel_id.append(channel_id)
    else:
        ls_df.append(df)
    time.sleep(1)

In [None]:
# Stack the per-channel frames into one message table. The index is not reset,
# so row labels repeat across channels.
df = pd.concat(ls_df)

In [None]:
# Fetch the channel list from the Slack Web API and tabulate it.
endpoint = 'https://slack.com/api/channels.list'
payload = dict()

response = requests.get(endpoint, params=payload, headers=headers)
data = response.json()
df_channel = pd.DataFrame(data['channels'])

In [None]:
# Fetch the workspace member list from the Slack Web API and tabulate it.
endpoint = 'https://slack.com/api/users.list'
payload = dict()

response = requests.get(endpoint, params=payload, headers=headers)
data = response.json()
df_member = pd.DataFrame(data['members'])

# 保存

In [None]:
# Persist the three downloaded tables as pickles under data/kaizen_slack.
for frame, filename in (
    (df_channel, 'channels.pickle'),
    (df_member, 'members.pickle'),
    (df, 'messages.pickle'),
):
    frame.to_pickle(data_dir/'kaizen_slack'/filename)

# ロード

In [None]:
# Reload the cached tables so the analysis can run without hitting the Slack API.
# NOTE(review): all_messages.pickle is not written by any cell visible here — confirm its origin.
dfall, df_channel, df_member, df = (
    pd.read_pickle(data_dir/'kaizen_slack'/filename)
    for filename in ('all_messages.pickle', 'channels.pickle', 'members.pickle', 'messages.pickle')
)

# mapping作成

In [None]:
# Lookup table: member id -> member name.
user_id_name_map = dict(zip(df_member['id'], df_member['name']))

In [None]:
# Lookup table: channel id -> channel name (despite the variable name, the key is the id).
channel_name_id_map = dict(zip(df_channel['id'], df_channel['name']))

# 前処理

In [None]:
# Convert the epoch-second `ts` values to datetimes (no timezone conversion is applied).
# Cast to float first: parsing *strings* together with `unit=` is deprecated in
# recent pandas, and numeric input is unambiguous on every version.
df['dt'] = pd.to_datetime(df['ts'].astype(float), unit='s')

# Keep only messages posted after 2017-12-31.
# .copy() makes the result an independent frame, so the column assignments below
# do not write into a slice of the original (SettingWithCopyWarning).
one_year_ago = pd.to_datetime('2017-12-31')
df = df.query('@one_year_ago < dt').copy()

# Map user ids to user names.
df['username'] = df['user'].map(user_id_name_map)

# Map channel ids to channel names.
df['channel_name'] = df['channel_id'].map(channel_name_id_map)

# Drop bot traffic: anything carrying a bot_id, plus the two bot accounts by name.
df = df[df['bot_id'].isnull()]
df = df.query('username != "cronbot"').query('username != "slackbot"')

# メッセージだけに絞る

In [None]:
# Subtypes that are channel housekeeping events rather than real messages.
not_message_types = [
    'channel_join',
    'channel_leave',
    'channel_topic',
    'channel_archive',
    'channel_purpose',
    'sh_room_created',
    'channel_name',
    'pinned_item',
    'reminder_add',
    'app_conversation_join',
]
is_system_event = df['subtype'].isin(not_message_types)
df = df[~is_system_event]

# @されてるユーザー

In [None]:
# First user mentioned in each message. Mentions look like <@USERID> or <@USERID|label>.
# The previous pattern capped the id at 9 characters and rejected the "|label"
# form, so such mentions were silently dropped. expand=False returns a Series
# (one capture group) instead of a one-column DataFrame.
df['at_user'] = df['text'].str.extract(r'<@([A-Z0-9]+)(?:\|[^>]*)?>', expand=False)
df['at_username'] = df['at_user'].map(user_id_name_map)

# 発言数

In [None]:
# Top-5 users by number of messages, drawn as horizontal bars with value labels.
s = df['username'].value_counts()[:5]

n = s.shape[0]
fig = plt.figure(figsize=(16,1*n))

# Name the index explicitly instead of renaming an 'index' column afterwards:
# value_counts() names its index after the source column on recent pandas, in
# which case reset_index() never produces a column called 'index'.
tmp_df = s.rename_axis('name').reset_index(name='value')
ax = sns.barplot(x='value', y='name', palette="autumn", data=tmp_df)
max_ = tmp_df['value'].max()

# Print each count just to the right of its bar.
for i, value in enumerate(tmp_df['value']):
    text = ax.text(value + max_*.05, i+0.1, value, color='black', ha="center", fontsize=20)

# Strip all chart chrome: spines, ticks, axis labels and grid.
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(bottom=False, left=False, labelbottom=False)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel('')
ax.set_ylabel('')
ax.set_title('2018年 発言数 TOP5', fontsize=30)
ax.patch.set_facecolor('white')

ax.patch.set_alpha(0)
plt.grid(False)

# @された数

In [None]:
# Top-5 users by number of times they were @-mentioned, as horizontal bars with value labels.
s = df['at_username'].value_counts()[:5]

n = s.shape[0]
fig = plt.figure(figsize=(16,1*n))

# Name the index explicitly instead of renaming an 'index' column afterwards:
# value_counts() names its index after the source column on recent pandas, in
# which case reset_index() never produces a column called 'index'.
tmp_df = s.rename_axis('name').reset_index(name='value')
ax = sns.barplot(x='value', y='name', data=tmp_df, palette="autumn")
max_ = tmp_df['value'].max()

# Print each count just to the right of its bar.
for i, value in enumerate(tmp_df['value']):
    text = ax.text(value + max_*.05, i+0.1, value, color='black', ha="center", fontsize=20)

# Strip all chart chrome: spines, ticks, axis labels and grid.
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(bottom=False, left=False, labelbottom=False)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel('')
ax.set_ylabel('')
ax.set_title('2018年 @された数 TOP5', fontsize=30)
ax.patch.set_facecolor('white')

ax.patch.set_alpha(0)
plt.grid(False)

# 一番使われたリアクションは？

In [None]:
from collections import defaultdict

In [None]:
# Tally how many users applied each reaction across all messages.
count_reaction = defaultdict(int)
for reactions in tqdm_notebook(df['reactions'].fillna('')):
    # Messages without reactions were filled with '', so this inner loop simply does not run.
    for reaction in reactions:
        count_reaction[reaction['name']] += len(reaction['users'])

In [None]:
# Five most used reactions, drawn as horizontal bars with value labels.
s = pd.Series(count_reaction).sort_values(ascending=False)[:5]

n = s.shape[0]
fig = plt.figure(figsize=(16,1*n))

tmp_df = s.rename_axis('name').reset_index(name='value')
ax = sns.barplot(x='value', y='name', data=tmp_df, palette="autumn")
max_ = tmp_df['value'].max()

# Print each count just to the right of its bar.
for i, value in enumerate(tmp_df['value']):
    text = ax.text(value + max_*.05, i+0.1, value, color='black', ha="center", fontsize=20)

# Strip all chart chrome: spines, ticks, axis labels and grid.
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(bottom=False, left=False, labelbottom=False)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel('')
ax.set_ylabel('')
# Title fix: only the top 5 are plotted, so the heading must not say TOP10.
ax.set_title('2018年 使われたリアクション TOP5', fontsize=30)
ax.patch.set_facecolor('white')

ax.patch.set_alpha(0)
plt.grid(False)

# 時系列で見た発言数

In [None]:
# Every Monday-Friday in the analysis window that is not a Japanese public holiday.
is_weekday_date = [
    dt
    for dt in pd.date_range('2018-1-1', '2018-12-16', freq='1D')
    if dt.weekday() < 5 and not jpholiday.is_holiday(dt.date())
]

In [None]:
# Messages per working day: raw counts (dashed) plus a 5-day moving average (solid).
fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 1, 1)

# Compute the daily series once. reindex() replaces `series[list_of_dates]`,
# which raises KeyError on current pandas when any requested date is missing;
# missing dates become NaN instead.
daily_count = df.groupby(pd.Grouper(key='dt', freq='1D')).size().reindex(is_weekday_date)
ax = daily_count.plot(linewidth=2, linestyle='--', ax=ax)
ax = daily_count.rolling(window=5).mean().plot(linewidth=5, ax=ax)
ax.set_title('1日あたりの発言数(休日祝日は除く) ※破線が実数、実線が周期5の移動平均', fontsize=20)
ax.tick_params(axis='both', labelsize='xx-large')
ax.set_xlabel('')
ax.set_ylabel('')

# Channel数

In [None]:
# Convert the channel creation time from epoch seconds to a datetime.
df_channel['created_dt'] = pd.to_datetime(df_channel['created'], unit='s')

In [None]:
# Weekly number of channels created (from the channel table) ...
weekly_by_creation = pd.Grouper(key='created_dt', freq='1W')
s_added = df_channel.groupby(weekly_by_creation).size()

# ... and weekly number of channels archived (from 'channel_archive' events in dfall).
archive_events = dfall[dfall['subtype'] == "channel_archive"]
s_archived = archive_events.groupby(pd.Grouper(key='datetime', freq='1W')).size()

# transitionデータ作成

In [None]:
# Side-by-side weekly created / archived counts; weeks missing on one side count as 0.
df_transition_channel = (
    pd.concat([s_added, s_archived], axis=1)
    .rename(columns={0: 'added', 1: 'archived'})
    .fillna(0)
)
# Running total of live channels = cumulative (created - archived).
net_change = df_transition_channel['added'].sub(df_transition_channel['archived'])
df_transition_channel['count_channel'] = net_change.cumsum()

In [None]:
# Total channels created and archived after the one_year_ago cutoff, shown as a tuple.
recent_transition = df_transition_channel[df_transition_channel.index > one_year_ago]
recent_transition['added'].sum(), recent_transition['archived'].sum()

In [None]:
# Left axis: running total of channels. Right axis: smoothed created / archived counts.
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(1, 1, 1)
s = df_transition_channel['count_channel']
ax = s.plot(ax=ax, label='総Channel数', linestyle='-', linewidth=5)
ax.set_title('左軸: Channel数   右軸: 作成/アーカイブ数', fontsize=20)
ax.tick_params(axis='both', labelsize=20)
ax.set_xlabel('')
ax.set_ylabel('')
plt.legend(fontsize=20, loc='lower right')

# Secondary axis sharing the same x axis.
# NOTE(review): the inputs are grouped with freq='1W' in an earlier cell, so this is a
# 7-week rolling mean of weekly counts — confirm the "/day" wording in the labels.
ax2 = ax.twinx()
for column, label, color in (
    ('added', '作成数/day', 'C1'),
    ('archived', 'アーカイブ数/day', 'C3'),
):
    s = df_transition_channel[column].rolling(window=7).mean()
    s.plot(linewidth=3, linestyle='--', ax=ax2, label=label, color=color)
ax2.tick_params(axis='both', labelsize=15)
plt.legend(fontsize=20)

# 時系列細かく

In [None]:
# pandas weekday number (0 = Monday ... 6 = Sunday) -> one-character Japanese day name.
weekday_str_map = dict(enumerate('月火水木金土日'))

In [None]:
# Message count per working day. reindex() replaces `series[list_of_dates]`, which
# raises KeyError on current pandas if any requested date is missing from the
# data; missing dates become NaN instead.
df_daily = (
    df.groupby(pd.Grouper(key='dt', freq='1D'))
    .size()
    .reindex(is_weekday_date)
    .to_frame('count')
)

In [None]:
# Add calendar features derived from the date index: Japanese day name and day of month.
df_daily = df_daily.assign(
    weekday=df_daily.index.weekday.map(weekday_str_map),
    day_in_month=df_daily.index.day,
)

In [None]:
# Distribution of daily message counts for each day of the week.
fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 1, 1)
# Bug fix: sorting the kanji day names orders them by code point (月, 木, 水, 火, 金),
# not by weekday. Pass the Monday-to-Friday order explicitly. Only working days
# are present in df_daily, so five categories are enough.
weekday_order = ['月', '火', '水', '木', '金']
ax = sns.boxplot(data=df_daily, x='weekday', y='count', order=weekday_order, ax=ax)
ax.set_title('曜日による発言数の分布', fontsize=20)
ax.tick_params(axis='both', labelsize='x-large')
ax.set_xlabel('')
ax.set_ylabel('')

In [None]:
# Distribution of daily message counts for each day of the month.
fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 1, 1)
daily_by_day_of_month = df_daily.sort_values('day_in_month')
ax = sns.boxplot(x='day_in_month', y='count', data=daily_by_day_of_month)
ax.set_title('日付による発言数の分布', fontsize=20)
ax.tick_params(axis='both', labelsize='x-large')
ax.set_xlabel('')
ax.set_ylabel('')

In [None]:
# Row mask: message was posted Monday-Friday and not on a Japanese public holiday.
on_weekday = df['dt'].dt.weekday.isin([0, 1, 2, 3, 4])
on_holiday = df['dt'].dt.date.apply(jpholiday.is_holiday)
is_weekday = on_weekday & ~on_holiday

In [None]:
# Distribution of messages per hour on working days.
df_hourly = df[is_weekday].groupby(pd.Grouper(key='dt', freq='1h')).size().to_frame('count')

# Bug fix: the hourly Grouper creates a bin for *every* hour between the first and
# last message, so weekends and holidays were contributing zero-count rows and
# dragging the distribution down. Keep only bins that fall on a working day.
bin_is_holiday = pd.Series(df_hourly.index.date, index=df_hourly.index).apply(jpholiday.is_holiday)
bin_on_working_day = (df_hourly.index.weekday < 5) & ~bin_is_holiday.astype(bool).values
df_hourly = df_hourly[bin_on_working_day]

# `dt` carries no timezone conversion; +9 shifts the hour,
# presumably UTC -> JST (TODO confirm).
df_hourly['hour'] = df_hourly.index.hour + 9
work_hours = list(range(9, 20))

fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 1, 1)
ax = sns.boxplot(data=df_hourly.query('hour in @work_hours').sort_values('hour'), x='hour', y='count', ax=ax)
ax.set_title('時間帯による発言数の分布', fontsize=20)
ax.tick_params(axis='both', labelsize='x-large')
ax.set_xlabel('')
ax.set_ylabel('')

In [None]:
# Five busiest channels by message count, drawn as horizontal bars with value labels.
s = df.groupby('channel_name').size().sort_values(ascending=False)[:5]

n = s.shape[0]
fig = plt.figure(figsize=(16,1*n))

# Two columns: 'name' (the channel) and 'value' (its message count).
tmp_df = s.rename_axis('name').reset_index(name='value')
ax = sns.barplot(data=tmp_df, x='value', y='name', palette="autumn")
max_ = tmp_df['value'].max()

# Print each count just to the right of its bar.
for i, value in enumerate(tmp_df['value']):
    text = ax.text(value + max_*.05, i+0.1, value, color='black', ha="center", fontsize=20)

# Strip all chart chrome: spines, ticks, axis labels and grid.
for spine in ax.spines.values():
    spine.set_visible(False)
ax.tick_params(bottom=False, left=False, labelbottom=False)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel('')
ax.set_ylabel('')
ax.set_title('2018年 発言が多かったChannel TOP5', fontsize=30)
ax.patch.set_facecolor('white')

ax.patch.set_alpha(0)
plt.grid(False)

In [None]:
# Messages per working day in #ad-cs: raw counts (dashed) plus 5-day moving average (solid).
fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 1, 1)

# Compute the daily series once. reindex() replaces `series[list_of_dates]`, which
# raises KeyError on current pandas when the channel has no data for part of the
# requested range; those days become NaN instead.
daily_count = (
    df.query('channel_name == "ad-cs"')
    .groupby(pd.Grouper(key='dt', freq='1D'))
    .size()
    .reindex(is_weekday_date)
)
ax = daily_count.plot(linewidth=2, linestyle='--', ax=ax)
ax = daily_count.rolling(window=5).mean().plot(linewidth=5, ax=ax)
ax.set_title('ad-csの1日あたりの発言数(休日祝日は除く) ※破線が実数、実線が周期5の移動平均', fontsize=20)
ax.tick_params(axis='both', labelsize='xx-large')
ax.set_xlabel('')
ax.set_ylabel('')

In [None]:
# Messages per working day in #times_ikedayu: raw counts (dashed) plus 5-day moving average (solid).
fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 1, 1)

# Compute the daily series once. reindex() replaces `series[list_of_dates]`, which
# raises KeyError on current pandas when the channel has no data for part of the
# requested range; those days become NaN instead.
daily_count = (
    df.query('channel_name == "times_ikedayu"')
    .groupby(pd.Grouper(key='dt', freq='1D'))
    .size()
    .reindex(is_weekday_date)
)
ax = daily_count.plot(linewidth=2, linestyle='--', ax=ax)
ax = daily_count.rolling(window=5).mean().plot(linewidth=5, ax=ax)
ax.set_title('times_ikedayuの1日あたりの発言数(休日祝日は除く) ※破線が実数、実線が周期5の移動平均', fontsize=20)
ax.tick_params(axis='both', labelsize='xx-large')
ax.set_xlabel('')
ax.set_ylabel('')

In [None]:
# s = df[df['channel_name'].str.contains('times_')].groupby('channel_name').size().sort_values(ascending=False)[:5]

# n = s.shape[0]
# fig = plt.figure(figsize=(16,1*n))

# tmp_df = s.to_frame('value').reset_index().rename(columns={'channel_name': 'name'})
# ax = sns.barplot(x='value', y='name', data=tmp_df, palette="autumn")
# max_ = tmp_df['value'].max()

# for i, (_, row) in enumerate(tmp_df.iterrows()):
#     text = ax.text(row['value'] + max_*.05, i+0.1, row['value'], color='black', ha="center", fontsize=20)

# [spine.set_visible(False) for spine in ax.spines.values()]
# ax.tick_params(bottom=False, left=False, labelbottom=False)
# ax.tick_params(axis='y', labelsize=20)
# ax.set_xlabel('')
# ax.set_ylabel('')
# ax.set_title('2018年 発言が多かったtimes TOP5', fontsize=30)
# ax.patch.set_facecolor('white') 

# ax.patch.set_alpha(0)
# plt.grid(False)

In [None]:
def plot_hist(s, title, bins=30, xlabel='', ylabel=''):
    """Plot a histogram with a KDE curve of ``s`` and annotate mean, median and std.

    Args:
        s: numeric pandas Series to plot; ``mean``/``median``/``std`` are called on it.
        title: figure title.
        bins: number of histogram bins handed to seaborn.
        xlabel: label for the x axis.
        ylabel: label for the y axis.

    Returns:
        None. The figure is drawn on a new 16x4 matplotlib figure.
    """
    # matplotlib is already a dependency of this notebook; imported locally so the
    # function is self-contained.
    from matplotlib.ticker import FuncFormatter

    fig = plt.figure(figsize=(16, 4))
    ax = fig.add_subplot(1, 1, 1)

    mean = s.mean().round(2)
    median = s.median().round(2)
    std = s.std().round(2)

    sns.distplot(s, ax=ax, bins=bins, kde_kws={"color": "k", "lw": 3})
    ax.set_title(title, fontsize=20)
    ax.tick_params(axis='x', which='major', labelsize=20)
    ax.set_xlabel(xlabel, fontsize=20)
    ax.set_ylabel(ylabel, fontsize=20)
    # Format y ticks as percentages with a formatter rather than freezing label
    # strings with set_yticklabels(): fixed labels on auto-located ticks can end
    # up on the wrong positions and trigger a matplotlib warning.
    ax.yaxis.set_major_formatter(FuncFormatter(lambda value, pos: '{:,.2%}'.format(value)))
    # Summary statistics in the top-right corner (axes coordinates).
    ax.text( 0.99, 0.99, f"平均値: {mean:.2f} \n 中央値: {median:.2f} \n 標準偏差: {std:.2f}", horizontalalignment='right', verticalalignment='top', transform=ax.transAxes, fontsize=20)

In [None]:
# Histogram of message counts per channel (channels with at least one message).
s = (
    df.groupby('channel_name')
    .size()
    .sort_values(ascending=False)
    .loc[lambda counts: counts > 0]
)
plot_hist(s, 'チャンネルごとの発言数のヒストグラム', bins=100, xlabel='発言数')

In [None]:
# Pareto-style curve: cumulative share of messages as channels are added from busiest to quietest.
s = df.groupby('channel_name').size().sort_values(ascending=False)
sum_ = s.sum()
cumulative = s.cumsum()
tmp_df = pd.concat([s, cumulative, 100*cumulative/sum_], axis=1)
tmp_df.columns = ['number', 'cumsum', 'cumsum_percent']

fig = plt.figure(figsize=(16, 4))
ax = fig.add_subplot(1, 1, 1)

# x is simply the rank of the channel (0 = busiest).
n = tmp_df.shape[0]
x = np.arange(0, n)
ax.plot(x, tmp_df['cumsum_percent'], linewidth=5)
ax.tick_params(axis='both', labelsize=20)
ax.set_xlabel('Channel数', fontsize=20)
ax.set_ylabel('発言数の累積％', fontsize=20)
# Title fix: 'Slcak' was a typo for 'Slack'.
ax.set_title('Slack Channelと発言数のパレート図', fontsize=30)

# 自然言語処理

In [None]:
# Strip Slack markup (<...> tokens such as links and mentions), :emoji: codes and newlines.
# regex=True is passed explicitly: on pandas >= 2.0 str.replace treats the pattern
# as a literal string by default, which would leave the markup untouched.
df['text_trimed'] = (
    df['text']
    .str.replace(r'<\S+>', '', regex=True)
    .str.replace(r':\S+:', '', regex=True)
    .str.replace('\n', '', regex=False)
)

# Doc2Vec

In [None]:
# Channels ranked by message count, then the top 100 / 10 / 20 names.
tmp_df = df.groupby('channel_name').size().sort_values(ascending=False)
ranked_channels = tmp_df.index.tolist()
top100_channel = ranked_channels[:100]
top10_channel = ranked_channels[:10]
top20_channel = ranked_channels[:20]

In [None]:
# Tokenize the concatenated text of each top-100 channel.
# NOTE: split_into_words is defined in the next cell; run that cell first.
grouped = df[df['channel_name'].isin(top100_channel)].groupby('channel_name')
channel_words = {}
for channel_name, tmp_df in tqdm_notebook(grouped):
    # Messages are joined without a separator.
    doc = ''.join(tmp_df['text_trimed'].tolist())
    channel_words[channel_name] = split_into_words(doc)

In [None]:
def split_into_words(doc):
    """Tokenize ``doc`` with MeCab and keep verbs, adjectives and non-numeric nouns.

    Args:
        doc: text to analyse.

    Returns:
        List of surface forms (first tab-separated field of each MeCab output
        line) whose part-of-speech field starts with 動詞, 形容詞 or 名詞,
        excluding 名詞-数.
    """
    # A new tagger using the NEologd dictionary is created on every call.
    mecab = MeCab.Tagger("-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
    words = []
    for line in tqdm_notebook(mecab.parse(doc).splitlines()):
        chunks = line.split('\t')
        # Lines without a 4th field carry no part-of-speech tag to test; skip them.
        if len(chunks) <= 3:
            continue
        pos = chunks[3]
        if pos.startswith(('動詞', '形容詞')):
            words.append(chunks[0])
        elif pos.startswith('名詞') and not pos.startswith('名詞-数'):
            words.append(chunks[0])
    return words

In [None]:
# Words passed to WordCloud as stopwords (kept in the original order,
# including the two empty-string entries).
stop_words = [
    'てる', 'いる', 'なる', 'れる', 'する', 'ある', 'こと', 'これ', 'さん', 'して',
    'くれる', 'やる', 'くださる', 'そう', 'せる', 'した', '思う',
    'それ', 'ここ', 'ちゃん', 'くん', '',
    'て', 'に', 'を', 'は', 'の', 'が', 'と', 'た', 'し', 'で',
    'ない', 'も', 'な', 'い', 'か', 'ので', 'よう', '',
    '思い', 'なっ', 'でき', 'いい', 'もの', 'あり', 'なり', 'ところ',
    'こちら', '本日', 'おり', 'ください', 'お願い', 'いたし', 'ため', 'いただき',
    'gt', 'commented', 'on',
    '思っ', '行っ', 'しまっ', 'やっ', '行き', 'とき', 'できる', '自分', '書い', 'あと',
]

In [None]:
channel_name = 'general'

# Concatenate every trimmed message of the channel and tokenize it.
tmp_df = df[df['channel_name'] == channel_name]
tmp_doc = ''.join(tmp_df['text_trimed'].tolist())
tmp_words = split_into_words(tmp_doc)

# Font file used to render the words (presumably a macOS system font — confirm on other platforms).
fpath = "/System/Library/Fonts/ヒラギノ角ゴシック W3.ttc"
wordcloud = WordCloud(
    background_color="white",
    width=900,
    height=500,
    font_path=fpath,
    stopwords=stop_words,
).generate(" ".join(tmp_words))

# Show the cloud as a bare image: no spines, tick labels or grid.
fig = plt.figure(figsize=(16, 10))
ax = fig.add_subplot(1, 1, 1)
ax.imshow(wordcloud)
for spine in ax.spines.values():
    spine.set_visible(False)

ax.set_yticklabels([])
ax.set_xticklabels([])
ax.grid(False)
ax.set_title(f'#{channel_name}', fontsize=20)

In [None]:
channel_name = 'random'

# Concatenate every trimmed message of the channel and tokenize it.
tmp_df = df[df['channel_name'] == channel_name]
tmp_doc = ''.join(tmp_df['text_trimed'].tolist())
tmp_words = split_into_words(tmp_doc)

# Font file used to render the words (presumably a macOS system font — confirm on other platforms).
fpath = "/System/Library/Fonts/ヒラギノ角ゴシック W3.ttc"
wordcloud = WordCloud(
    background_color="white",
    width=900,
    height=500,
    font_path=fpath,
    stopwords=stop_words,
).generate(" ".join(tmp_words))

# Show the cloud as a bare image: no spines, tick labels or grid.
fig = plt.figure(figsize=(16, 10))
ax = fig.add_subplot(1, 1, 1)
ax.imshow(wordcloud)
for spine in ax.spines.values():
    spine.set_visible(False)

ax.set_yticklabels([])
ax.set_xticklabels([])
ax.grid(False)
ax.set_title(f'#{channel_name}', fontsize=20)

In [None]:
channel_name = 'times_ikedayu'

# Concatenate every trimmed message of the channel and tokenize it.
tmp_df = df[df['channel_name'] == channel_name]
tmp_doc = ''.join(tmp_df['text_trimed'].tolist())
tmp_words = split_into_words(tmp_doc)

# Font file used to render the words (presumably a macOS system font — confirm on other platforms).
fpath = "/System/Library/Fonts/ヒラギノ角ゴシック W3.ttc"
wordcloud = WordCloud(
    background_color="white",
    width=900,
    height=500,
    font_path=fpath,
    stopwords=stop_words,
).generate(" ".join(tmp_words))

# Show the cloud as a bare image: no spines, tick labels or grid.
fig = plt.figure(figsize=(16, 10))
ax = fig.add_subplot(1, 1, 1)
ax.imshow(wordcloud)
for spine in ax.spines.values():
    spine.set_visible(False)

ax.set_yticklabels([])
ax.set_xticklabels([])
ax.grid(False)
ax.set_title(f'#{channel_name}', fontsize=20)

# Doc2Vec

In [None]:
# One tagged document per channel: the channel's word list, tagged with the channel name.
sentences = [
    TaggedDocument(words=words, tags=[channel_name])
    for channel_name, words in channel_words.items()
]

In [None]:
# Build the Doc2Vec model (dm=0) on the per-channel documents.
# NOTE(review): passing the corpus to the constructor presumably already trains the
# model once; the loop below then trains it further — confirm this is intended.
model = models.Doc2Vec(
    sentences,
    dm=0,
    vector_size=300,
    window=15,
    alpha=.025,
    min_alpha=.025,
    min_count=1,
    sample=1e-6,
)

print('\n訓練開始')
# Manual learning-rate schedule: lower alpha by a fixed step after every pass and
# pin min_alpha to it so each train() call runs at a constant rate.
alpha_step = (0.025 - 0.0001) / 19
for epoch in range(20):
    print(f'Epoch: {epoch + 1}')
    model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs)
    model.alpha -= alpha_step
    model.min_alpha = model.alpha

In [None]:
# Persist the trained Doc2Vec model next to the other Slack artefacts.
model.save(str(data_dir/'kaizen_slack/d2v.model'))

In [None]:
# Reload the saved Doc2Vec model so the cells below can run without retraining.
model = models.Doc2Vec.load(str(data_dir/'kaizen_slack/d2v.model'))

In [None]:
# For each of the 20 busiest channels, record its three most similar channels
# as "name(similarity)" strings under the keys 1位 / 2位 / 3位.
ls_similar_doc = []
for channel_name in top20_channel:
    neighbours = model.docvecs.most_similar(channel_name, topn=3)
    similar_doc = {'channel_name': channel_name}
    similar_doc.update({
        f'{rank}位': f"{cname}({value:.2f})"
        for rank, (cname, value) in enumerate(neighbours, start=1)
    })
    ls_similar_doc.append(similar_doc)

In [None]:
# Tabulate the similarity results with a fixed column order: channel, then ranks 1-3.
df_sim = pd.DataFrame(ls_similar_doc)[['channel_name', '1位', '2位', '3位']]

In [None]:
# Copy the rows for channels whose name contains 'times_' to the clipboard, ';'-separated.
df_sim[df_sim['channel_name'].str.contains('times_')].to_clipboard(sep=';')

# t-SNE

In [None]:
# Project the document vectors of the top-20 channels (excluding 'kz-' channels) to 2-D with t-SNE.
# NOTE(review): no random_state is set, so the layout differs between runs — the fixed
# axis limits in the plotting cell below presumably match one particular run.
target_channels = list(filter(lambda c: 'kz-' not in c, top20_channel))
channel_vectors = [model.docvecs[cname] for cname in target_channels]
X = np.stack(channel_vectors)
tsne = TSNE(n_components=2, n_iter=100000, learning_rate=4)
X_embedded = tsne.fit_transform(X)

In [None]:
# Scatter plot of the 2-D embedding with one text label per channel.
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(1, 1, 1)

ax.scatter(X_embedded.T[0], X_embedded.T[1])
ax.set_xlim(-0.105, -0.094)
ax.set_ylim(-0.075, -0.063)
fontsize=19

# Hand-tuned (dx, dy) label offsets for specific channels; every other channel
# uses the default offset.
label_offsets = {
    'ad-cs': (0.0001, 0.0005),
    'support-tech': (0.0001, -0.0005),
    'cs-engineering': (0.0001, -0.0005),
    'prd-random': (0.0001, -0.0005),
    'ad-dev-qa': (-0.001, 0.0005),
    'prd-team-sre': (0.0001, -0.0007),
}
default_offset = (0.0001, 0.0001)

for i, c_name in enumerate(target_channels):
    dx, dy = label_offsets.get(c_name, default_offset)
    ax.annotate(c_name, (X_embedded[i][0] + dx, X_embedded[i][1] + dy), fontsize=fontsize)