# Installs

In [1]:
!pip install -q -U kaleido

[K     |████████████████████████████████| 79.9 MB 105 kB/s 
[?25h

# Mount Drive

We mount Google Drive to access the data.
Before running the notebook, set the path of your data directory in the "Globals" section below.

In [2]:
from google.colab import drive, output

# Make the Drive contents available under /content/drive.
drive.mount('/content/drive')


def BEEP():
    """Play the Beep-09 audio clip in the browser by evaluating JavaScript through Colab's output API."""
    return output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')

Mounted at /content/drive


# Imports

In [3]:
import kaleido
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.manifold import TSNE
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score
from matplotlib import pyplot as plt
import plotly.express as px
from typing import Union, List, Dict, Tuple
from tqdm.notebook import tqdm
from functools import lru_cache
import os
import warnings

# Globals

In [4]:
# Data directory on the mounted drive; change this to point at your own copy of the data.
DATA_DIR_PATH = r"/content/drive/MyDrive/NLP_And_Social_Dynamics/Data"
# Sub-folders of the data directory.
IRA_DIR_PATH = os.path.join(DATA_DIR_PATH, 'IRA_tweets', 'Raw')
PREPROCESSED_DIR_PATH = os.path.join(DATA_DIR_PATH, 'preprocessed_data')
# Folder that receives the exported figures.
PLOTS_DIR_PATH = r"/content/drive/MyDrive/NLP_And_Social_Dynamics/plots/sensitivity_analysis"
# When True, the plotting cells also write each figure as a PNG under PLOTS_DIR_PATH.
WRITE_PLOTS = False
# `makedirs` also creates missing parent folders (e.g. `plots/`), and
# `exist_ok=True` avoids the check-then-create race of `isdir` + `mkdir`.
os.makedirs(PLOTS_DIR_PATH, exist_ok=True)

# Load Model's Results Data

In [5]:
# Read the model results from the configured preprocessed-data folder (instead of
# a second hard-coded copy of the path) and skip any "Unnamed: N" column.
mrdf = pd.read_csv(os.path.join(PREPROCESSED_DIR_PATH, 'model_res.csv'),
                   usecols=lambda x: 'unname' not in x.lower())

# Missing labels / predictions are filled with 'non_troll'.
# Assigning the result back (rather than `mrdf[c].fillna(..., inplace=True)`)
# avoids chained in-place modification, which silently does nothing when
# pandas Copy-on-Write is enabled.
label_cols = ['interactions_predictions', 'content_predictions', 'combined_predictions', 'user_label']
mrdf[label_cols] = mrdf[label_cols].fillna('non_troll')

# Shorter column names used throughout the rest of the notebook.
col_renamer = {'troll_label': 'source_troll_label',
               'interactions_predictions': 'iemb_pred',
               'content_predictions': 'cemb_pred',
               'combined_predictions': 'combined_pred'}
mrdf = mrdf.rename(columns=col_renamer)

# Column layout: identifiers and labels first, then the `i_emb*` columns,
# then the `c_emb*` columns, then whatever else the file contains.
cols_order = ['user_id', 'source_troll_label', 'user_label', 'iemb_pred',
              'cemb_pred', 'combined_pred', 'number_of_communications']
cols_order += [col for col in mrdf.columns if col.startswith('i_emb')]
cols_order += [col for col in mrdf.columns if col.startswith('c_emb')]
cols_order += [c for c in mrdf.columns if c not in cols_order]
mrdf = mrdf[cols_order]
mrdf.head()

Unnamed: 0,user_id,source_troll_label,user_label,iemb_pred,cemb_pred,combined_pred,number_of_communications,i_emb_1,i_emb_2,i_emb_3,...,c_emb_374,c_emb_375,c_emb_376,c_emb_377,c_emb_378,c_emb_379,c_emb_380,c_emb_381,c_emb_382,c_emb_383
0,4055906223,RightTroll,non_troll,non_troll,non_troll,non_troll,3,0.462099,-0.188525,0.292322,...,0.289764,-0.386197,0.099142,-0.057479,-0.106457,-0.133397,0.069177,0.283184,0.188373,0.026228
1,19318314,LeftTroll,non_troll,non_troll,RightTroll,non_troll,1,-0.233147,0.455398,1.826569,...,-0.116023,-0.100995,0.598074,0.050329,-0.017465,0.066769,0.316101,-0.384427,-0.133672,-0.15022
2,91704353,HashtagGamer,non_troll,non_troll,non_troll,non_troll,1,0.379801,0.279609,-0.145845,...,-0.220358,-0.163694,0.492782,-0.073708,-0.285252,0.265858,0.2254,-0.308415,0.081859,0.263814
3,141457038,RightTroll,non_troll,non_troll,non_troll,non_troll,1,0.459218,-0.192166,0.292625,...,0.509176,-0.132968,0.052189,0.134628,-0.541072,0.071889,-0.112296,0.236368,0.833883,-0.326172
4,322879013,RightTroll,non_troll,non_troll,non_troll,non_troll,1,0.768351,0.328922,0.696026,...,0.584121,0.123896,0.606532,0.25565,-0.005507,0.289792,0.632154,-0.141409,0.076882,-0.289222


# Explore Model Results

In [6]:
# Size of the results table as (rows, columns).
mrdf.shape

(27228, 455)

In [7]:
# Number of rows per source-troll label, most frequent first.
mrdf['source_troll_label'].value_counts()

LeftTroll       10744
RightTroll       8448
HashtagGamer     7904
Newsfeed          126
Fearmonger          5
Unknown             1
Name: source_troll_label, dtype: int64

## Account Types Distribution

In [8]:
# Rows per source-troll label, drawn as a bar chart with a log-scaled y axis.
val_count = mrdf['source_troll_label'].value_counts()
x, y = val_count.index.to_numpy(), val_count.to_numpy()
fig_name = "Ego-centered Trolls Account-Types Distribution"
fig = px.bar(x=x, y=y, log_y=True, labels=dict(x='Labels', y='Count'))
fig.update_layout(title_text=fig_name, title_x=0.5, font=dict(size=14))
if WRITE_PLOTS:
    # File name: the lower-cased title with spaces replaced by underscores.
    png_name = f"{fig_name}.png".lower().replace(' ', '_')
    fig.write_image(os.path.join(PLOTS_DIR_PATH, png_name))
fig.show()

In [9]:
# Rows per user label over the whole table; counted once and reused for both axes.
user_label_counts = mrdf['user_label'].value_counts()
x = user_label_counts.index.to_numpy()
y = user_label_counts.to_numpy()
fig_name = "Ego-Networks Users Account-Types Distribution"
fig = px.bar(x=x, y=y, log_y=True, labels=dict(x='Labels', y='Count'))
fig.update_layout(title_text=fig_name, title_x=0.5, font=dict(size=14))
if WRITE_PLOTS:
    # File name: the lower-cased title with spaces replaced by underscores.
    png_name = f"{fig_name}.png".lower().replace(' ', '_')
    fig.write_image(os.path.join(PLOTS_DIR_PATH, png_name))
fig.show()

### Account Types Distribution (Per Source)

In [10]:
# One log-scaled bar chart per source-troll label, showing the user-label
# counts among the rows that belong to that source label.
for src_trl in mrdf['source_troll_label'].unique():
    _fildf = mrdf.loc[mrdf['source_troll_label'] == src_trl]
    _label_counts = _fildf['user_label'].value_counts()
    x, y = _label_counts.index.to_numpy(), _label_counts.to_numpy()
    fig_name = f"{src_trl}s-centered Network: Users Account-Types Distribution"
    fig = px.bar(x=x, y=y, log_y=True, labels=dict(x='Labels', y='Count'))
    fig.update_layout(title_text=fig_name, title_x=0.5, font=dict(size=14))
    if WRITE_PLOTS:
        png_name = f"{fig_name}.png".lower().replace(' ', '_')
        fig.write_image(os.path.join(PLOTS_DIR_PATH, png_name))
    fig.show()

## Models-Classifications, Confusion Matrix

In [12]:
# For the gold-label column and each prediction column, print which of the
# gold labels occur in it; a label that never occurs is shown as dashes.
_cols = ['user_label', 'iemb_pred', 'cemb_pred', 'combined_pred']
_opts = sorted(mrdf['user_label'].unique(), reverse=True)
for _col in _cols:
    _types = set(mrdf[_col].unique())
    _toprint = [_opt if _opt in _types else '-' * len(_opt) for _opt in _opts]
    # Header cell: "<column>s" followed by the computed right padding, then
    # left-padded with spaces to 16 characters so the rows line up.
    _pad_size = (15 - len(_col)) // 2
    _header = f"{_col}s{' ' * _pad_size}".rjust(16)
    print(f"*{_header.upper()}:  ", ' | '.join(_toprint))

*   USER_LABELS  :   non_troll | Unknown | RightTroll | Newsfeed | LeftTroll | HashtagGamer | Fearmonger
*   IEMB_PREDS   :   non_troll | ------- | RightTroll | Newsfeed | LeftTroll | HashtagGamer | ----------
*   CEMB_PREDS   :   non_troll | ------- | RightTroll | Newsfeed | LeftTroll | HashtagGamer | Fearmonger
* COMBINED_PREDS :   non_troll | ------- | RightTroll | Newsfeed | LeftTroll | HashtagGamer | Fearmonger


In [13]:
# Gold labels: the first entry of `_cols` ('user_label').
y_true  = mrdf[_cols[0]]
# Distinct gold labels in order of first appearance.
_lbls = y_true.unique()
_lbls

array(['non_troll', 'Newsfeed', 'RightTroll', 'HashtagGamer', 'LeftTroll',
       'Unknown', 'Fearmonger'], dtype=object)

In [14]:
# For every prediction column build two label-by-label matrices
# (rows = gold label, columns = predicted label, both ordered as `_lbls`):
#   'm'   - raw counts
#   'log' - log10 of the counts, with empty cells kept at 0
conf_matrices = {}
_n_lbls = _lbls.shape[0]
for _col in _cols[1:]:
    _y_pred = mrdf[_col]
    conf_mat = np.zeros((_n_lbls, _n_lbls))
    conf_mat_log = np.zeros((_n_lbls, _n_lbls))
    for g, _gold in enumerate(_lbls):
        _is_gold = y_true == _gold
        for p, _pred in enumerate(_lbls):
            _val = int((_is_gold & (_y_pred == _pred)).sum())
            conf_mat[g, p] = _val
            conf_mat_log[g, p] = np.log10(_val) if _val > 0 else 0
    conf_matrices[_col] = {'m': conf_mat, 'log': conf_mat_log}

In [16]:
import plotly.graph_objects as go

# Readable name of each prediction column, used for figure titles and file names.
embs_dict = {
    'iemb_pred': 'interactions_embeddings_predictions',
    'cemb_pred': 'content_embeddings_predictions',
    'combined_pred': 'combined_predictions',
}

for _emb, _cmat in conf_matrices.items():
    fig_name = f"{embs_dict[_emb]}_confusion_matrix"
    # Cells are coloured by the log10 counts and annotated with the raw counts.
    _heatmap = go.Heatmap(x=_lbls, y=_lbls, z=_cmat['log'],
                          text=_cmat['m'], texttemplate="%{text}",
                          textfont=dict(size=12))
    fig = go.Figure(data=_heatmap)
    # Title: the figure name with underscores turned into spaces and each word capitalised.
    _title = ' '.join(word.capitalize() for word in fig_name.split('_'))
    fig.update_layout(title_text=_title,
                      title_x=0.5,
                      xaxis_title=f"Predicted ({_emb})",
                      yaxis_title='Golden Truth',
                      font=dict(size=14))
    if WRITE_PLOTS:
        _png_name = f"{fig_name}.png".lower().replace(' ', '_')
        fig.write_image(os.path.join(PLOTS_DIR_PATH, _png_name))
    fig.show()

## Compare Confusion Matrices

In [38]:
# Show the first rows of the results table again for reference.
mrdf.head()

Unnamed: 0,user_id,source_troll_label,user_label,iemb_pred,cemb_pred,combined_pred,number_of_communications,i_emb_1,i_emb_2,i_emb_3,...,c_emb_374,c_emb_375,c_emb_376,c_emb_377,c_emb_378,c_emb_379,c_emb_380,c_emb_381,c_emb_382,c_emb_383
0,4055906223,RightTroll,non_troll,non_troll,non_troll,non_troll,3,0.462099,-0.188525,0.292322,...,0.289764,-0.386197,0.099142,-0.057479,-0.106457,-0.133397,0.069177,0.283184,0.188373,0.026228
1,19318314,LeftTroll,non_troll,non_troll,RightTroll,non_troll,1,-0.233147,0.455398,1.826569,...,-0.116023,-0.100995,0.598074,0.050329,-0.017465,0.066769,0.316101,-0.384427,-0.133672,-0.15022
2,91704353,HashtagGamer,non_troll,non_troll,non_troll,non_troll,1,0.379801,0.279609,-0.145845,...,-0.220358,-0.163694,0.492782,-0.073708,-0.285252,0.265858,0.2254,-0.308415,0.081859,0.263814
3,141457038,RightTroll,non_troll,non_troll,non_troll,non_troll,1,0.459218,-0.192166,0.292625,...,0.509176,-0.132968,0.052189,0.134628,-0.541072,0.071889,-0.112296,0.236368,0.833883,-0.326172
4,322879013,RightTroll,non_troll,non_troll,non_troll,non_troll,1,0.768351,0.328922,0.696026,...,0.584121,0.123896,0.606532,0.25565,-0.005507,0.289792,0.632154,-0.141409,0.076882,-0.289222


In [39]:
# Show the label array again for reference.
_lbls

array(['non_troll', 'Newsfeed', 'RightTroll', 'HashtagGamer', 'LeftTroll',
       'Unknown', 'Fearmonger'], dtype=object)

In [42]:
# Spot check: number of rows whose user label is 'Fearmonger' and whose `iemb_pred` is 'LeftTroll'.
mrdf[(mrdf['user_label'] == 'Fearmonger') & (mrdf['iemb_pred'] == 'LeftTroll')].shape[0]

1

In [43]:
# Integer confusion matrices (rows = gold `user_label`, columns = prediction,
# both ordered as `_lbls`), one per prediction column. Every cell starts at -1
# and is overwritten with the number of matching rows.
_n_lbls = _lbls.shape[0]
iemb_confusion_matrix = -np.ones((_n_lbls, _n_lbls), dtype=np.int32)
cemb_confusion_matrix = -np.ones((_n_lbls, _n_lbls), dtype=np.int32)
comb_confusion_matrix = -np.ones((_n_lbls, _n_lbls), dtype=np.int32)
_matrix_per_column = (('iemb_pred', iemb_confusion_matrix),
                      ('cemb_pred', cemb_confusion_matrix),
                      ('combined_pred', comb_confusion_matrix))
for i_lab, glab in enumerate(_lbls):
    _is_gold = mrdf['user_label'] == glab
    for j_lab, plab in enumerate(_lbls):
        for _pred_col, _matrix in _matrix_per_column:
            _matrix[i_lab, j_lab] = (_is_gold & (mrdf[_pred_col] == plab)).sum()

In [49]:
# Wrap each confusion matrix in a DataFrame labelled by `_lbls` on both axes.
iemb_mat_df, cemb_mat_df, comb_mat_df = (
    pd.DataFrame(_matrix, index=_lbls, columns=_lbls)
    for _matrix in (iemb_confusion_matrix, cemb_confusion_matrix, comb_confusion_matrix)
)
iemb_mat_df

Unnamed: 0,non_troll,Newsfeed,RightTroll,HashtagGamer,LeftTroll,Unknown,Fearmonger
non_troll,25832,0,27,5,2,0,0
Newsfeed,156,0,1,0,1,0,0
RightTroll,120,2,269,8,6,0,0
HashtagGamer,133,0,9,319,0,0,0
LeftTroll,92,0,9,1,230,0,0
Unknown,1,0,1,0,0,0,0
Fearmonger,3,0,0,0,1,0,0


In [52]:
# Shape of the element-wise equality mask between the two matrices (the mask itself is not displayed).
(iemb_confusion_matrix == cemb_confusion_matrix).shape

(7, 7)

In [55]:
# Keep only the cells that are positive and identical in the `iemb` and `cemb`
# matrices; every other cell is shown as NaN.
_same_and_positive = (iemb_mat_df == cemb_mat_df) & (iemb_mat_df > 0)
iemb_mat_df.where(_same_and_positive)

Unnamed: 0,non_troll,Newsfeed,RightTroll,HashtagGamer,LeftTroll,Unknown,Fearmonger
non_troll,,,,,,,
Newsfeed,,,,,,,
RightTroll,,,269.0,,,,
HashtagGamer,,,,,,,
LeftTroll,,,,,,,
Unknown,1.0,,1.0,,,,
Fearmonger,,,,,1.0,,


## Dimensionality Reduction

In [22]:
def get_tsne_dim_df(data: pd.DataFrame, cols: Union[List[str], None]=None, n_comp: int=2,
                    verbose: bool=True, random_state: Union[int, None]=None) -> pd.DataFrame:
    """
    Reduce the selected columns of `data` with t-SNE and return the result as
    columns "tsne{i}" for each i in the 1-indexed range of n_comp (tsne1, tsne2, etc.).
    The t-SNE model is created with init='pca' and metric='cosine'.
    :param data: (pd.DataFrame) the data to reduce the dimensions.
    :param cols: (list) list of columns to reduce (default=None -> all the columns of `data`).
    :param n_comp: (int) the number of components to reduce the data to (default=2).
    :param verbose: (bool) print the progress of the process.
    :param random_state: (int | None) seed forwarded to TSNE; pass an int to get
      the same embedding on every run (default=None -> not seeded, as before).
    :return: (pd.DataFrame) dataframe with the columns of `data` that did not enter
      t-SNE (in sorted name order, as produced by np.setdiff1d) followed by the new
      columns 'tsne1'....'tsne{n_comp}'. When `cols` is None the result holds only
      the t-SNE columns, indexed like `data`.
    """
    # Fix: with cols=None the original reached `data[None]` and raised KeyError,
    # although the documented meaning of None is "use all the data".
    feature_cols = data.columns if cols is None else cols
    with warnings.catch_warnings():
        # Warnings raised while preparing and fitting are suppressed.
        warnings.simplefilter('ignore')
        if verbose: print('Prep Reduced DF')
        if cols is not None:
            # Carry over every column that is not fed into t-SNE.
            tsne_df = data[np.setdiff1d(data.columns, feature_cols)].copy()
        else:
            # All columns are reduced, so nothing is carried over except the index.
            tsne_df = pd.DataFrame(index=data.index)

        if verbose: print('create TSNE')
        tsne = TSNE(n_components=n_comp, init='pca', metric='cosine', random_state=random_state)
        if verbose: print('fit transform')
        red_features = tsne.fit_transform(data[feature_cols])
        tsne_df[[f"tsne{i}" for i in range(1, n_comp+1)]] = red_features
        return tsne_df

## i_emb Dimensionality Reduction between Users' Account Types (TODO: decide whether to delete this section)

In [None]:
# Mean `i_emb*` vector for every (source troll label, user label) pair that
# occurs in the data, collected column-wise into `res_dict`.
cols = [name for name in mrdf.columns if name.startswith('i_emb')]
res_dict = {key: [] for key in ['source', 'target', *cols]}
for src_trl in tqdm(mrdf['source_troll_label'].unique(), desc='Trolls Analysis'):
    ego_df = mrdf.loc[mrdf['source_troll_label'] == src_trl]
    for tgt_user in ego_df['user_label'].unique():
        inter_agg_df = ego_df.loc[ego_df['user_label'] == tgt_user]
        _mean_df = inter_agg_df[cols].mean(axis=0)
        res_dict['source'].append(src_trl)
        res_dict['target'].append(tgt_user)
        # One mean value per embedding column, appended to that column's list.
        for _name, _value in _mean_df.items():
            res_dict[_name].append(_value)

Trolls Analysis:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
# Turn the collected lists into a table: one row per (source, target) pair.
res_df = pd.DataFrame(res_dict)
res_df

Unnamed: 0,source,target,i_emb_1,i_emb_2,i_emb_3,i_emb_4,i_emb_5,i_emb_6,i_emb_7,i_emb_8,...,i_emb_55,i_emb_56,i_emb_57,i_emb_58,i_emb_59,i_emb_60,i_emb_61,i_emb_62,i_emb_63,i_emb_64
0,RightTroll,non_troll,0.634371,-0.057366,0.518407,0.765408,-0.897043,-0.113159,0.106084,0.306854,...,1.476032,-0.428543,-1.558929,-1.018724,-0.877789,0.511925,-0.704787,-1.487561,0.402759,-0.090859
1,RightTroll,Newsfeed,1.037933,0.465172,1.040742,1.27879,-1.57272,-0.203822,0.166022,-0.098208,...,1.549931,-1.149996,-1.695703,-1.297648,-0.778495,1.687334,-0.649293,-1.715851,1.433487,-0.327739
2,RightTroll,RightTroll,1.138461,0.65399,1.042582,0.895953,-1.366342,-0.199471,0.415623,-0.144668,...,0.848246,-1.086884,-1.402302,-1.001983,-0.321715,1.340934,-0.370483,-1.471075,1.131366,-0.456595
3,RightTroll,LeftTroll,1.604773,1.388355,2.250668,0.723775,-1.642602,-0.004005,1.162591,-0.702639,...,-0.424744,-1.450585,-0.767221,-0.98714,0.773822,1.941969,0.629756,-1.610699,1.383673,-1.183889
4,RightTroll,Unknown,2.251703,0.870382,0.744884,1.701973,-1.708111,0.712639,0.652407,-0.970492,...,1.371741,-2.289809,-0.730294,-0.691497,-0.431271,1.421217,-0.259264,-1.983084,1.077592,-2.258878
5,RightTroll,HashtagGamer,1.013461,0.464268,0.645729,0.100514,-1.141286,-0.36186,0.590075,-1.586694,...,0.526064,-0.705725,-0.475715,-0.389249,0.057298,1.3706,-0.090665,-1.211387,1.678728,-0.535692
6,LeftTroll,non_troll,0.068427,0.150866,1.666597,0.567126,-0.617985,0.197706,0.525052,0.691565,...,-0.222086,-0.840268,-0.467458,-0.554666,1.143485,-0.158645,1.019783,-0.739961,-0.488156,-0.175445
7,LeftTroll,Newsfeed,0.338736,0.460344,1.874391,1.016464,-0.840827,0.170127,0.445584,0.640081,...,0.134854,-1.287244,-0.721845,-0.756282,0.945698,0.648908,0.795299,-1.130266,0.232323,-0.233987
8,LeftTroll,LeftTroll,0.317499,0.379068,1.993697,0.354112,-0.987079,0.222559,0.827417,0.592765,...,-0.475967,-0.925223,-0.444594,-0.735936,1.260444,0.227697,1.235445,-1.202284,-0.194697,-0.38473
9,LeftTroll,HashtagGamer,0.133234,0.295436,1.369576,0.649759,-0.504347,0.017878,0.277877,0.103327,...,0.285082,-1.180479,-0.481251,-0.295948,0.942939,0.620702,0.74831,-0.651127,0.613583,-0.003702


In [None]:
# Marker symbols assigned to the values of the 'target' column.
symbols_ = ['circle-open', 'square-open-dot', 'triangle-right', 'triangle-left', 'x', 'hash-open', 'star-diamond']
tsne_df = get_tsne_dim_df(res_df, cols=cols)
fig_name = "Interactions Embeddings between user-types (Dimensionality Reduction)"
# Colour encodes the source label, symbol encodes the target label.
_scatter_kwargs = dict(x='tsne1', y='tsne2', hover_name='source',
                       color='source', symbol='target', symbol_sequence=symbols_,
                       title='i_emb Dimensionality Reduction between users')
fig = px.scatter(tsne_df, **_scatter_kwargs)
fig.update_traces(marker_size=12)
# The layout title set here replaces the one passed to `px.scatter`.
fig.update_layout(title_text=fig_name, title_x=0.5, font=dict(size=14))
fig.show()

Prep Reduced DF
create TSNE
fit transform


## c_emb Dimensionality Reduction between Users' Account Types (TODO: decide whether to delete this section)

In [None]:
# Mean `c_emb*` vector for every (source troll label, user label) pair that
# occurs in the data, collected column-wise into `res_dict`.
cols = [name for name in mrdf.columns if name.startswith('c_emb')]
res_dict = {key: [] for key in ['source', 'target', *cols]}
for src_trl in tqdm(mrdf['source_troll_label'].unique(), desc='Trolls Analysis'):
    ego_df = mrdf.loc[mrdf['source_troll_label'] == src_trl]
    for tgt_user in ego_df['user_label'].unique():
        inter_agg_df = ego_df.loc[ego_df['user_label'] == tgt_user]
        _mean_df = inter_agg_df[cols].mean(axis=0)
        res_dict['source'].append(src_trl)
        res_dict['target'].append(tgt_user)
        # One mean value per embedding column, appended to that column's list.
        for _name, _value in _mean_df.items():
            res_dict[_name].append(_value)

Trolls Analysis:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
# Turn the collected lists into a table: one row per (source, target) pair.
res_df = pd.DataFrame(res_dict)
res_df

Unnamed: 0,source,target,c_emb_0,c_emb_1,c_emb_2,c_emb_3,c_emb_4,c_emb_5,c_emb_6,c_emb_7,...,c_emb_374,c_emb_375,c_emb_376,c_emb_377,c_emb_378,c_emb_379,c_emb_380,c_emb_381,c_emb_382,c_emb_383
0,RightTroll,non_troll,-0.027573,0.103014,-0.145484,-0.022523,-0.042446,-0.022019,0.142142,-0.140183,...,0.175192,-0.22268,0.144794,0.034513,-0.160973,0.044634,0.114769,-0.104527,0.064381,0.042446
1,RightTroll,Newsfeed,0.007596,0.072922,0.008856,-0.111552,0.050188,0.112475,0.106961,-0.019056,...,0.062155,-0.084921,0.051446,0.042506,-0.036312,-0.019014,-0.042829,-0.072793,-0.04346,0.152067
2,RightTroll,RightTroll,0.071161,0.222813,-0.179863,-0.044506,0.040073,-0.014745,0.202426,-0.260047,...,0.161799,-0.194375,0.154965,0.067844,-0.122077,0.080629,0.101068,-0.086392,0.112956,0.027236
3,RightTroll,LeftTroll,-0.059978,0.139706,-0.189724,-0.100135,0.073134,0.095371,0.214413,-0.167619,...,0.173853,-0.251957,0.131467,0.09577,-0.264125,0.017498,-0.081969,-0.090108,0.245226,0.05893
4,RightTroll,Unknown,-0.055217,0.513028,-0.148801,0.134757,0.044703,0.243737,0.480083,-0.279549,...,-0.017333,0.12216,0.462542,0.104045,-0.247767,0.071473,0.496951,-0.375079,-0.159477,0.113229
5,RightTroll,HashtagGamer,-0.071258,0.115238,-0.050629,-0.017098,-0.126466,-0.054177,0.305964,-0.17224,...,0.034248,-0.230475,0.238389,0.166364,-0.140278,0.079298,0.130518,-0.105691,0.047181,0.00822
6,LeftTroll,non_troll,-0.040816,0.096336,-0.110293,-0.002471,-0.078046,-0.015646,0.193031,-0.103971,...,0.149049,-0.259565,0.116514,0.103627,-0.168771,0.063779,0.181709,-0.119155,-0.021655,0.03114
7,LeftTroll,Newsfeed,-0.042235,0.129471,-0.013995,-0.105117,-0.013057,-0.030928,0.149997,0.008492,...,0.133765,-0.167044,0.048913,0.044547,-0.087415,-0.020967,-0.048867,0.004541,0.017876,0.12873
8,LeftTroll,LeftTroll,-0.061273,0.261979,-0.239641,0.069848,-0.044857,0.159128,0.292829,-0.158119,...,0.134553,-0.257017,0.114391,0.117303,-0.240747,0.051169,0.070517,-0.14463,0.012068,0.061836
9,LeftTroll,HashtagGamer,-0.061986,0.048975,-0.051864,-0.117088,-0.039267,0.012303,0.329989,-0.095112,...,0.230816,-0.054265,0.282074,0.064324,-0.245094,0.283652,0.125542,-0.100967,-0.280035,-0.176743


In [None]:
# Marker symbols assigned to the values of the 'target' column.
symbols_ = ['circle-open', 'square-open-dot', 'triangle-right', 'triangle-left', 'x', 'hash-open', 'star-diamond']
tsne_df = get_tsne_dim_df(res_df, cols=cols)
fig_name = "Content Embeddings between user-types (Dimensionality Reduction)"
# Colour encodes the source label, symbol encodes the target label.
# Fix: the figure was created with a copy-pasted 'i_emb ...' title although it
# shows the content (c_emb) embeddings; the title now comes from `fig_name`.
fig = px.scatter(tsne_df, x='tsne1', y='tsne2', hover_name='source',
                 symbol='target', symbol_sequence=symbols_,
                 color='source', title=fig_name)
fig.update_traces(marker_size=12)
fig.update_layout(title_x=0.5, font={'size': 14})
fig.show()

Prep Reduced DF
create TSNE
fit transform
