In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from statistics import mode, mean
from collections import Counter
from random import sample
from umap import UMAP
from sklearn.cluster import MeanShift, estimate_bandwidth, KMeans
from sklearn.metrics import accuracy_score, adjusted_rand_score, precision_recall_fscore_support
from csv import DictReader
from plotly import graph_objects as go
from statistics import stdev

  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit()


## Code

### Utils

In [2]:
# Whether tqdm progress bars stay on screen once they finish (passed as `leave=`).
LEAVE_BAR = False


# Holder for the descriptions of the data-loading, dimensionality-reduction
# and clustering settings used by the most recent run.
class INFO_USED():
    """Three text fields describing the pipeline settings last used.

    Every field starts as an empty string. The loading, UMAP and clustering
    helpers overwrite their own field, and the scatter-plot helpers append
    all three to plot titles and saved file names.
    """

    def __init__(self):
        self.LOAD_PARAMS_USED = self.DIM_RED_USED = self.CLUSTERING_USED = ""


# Single shared instance used by the helpers in this notebook.
INFO = INFO_USED()

In [3]:
def get_tweet_counts(list_of_tweets, fuzzy_matching=False, fuzzy_matching_threshold=0.7):
    """Returns a list of (tweet, count) tuples, sorted by count in descending order.

    Tweets with the same count keep the order in which they first appear in
    list_of_tweets, so the result is identical from run to run.

    Args:
        list_of_tweets (list):
            list of strings, where each string is a tweet.
        fuzzy_matching (bool):
            To find similar tweets using fuzzy string matching.
            Not implemented yet. Default: False.
        fuzzy_matching_threshold (float):
            Similarity threshold for fuzzy string matching over
            which to consider two tweets similar. Defaults to 0.7.
            Currently unused.

    Returns:
        (list): list of (tweet, count).

    Raises:
        NotImplementedError: if fuzzy_matching is truthy.

    TODO:
        Add fuzzy string matching for tweets.
    """
    if fuzzy_matching:
        # Fail loudly instead of reaching the return with nothing computed.
        raise NotImplementedError(
            "fuzzy_matching is not implemented yet; call with fuzzy_matching=False.")

    # Counter counts in a single pass; most_common() sorts by descending count
    # and keeps first-seen order among equal counts.
    return Counter(list_of_tweets).most_common()

In [4]:
# Sequence -> list of (value, count) pairs sorted by count
def sorted_count(array, reverse=True):
    """Returns a list of (value, count) tuples, one per unique value in array.

    Args:
        array (iterable):
            Hashable values to count.
        reverse (bool):
            True (default) sorts by descending count, False by ascending count.

    Returns:
        (list): list of (value, count). Values with equal counts keep the
        order in which they first appear in array.
    """
    # Single-pass counting; the dict keeps first-seen order and sorted() is
    # stable, so ties are ordered deterministically.
    counts = Counter(array)
    return sorted(counts.items(), key=lambda item: item[1], reverse=reverse)

In [5]:
# String to List
def str2list(string):
    """Returns the list of strings written inside a string such as "['a', 'b']".

    Args:
        string (str): A string containing a list.

    Returns:
        (list): inferred list of strings. An empty list literal ("[]")
        gives [].
    """
    import ast  # local import: only this helper needs it

    text = string.strip()
    # Content between the enclosing brackets "[" "]"
    inner = text[1:-1].strip()
    if not inner:
        return []

    # Preferred path: parse as a Python literal, which copes with commas and
    # escaped quotes inside the quoted items.
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError):
        parsed = None
    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed]

    # Fallback for text that is not a valid literal (e.g. unquoted items):
    # split at "," and remove surrounding quotes only where they are present.
    output = []
    for piece in inner.split(","):
        piece = piece.strip()
        if len(piece) >= 2 and piece[0] == piece[-1] and piece[0] in "'\"":
            piece = piece[1:-1]
        output.append(piece)
    return output

In [6]:
# Get a unique file name
def unique_filename(save_path, extra_info):
    """Generate a new unique enumerated file name, based on existing files.

    Args:
        save_path (str):
            file path. Its directory is created if it does not exist.
            A bare file name (no directory part) refers to the current
            working directory.
        extra_info (str):
            Text to append before file extension.

    Returns:
        (str): "<name><extra_info><ext>" if that path is free, otherwise
        "<name><extra_info> (N)<ext>" with the smallest N >= 1 that is free.
    """
    # dirname is "" for a bare file name and os.makedirs("") raises, so only
    # create a directory when there is one. exist_ok avoids failing if the
    # directory appears between a check and the creation.
    directory = os.path.dirname(save_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    filename, extension = os.path.splitext(save_path)
    base = str(filename) + extra_info

    candidate = base + str(extension)
    counter = 1
    while os.path.exists(candidate):
        candidate = base + " (" + str(counter) + ")" + str(extension)
        counter += 1

    return candidate

In [7]:
def scatter_plot_2d(input_data, hover_info=[], marker_size=6 ,title="", plot_save_path=""):
    """Build an interactive 2D scatter plot, optionally save it as html, and show it.

    Args:
        input_data (dict): Dictionary of (user: (feature_vector, label)), where
            each feature_vector has two components. The label colours the point.
        hover_info (list): One iterable of strings per point, joined with
            spaces and shown on hover. May be Hashtags or Mentions.
        marker_size (int, optional): Point size in plot. Defaults to 6.
        title (str, optional): Title of the plot; the settings recorded in
            INFO are appended to it. Defaults to "".
        plot_save_path (str, optional): Save path for plot html file; nothing
            is written when empty. Defaults to "".

    TODO:
        Add marker labels.
    """
    # Separate the stored (feature_vector, label) pairs, then the coordinates
    points, cluster_labels = zip(*list(input_data.values()))
    xs, ys = zip(*[tuple(point) for point in points])

    hover_text = [" ".join(x for x in item) for item in hover_info]
    marker_style = dict(size=marker_size,
                        color=cluster_labels,
                        colorscale='Viridis',
                        opacity=0.8)
    scatter = go.Scatter(x=xs, y=ys,
                         mode='markers',
                         hovertemplate='%{text}',
                         text=hover_text,
                         marker=marker_style)
    fig = go.Figure(data=[scatter])

    # Settings description goes into the title and, if saving, the file name
    run_description = INFO.LOAD_PARAMS_USED+INFO.DIM_RED_USED+INFO.CLUSTERING_USED
    fig.update_layout(title=title + run_description,
                      xaxis_title="",
                      yaxis_title="",
                      legend_title="")

    if plot_save_path:
        fig.write_html(unique_filename(plot_save_path, run_description))

    fig.show()

    return

In [8]:
def scatter_plot_3d(input_data, hover_info=[], marker_size=6 ,title="", plot_save_path=""):
    """Build an interactive 3D scatter plot, optionally save it as html, and show it.

    Args:
        input_data (dict): Dictionary of (user: (feature_vector, label)), where
            each feature_vector has three components. The label colours the point.
        hover_info (list): One iterable of strings per point, joined with
            spaces and shown on hover. May be Hashtags or Mentions.
        marker_size (int, optional): Point size in plot. Defaults to 6.
        title (str, optional): Title of the plot; the settings recorded in
            INFO are appended to it. Defaults to "".
        plot_save_path (str, optional): Save path for plot html file; nothing
            is written when empty. Defaults to "".

    TODO:
        Add marker labels.
    """
    # Separate the stored (feature_vector, label) pairs, then the coordinates
    points, cluster_labels = zip(*list(input_data.values()))
    xs, ys, zs = zip(*[tuple(point) for point in points])

    hover_text = [" ".join(x for x in item) for item in hover_info]
    marker_style = dict(size=marker_size,
                        color=cluster_labels,
                        colorscale='Viridis',
                        opacity=0.8)
    scatter = go.Scatter3d(x=xs, y=ys, z=zs,
                           mode='markers',
                           hovertemplate='%{text}',
                           text=hover_text,
                           marker=marker_style)
    fig = go.Figure(data=[scatter])

    # Settings description goes into the title and, if saving, the file name
    run_description = INFO.LOAD_PARAMS_USED+INFO.DIM_RED_USED+INFO.CLUSTERING_USED
    fig.update_layout(title=title + run_description,
                      xaxis_title="",
                      yaxis_title="",
                      legend_title="")

    if plot_save_path:
        fig.write_html(unique_filename(plot_save_path, run_description))

    fig.show()

    return

### Feature Extraction

In [9]:
class FeatureExtraction():
    """Builds per-user count feature vectors from tweets, mentions and hashtags.

    Users are handled in the order they first appear in users_list, so the
    returned dictionaries have the same key order from run to run.
    """

    def __init__(self):
        # Dictionary to lookup function for each input ["T","R","H"]
        self.functions_dict = {
            "T": self.tweets,
            "R": self.mentions,
            "H": self.hashtags}
        return

    @staticmethod
    def _count_vectors(users_list, items_per_row, vocabulary, relative_freq, desc):
        """Counts, per user, how often each vocabulary entry occurs in that user's rows.

        Args:
            users_list (list): User of each row (Non-Unique).
            items_per_row (list): For each row, an iterable of items to count.
            vocabulary (list): Unique items; position i is dimension i of the vector.
            relative_freq (bool): Divide each non-zero vector by its own sum.
            desc (str): Progress bar description.

        Returns:
            (dict): Dictionary of (user:count_vector).
        """
        column_of = {item: col for col, item in enumerate(vocabulary)}

        # Every user gets a vector, including users whose rows hold no vocabulary item.
        vectors = {user: np.zeros(len(vocabulary), dtype=int)
                   for user in dict.fromkeys(users_list)}

        # One pass over the rows instead of rescanning all rows for every user.
        rows = list(zip(users_list, items_per_row))
        for user, items in tqdm(rows, desc=desc, leave=LEAVE_BAR):
            vector = vectors[user]
            for item in items:
                col = column_of.get(item)
                if col is not None:
                    vector[col] += 1

        if relative_freq:
            for user, vector in vectors.items():
                total = vector.sum()
                if total > 0:
                    vectors[user] = vector / total

        return vectors

    def get_user_feature_vectors(self, features_to_use, users_list, *feature_vectors_to_use, feature_size=None, relative_freq=True):
        """Returns a dictionary containing, (user:feature_vector), where
        feature_vector are concatenated features provided in features_to_use.
        Args:
            features_to_use (list): 
                list of features to use e.g. ["T","R","H"]  
                T : tweets feature vectors
                R : mentions OR retweets feature vectors
                H : hashtags feature vector
                Feature vectors are concatenated in order. 
            users_list (list): 
                list of users
            feature_vectors_to_use (*args): 
                positional arguments as list of feature_vectors in same order 
                as features_to_use ["T","R","H"]. The i-th argument is used for
                the i-th entry of features_to_use; surplus arguments are ignored.
            feature_size (int): 
                Maximum length of each individual feature vector (Equivalent to
                selecting the most popular items). If None - uses all items.
            relative_freq (bool): 
                Whether to compute feature vector with relative 
                count i.e. divide by total count.
                                              
        Returns:
            (dict): Dictionary of (user:feature_vector).

        Raises:
            ValueError: if fewer feature vectors than features are given, or
                a feature is not one of "T", "R", "H".
        """
        if len(feature_vectors_to_use) < len(features_to_use):
            raise ValueError("Different num of features_to_use and num of positional arguments *feature_vectors_to_use ")

        features_dict = {}
        for feature, current_feature_vectors_to_use in zip(features_to_use, feature_vectors_to_use):
            # Only the lookup is guarded, so an error raised inside an
            # extractor is no longer reported as an invalid feature name.
            try:
                extractor = self.functions_dict[feature]
            except (KeyError, TypeError):
                raise ValueError(f'Invalid feature {feature} ! Options: ["T","R","H"] !') from None

            features_dict[feature] = extractor(
                                        users_list,
                                        current_feature_vectors_to_use,
                                        feature_size=feature_size,
                                        relative_freq=relative_freq
                                        )

        # Concatenating user feature vectors
        user_feature_vectors_dict = {}
        for user in tqdm(list(dict.fromkeys(users_list)), desc="concat user_features", leave=LEAVE_BAR):
            user_feature_vectors_dict[user] = np.concatenate( [features_dict[f][user] for f in features_to_use] )

        return user_feature_vectors_dict


    def hashtags(self, users_list, hashtags_list, feature_size=None, relative_freq=True):
        """Returns a dictionary with the hashtag feature vector of each user.
        
        Args:
            users_list (list): 
                List of all users in the dataset (Non-Unique).
            hashtags_list (list): 
                List of list of all hashtags shared.
            feature_size (int): 
                Length of the hashtags feature vector (Equivalent to selecting top popular hashtags). 
                If None - uses all hashtags in the dataset.
            relative_freq (bool): 
                Whether to compute feature vector with relative 
                count i.e. divide by total count.
                                          
        Returns:
            (dict): Dictionary of (user:hashtag_feature_vector).
        """
        # Counts of each non-empty hashtag over the collapsed list of lists
        hashtag_counts = sorted_count([h for l in hashtags_list for h in l if h])

        # Filter against feature_size, Default is None=Selects all.
        hashtag_vector = [h for h,_ in hashtag_counts[:feature_size]]

        return self._count_vectors(users_list, hashtags_list, hashtag_vector,
                                   relative_freq, "hashtag_features")
    

    def mentions(self, users_list, mentions_list, feature_size=None, relative_freq=True):
        """Returns a dictionary with the mentions feature vector of each user.
        
        Args:
            users_list (list): 
                List of all users in the dataset (Non-Unique).
            mentions_list (list): 
                List of list of all mentions shared.
            feature_size (int): 
                Length of the mentions feature vector (Equivalent to selecting top popular mentions). 
                If None - uses all mentions in the dataset.
            relative_freq (bool): 
                Whether to compute feature vector with relative count i.e. divide by total count.
                                          
        Returns:
            (dict): Dictionary of (user:mentions_feature_vector).
        """
        # Collapsing mentions of users into a single list
        all_mentions = [x for m in mentions_list for x in m if x]
        mention_counts = sorted_count(all_mentions)

        # Filter against feature_size, as documented (None selects all).
        mentions_vector = [m for m,_ in mention_counts[:feature_size]]

        return self._count_vectors(users_list, mentions_list, mentions_vector,
                                   relative_freq, "mention_features")


    def tweets(self, users_list, tweets_list, feature_size=None, relative_freq=True):
        """Returns a dictionary with the tweets feature vector of each user.
        
        Args:
            users_list (list): 
                List of all users in the dataset (Non-Unique).
            tweets_list (list): 
                List of all tweets shared, one tweet per row.
            feature_size (int): 
                Length of the tweets feature vector (Equivalent to selecting top popular tweets). 
                If None, uses all tweets in the dataset.
            relative_freq (bool): 
                Whether to compute feature vector with relative count i.e. divide by total count.
                                          
        Returns:
            (dict): Dictionary of (user:tweets_feature_vector).
        """
        # Get tweet counts, sorted by count in descending order
        tweet_counts = get_tweet_counts(tweets_list, fuzzy_matching=False)

        # Filter against feature_size, as documented (None selects all).
        tweets_vector = [tweet for tweet,_ in tweet_counts[:feature_size]]

        # Each row holds exactly one tweet; wrap it so rows look like item lists.
        tweet_rows = [(tweet,) for tweet in tweets_list]

        return self._count_vectors(users_list, tweet_rows, tweets_vector,
                                   relative_freq, "tweet_features")
    
    
    def get_user_info_labels(self, users_list, user_info_list, top_n=5):
        """Returns, for each user, that user's top_n most frequent info items.

        Args:
            users_list (list):
                List of all users in the dataset (Non-Unique).
            user_info_list (list):
                For each row, an iterable of items (e.g. hashtags) of that row's user.
            top_n (int):
                Number of most common items to keep per user. Defaults to 5.

        Returns:
            (dict): Dictionary of (user:list of items), most common first.
        """
        # One counter per user, filled in a single pass over the rows
        candidates = {user: Counter() for user in dict.fromkeys(users_list)}
        for user, items in zip(users_list, user_info_list):
            candidates[user].update(items)

        user_info_label_dict = {user: [a for a,b in counter.most_common(top_n)]
                                for user, counter in candidates.items()}

        return user_info_label_dict

### Dim Reduction (UMAP)

In [10]:
def get_umap_embedding(input_data, n_neighbors=15, n_components=3, min_distance=0.1, distance_metric='correlation', random_state=None):
    """Get the low dimensional UMAP embedding of input_data (num_samples, num_features).

    Also records the UMAP settings in INFO.DIM_RED_USED.

    Args:
        input_data (dict): 
            Dictionary of (user: feature_vector).
        n_neighbors (int, optional): 
            UMAP parameter. Defaults to 15.
        n_components (int, optional): 
            Lower Dimension components. Defaults to 3.
        min_distance (float, optional): 
            UMAP parameter. Defaults to 0.1.
        distance_metric (str, optional): 
            UMAP parameter. Defaults to 'correlation'.
        random_state (int, optional):
            Seed passed to UMAP so that an embedding can be reproduced.
            Defaults to None, which leaves UMAP unseeded as before.
    Returns:
        (dict) : Dictionary of (user: low_dim_feature_vector).
    """
    INFO.DIM_RED_USED = f" UMAP(n_neigh {n_neighbors}, min_dist {min_distance})"
    #print("\n"+INFO.DIM_RED_USED+"\n")

    dim_reducer = UMAP(n_neighbors=n_neighbors,
                       n_components=n_components,
                       min_dist=min_distance,
                       metric=distance_metric,
                       random_state=random_state)

    # reshape data into an array of shape (num_samples, num_features)
    users = list(input_data.keys())
    data = np.array( list(input_data.values()) )

    low_dim_data = dim_reducer.fit_transform(data)

    # Rows of the embedding come back in the same order as `users`
    low_dim_user_data = dict(zip( users, list(low_dim_data) ))
    return low_dim_user_data

### Clustering (Mean Shift)

In [11]:
def mean_shift_clustering(input_data, bandwidth=None):
    """Cluster the feature vectors in input_data with Mean Shift.

    Also records the clustering method in INFO.CLUSTERING_USED.

    Args:
        input_data (dict): 
            Dictionary of (user:feature_vector) to be clustered.
        bandwidth (optional): 
            Bandwidth handed to MeanShift. Defaults to None.
    Returns:
        (dict): Dictionary of ( user:(feature_vector,label) ).
    """
    INFO.CLUSTERING_USED = " MeanShift"

    # Stack the vectors into an array of shape (n_samples, n_features)
    user_ids = list(input_data.keys())
    feature_matrix = np.array( list(input_data.values()) )

    clusterer = MeanShift(bandwidth=bandwidth)
    clusterer.fit(feature_matrix)
    cluster_ids = list(clusterer.labels_)

    # Pair every user with its own row and the label assigned to that row
    return {user: (vector, cluster_id)
            for user, vector, cluster_id in zip(user_ids, feature_matrix, cluster_ids)}

### Data Load

In [12]:
# Keep the most active users that have at least min_tweets tweets
def filter_users(users_list, num_top_users, min_tweets):
    """Returns the users with the most tweets, restricted to those with tweets >= min_tweets.
    Args:
        users_list (list): 
            List of users to filter, one entry per tweet.
        num_top_users (int) : 
            Number of top users to keep. None keeps all qualifying users.
        min_tweets (int) : 
            Min number of tweets to consider a user "active/engaged".
    
    Returns:
        (list) : list of users to keep, most tweets first.
    """
    # (user, tweet count) pairs, highest count first
    ranked_users = sorted_count(users_list)

    active_users = []
    for user, tweet_count in tqdm(ranked_users, desc="filtering rows", leave=LEAVE_BAR):
        if tweet_count >= min_tweets:
            active_users.append(user)

    # Slicing with None keeps everything
    return active_users[:num_top_users]

In [13]:
# Get twitter dataset: uses Pythons native csv DictReader
def load_dataset(dataset_path="", features=[], num_top_users=None, min_tweets=0, random_sample_size=0, rows_to_read=None, user_col="user_id", str2list_cols=[]):
    """Returns the columns of the tab-separated twitter dataset, one per feature, order maintained.

    Also records the load parameters in INFO.LOAD_PARAMS_USED.
    
    Args:
        dataset_path (str) : 
            Path to the dataset csv file (tab-delimited).
        features (list) : 
            List of feature/columns names to return, if empty, returns all columns
            in file order.
        num_top_users (int) : 
            Number of top users to return. None keeps all qualifying users.
        min_tweets (int) : 
            Criteria to filter users, with tweets>=min_tweets.
        random_sample_size (int): 
            Random samples to get from the dataset, must be less than the total dataset size.
            0 disables sampling.
        rows_to_read (int) :
            Stop reading after this many rows. None reads the whole file.
        user_col (string) : 
            User Identification Column Name. MUST BE SPECIFIED.
        str2list_cols (list) : 
            Column names with list values read as string, converted back to lists using str2list.
    
    Returns:
        (zip) : iterator yielding one tuple per column, holding that column's
        values for the rows of the kept users. It yields nothing when no row
        is kept.

    Raises:
        ValueError: if dataset_path is empty, random_sample_size is invalid,
            or user_col / a str2list_cols entry is not among the columns.
    """
    INFO.LOAD_PARAMS_USED = f" #rows {rows_to_read} num_top_users {num_top_users} min_tweets {min_tweets}"
    #print("\n"+INFO.LOAD_PARAMS_USED+"\n")
    
    if not dataset_path:
        raise ValueError("Arguement dataset_path not defined !")

    # Work on a copy so the (shared) default argument is never touched
    columns = list(features)

    dataset = []
    # newline="" is what the csv module asks for when it is given a file object
    with open(dataset_path, encoding="utf8", newline="") as csv_file:  
        reader = DictReader(csv_file, delimiter='\t')

        # Empty `features` means "all columns": take them from the header
        if not columns:
            columns = list(reader.fieldnames or [])

        for i,row in enumerate(tqdm(reader, desc="reading rows", leave=LEAVE_BAR),1):
            dataset.append( tuple( [row[col] for col in columns] ) )
            
            if i==rows_to_read:
                break
    
    # Select random samples from the list
    if random_sample_size:
        try:
            dataset = sample(dataset, random_sample_size)
        except (ValueError, TypeError) as err:
            raise ValueError("random_sample_size larger than dataset size or negative !") from err
    
    # Filtering Top users with tweets>=min_tweets
    if user_col not in columns:
        raise ValueError(f"user_col {user_col!r} is not one of the columns {columns}")
    index_of_user_col = columns.index( user_col )
    users_list = [ row[index_of_user_col] for row in dataset ]

    # filtering users; a set makes the per-row membership test constant time
    users_to_keep = set(filter_users(users_list, num_top_users, min_tweets))

    # filtering rest of data, based on users_to_keep
    str2list_indices = {columns.index(col) for col in str2list_cols}
    filtered_dataset = [ tuple([x if i not in str2list_indices else str2list(x) for i,x in enumerate(row)])
                         for row in tqdm(dataset, desc="filtering data", leave=LEAVE_BAR) if row[index_of_user_col] in users_to_keep]

    # Transpose rows into columns
    return zip(* filtered_dataset )

### Full Run Code

In [14]:
def run_umap_ms(d_path, n_comp, FEATURES_TO_USE):
    """Run one pass of feature extraction, UMAP embedding and Mean Shift clustering.

    Args:
        d_path (str):
            Path to the tab-separated dataset with columns
            "id", "name", "rawTweet", "mentions", "hashtags" and "label".
        n_comp (int):
            Number of UMAP components.
        FEATURES_TO_USE (list):
            Features to use, any of "T" (tweets), "R" (mentions), "H" (hashtags).

    Returns:
        (tuple): y_true, y_pred, y_inv, user_feature_label_dict, user_info_label_dict
            y_true : per user, the rounded mean of its "label" values minus 1.
            y_pred : per user, the cluster label.
            y_inv  : 1 - y_pred (a label swap, meaningful only with two clusters).
            user_feature_label_dict : (user: (low_dim_feature_vector, label)).
            user_info_label_dict : (user: top 5 hashtags).

    Raises:
        ValueError: if FEATURES_TO_USE contains anything other than "T", "R", "H".
    """
    # Get dataset columns
    users_list, usernames_list, tweets_list, mentions_list, hashtags_list  =  load_dataset(
                            dataset_path=d_path, 
                            features=["id", "name", "rawTweet", "mentions", "hashtags"], 
                            num_top_users=None,
                            min_tweets=0,
                            random_sample_size=0, 
                            rows_to_read=None,
                            user_col="id", 
                            str2list_cols=["mentions", "hashtags"])

    # Pair every requested feature with its own column. Passing the three
    # columns positionally would hand e.g. the tweets to "H" whenever
    # FEATURES_TO_USE is not exactly ["T", "R", "H"].
    feature_sources = {"T": tweets_list, "R": mentions_list, "H": hashtags_list}
    for feature in FEATURES_TO_USE:
        if feature not in feature_sources:
            raise ValueError(f'Invalid feature {feature} ! Options: ["T","R","H"] !')
    
    ft_extract = FeatureExtraction()
    user_feature_dict = ft_extract.get_user_feature_vectors(
                            FEATURES_TO_USE,
                            users_list,
                            *[feature_sources[feature] for feature in FEATURES_TO_USE],
                            feature_size=None,
                            relative_freq=True
                            )
    user_info_label_dict = ft_extract.get_user_info_labels(
                            users_list,
                            user_info_list = hashtags_list,
                            top_n = 5
                            )
    
    low_dim_user_feature_dict = get_umap_embedding(
                            user_feature_dict,
                            n_neighbors=15,
                            n_components=n_comp,
                            min_distance=0.1,
                            distance_metric="correlation")
    
    user_feature_label_dict = mean_shift_clustering( low_dim_user_feature_dict )

    # Ground truth: one label per user from the rows without missing values
    data = pd.read_csv(d_path,delimiter = "\t")
    data = data.dropna()
    nodes = list(data['id'].unique())
    # A single group-by replaces filtering the whole frame once per user
    mean_label = data.groupby('id')['label'].mean()
    labs = {str(node): round(mean_label[node]) - 1 for node in nodes}

    y_true = [labs[str(node)] for node in nodes]
    y_pred = [user_feature_label_dict[str(node)][1] for node in nodes]
    y_inv = [1 - label for label in y_pred]

    return y_true, y_pred, y_inv, user_feature_label_dict, user_info_label_dict

In [15]:
def get_vecs(d_path, n_comp, FEATURES_TO_USE):
    """Load the dataset and return the full-dimensional user feature vectors.

    Args:
        d_path (str):
            Path to the tab-separated dataset with columns
            "id", "name", "rawTweet", "mentions", "hashtags".
        n_comp (int):
            Unused here.
        FEATURES_TO_USE (list):
            Features to use, any of "T" (tweets), "R" (mentions), "H" (hashtags).

    Returns:
        (dict): Dictionary of (user:feature_vector).

    Raises:
        ValueError: if FEATURES_TO_USE contains anything other than "T", "R", "H".
    """
    # Get dataset columns
    users_list, usernames_list, tweets_list, mentions_list, hashtags_list  =  load_dataset(
                            dataset_path=d_path, 
                            features=["id", "name", "rawTweet", "mentions", "hashtags"], 
                            num_top_users=None,
                            min_tweets=0,
                            random_sample_size=0, 
                            rows_to_read=None,
                            user_col="id", 
                            str2list_cols=["mentions", "hashtags"])

    # Pair every requested feature with its own column. Passing the three
    # columns positionally would hand e.g. the tweets to "H" whenever
    # FEATURES_TO_USE is not exactly ["T", "R", "H"].
    feature_sources = {"T": tweets_list, "R": mentions_list, "H": hashtags_list}
    for feature in FEATURES_TO_USE:
        if feature not in feature_sources:
            raise ValueError(f'Invalid feature {feature} ! Options: ["T","R","H"] !')
    
    ft_extract = FeatureExtraction()
    user_feature_dict = ft_extract.get_user_feature_vectors(
                            FEATURES_TO_USE,
                            users_list,
                            *[feature_sources[feature] for feature in FEATURES_TO_USE],
                            feature_size=None,
                            relative_freq=True
                            )

    return user_feature_dict

## Run Model

In [28]:
# --- Run configuration ---
# Folder holding the processed "<dataset>_mapping.csv" files
file_path = './Processed/'
# Dataset; options : 'euro', 'timme', 'cd', 'conref'
dat = 'cd'
# Topic, used only when dat == 'cd'; options : 'all', 'abortion', 'marijuana', 'gayRights', or 'obama'
top = 'gayRights'
# Used only when dat == 'timme': True runs TIMME-All, False runs TIMME-Pure
t_all = True
# Number of dimensions UMAP uses; True for 2, False for 3
two_d = False
# Maximum number of trials to run
n_trials = 500
# Number of two-cluster trials to collect before reporting results
trials = 10
FEATURES_TO_USE = ["T", "R", "H"]

# Turn the dataset name into the file-name prefix
if dat == 'cd':
    dat += top
elif dat == 'timme' and t_all:
    dat += '_all'

d_path = f"{file_path}{dat}_mapping.csv"
n_comp = 2 if two_d else 3

In [29]:
# Repeat the stochastic pipeline until `trials` runs produced exactly two
# clusters (or `n_trials` runs were made), then report the average scores.
res_c = []   # (accuracy, weighted F1) of every two-cluster run
res_n = []
count = 0    # number of two-cluster runs collected so far
for _trial in range(n_trials):
    y_true, y_pred, y_inv, u_feat, u_info = run_umap_ms(d_path, n_comp, FEATURES_TO_USE)
    n_clust = len(np.unique(y_pred))
    if n_clust == 2:
        # Cluster ids are arbitrary, so score both label assignments and keep the better one
        a1 = accuracy_score(y_true, y_pred)
        a2 = accuracy_score(y_true, y_inv)

        accc = max(a1,a2)
        best_pred = y_pred if a1 > a2 else y_inv
        _, _, f, _ = precision_recall_fscore_support(y_true, best_pred, average='weighted')
        res_c.append((accc, f))
        count += 1
    if count >= trials:
        break

# Reporting happens after the loop so that a run which never reaches `trials`
# two-cluster results still shows what it collected instead of ending silently.
if res_c:
    if len(res_c) < trials:
        print('Only', len(res_c), 'of', trials, 'requested trials gave 2 clusters within', n_trials, 'runs')
    accuracies = [item[0] for item in res_c]
    f1_scores = [item[1] for item in res_c]
    # Scale to percent before rounding; rounding first leaves float noise such as 87.16000000000001
    print('-----> Average across', len(res_c),'trials with 2 clusters\nAccuracy :')
    print(round(mean(accuracies) * 100, 2))
    print('Weighted F1 :')
    print(round(mean(f1_scores) * 100, 2))
    if len(res_c) > 1:
        print('Accuracy Standard Deviation :')
        print(round(stdev(accuracies) * 100, 2))
        print('Weighted F1 Standard Deviation :')
        print(round(stdev(f1_scores) * 100, 2))
else:
    print('No trial out of', n_trials, 'produced exactly 2 clusters; nothing to report')

finding couts:   0%|          | 0/969 [00:00<?, ?it/s] 

                                                                  

In [21]:
# Plot the clustered users of the last run with the plotter matching the
# embedding dimension; hover text comes from each user's info labels.
if two_d:
    plot_fn, html_path = scatter_plot_2d, "./stance_detect/results/2d_scatter_plot.html"
else:
    plot_fn, html_path = scatter_plot_3d, "./stance_detect/results/3d_scatter_plot.html"

plot_fn(u_feat,
        hover_info=list(u_info.values()),
        title="Twitter Users Scatter Plot",
        plot_save_path=html_path)