In [1]:
# Preliminaries

# scratch_location = r'/scratch/hmnshpl'
import os
import sys
import heapq
import getpass
import numpy as np
import pandas as pd
import networkx as nx
from copy import deepcopy
from collections import defaultdict

dataset_name = 'wikipedia'
scratch_location = rf'/scratch/{getpass.getuser()}'


## Load Data
# Load data and train val test split
graph_df = pd.read_csv('{}/processed_data/{}/ml_{}.csv'.format(scratch_location,
                                                            dataset_name,
                                                            dataset_name)
                    )
edge_raw_features = np.load('{}/processed_data/{}/ml_{}.npy'.format(scratch_location,
                                                                    dataset_name,
                                                                    dataset_name)
                            )
node_raw_features = np.load('{}/processed_data/{}/ml_{}_node.npy'.format(scratch_location,
                                                                        dataset_name,
                                                                        dataset_name)
                            )

# Set the working directory to the project root
project_root = os.path.abspath(os.path.join(os.path.dirname('__file__'), '..')) # this might cause issue
sys.path.append(project_root)

In [2]:
from preprocess_data.temporal_pr import temporal_pagerank_with_timestamps, calc_timestamp_pagerank,\
    calc_inc_timestamp_pagerank, optimized_calc_inc_timestamp_pagerank,\
    get_temporal_pagerank, mean_shift_removal, mean_shift_removal2, compute_mean_shifts_with_metrics

In [3]:
# get the timestamp of validate and test set
val_ratio = test_ratio = 0.15
print(val_ratio, test_ratio)
val_time, test_time = list(np.quantile(graph_df.ts, [(1 - val_ratio - test_ratio), (1 - test_ratio)]))
print(val_time, test_time)

0.15 0.15
1862652.1 2218288.5999999996


In [4]:
train_graph_df = graph_df[graph_df['ts'] < val_time]
train_graph_df.head()

Unnamed: 0.1,Unnamed: 0,u,i,ts,label,idx
0,0,1,8228,0.0,0.0,1
1,1,2,8229,36.0,0.0,2
2,2,2,8229,77.0,0.0,3
3,3,3,8230,131.0,0.0,4
4,4,2,8229,150.0,0.0,5


In [5]:
print(len(train_graph_df))
print(len(train_graph_df) / len(graph_df))  # is 70% of the dataset

110232
0.700001270050929


In [6]:
tmp_graph = train_graph_df.copy(deep=True)
upto=0.7
metric='kl_divergence'
# wasserstein, kl_divergence
# jensen_shannon_divergence -- 1m47s
# wasserstein -- 3m31s
# kl_divergence -- 32s


# tmp_graph = tmp_graph.sort_values(by=['u', 'i', 'ts'])

# # Exclude the first and last rows based on 'u' and 'i'
# grouped = tmp_graph.groupby(['u', 'i'])
# modified_df = grouped.apply(lambda x: x.iloc[1:-1]).reset_index(drop=True)

In [7]:
# total_groups = len(grouped)
# rows_removed = total_groups * 2 
# total_rows_original = len(tmp_graph)
# percentage_removed = (rows_removed / total_rows_original) * 100
# percentage_removed, len(modified_df) / len(tmp_graph)

# # Group by 'u' and 'i' and capture the first and last interactions
# first_interactions = grouped.first().reset_index()
# last_interactions = grouped.last().reset_index()

# (len(first_interactions) + len(last_interactions)) / len(tmp_graph)

In [8]:
# based on maximum mean shift strategy
mean_shifts = compute_mean_shifts_with_metrics(tmp_graph, metric=metric)

Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for kl_divergence...: 100%|██████████| 110231/110231 [00:13<00:00, 8029.28it/s]

Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.





In [9]:
print(f'remove upto: {(1-upto):.2%}', 'length of mean shift is: ', len(mean_shifts), end=' ')

threshold_index = int(len(mean_shifts) * (1-upto))

print(f'{threshold_index=}')
top_mean_shifts = mean_shifts[:threshold_index]
print(len(top_mean_shifts), f'{len(top_mean_shifts) / len(mean_shifts):.2%}' )

top_x_percent_timestamps = [ts for ts, _ in top_mean_shifts]
print(len(top_x_percent_timestamps), f'{len(top_x_percent_timestamps) / len(train_graph_df["ts"]):.2%}')

# sampled_df = modified_df[~modified_df['ts'].isin(top_x_percent_timestamps)]
sampled_df = tmp_graph[~tmp_graph['ts'].isin(top_x_percent_timestamps)]
print(len(sampled_df['ts']), len(sampled_df['ts']) / len(tmp_graph['ts']))
print('data sampling successful.')

remove upto: 30.00% length of mean shift is:  110231 threshold_index=33069
33069 30.00%
33069 30.00%
76258 0.6917954858843167
data sampling successful.


In [10]:
len(top_x_percent_timestamps) #  330
len(tmp_graph['ts'])  # 110232

len(set(tmp_graph['ts']).difference(set(top_x_percent_timestamps)))  # 106602
len(set(tmp_graph['ts']).difference(set(top_x_percent_timestamps))) / len(tmp_graph['ts'])  # 106602

0.6743232455185427

In [None]:
filename = f'{scratch_location}/sparsified_data/{dataset_name}_{metric}_sparsified_{upto}.csv'
# sampled_df.drop(['Unnamed: 0'], axis=1).to_csv(filename)
print(filename)

In [None]:
assert 0 == 1

In [9]:
def sparsify_data(tmp_graph, metric, upto, dataset_name, save=False):
    # tmp_graph = tmp_graph.sort_values(by=['u', 'i', 'ts'])

    # # Exclude the first and last rows based on 'u' and 'i'
    # grouped = tmp_graph.groupby(['u', 'i'])
    # modified_df = grouped.apply(lambda x: x.iloc[1:-1]).reset_index(drop=True)
    
    # based on maximum mean shift strategy
    mean_shifts = compute_mean_shifts_with_metrics(tmp_graph, metric=metric)

    print('back to sparsify_data file....')

    threshold_index = int(len(mean_shifts) * (1-upto))
    top_mean_shifts = mean_shifts[:threshold_index]

    top_x_percent_timestamps = [ts for ts, _ in top_mean_shifts]

    sampled_df = tmp_graph[~tmp_graph['ts'].isin(top_x_percent_timestamps)]
    print('data sampling successful.')
    
    if save:
        filename = f'{scratch_location}/sparsified_data/{dataset_name}_{metric}_sparsified_{upto}.csv'
        sampled_df.drop(['Unnamed: 0'], axis=1).to_csv(filename)
        print(filename, ' saved.')
    return sampled_df

In [10]:
list_of_upto = [0.9, 0.8, 0.7]
metrics= ['cosine', 'euclidean', 'jaccard','kl_divergence', 'jensen_shannon_divergence', 'wasserstein']
tmp_graph = train_graph_df.copy(deep=True)

for upto in list_of_upto:
    print(f'{upto}')
    for metric in metrics:
        print(f'\t{metric}', end=' ')
        sampled_df = sparsify_data(tmp_graph, metric, upto, dataset_name, save=True)
        print('done')

0.9
	cosine Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for cosine...: 100%|██████████| 110231/110231 [00:05<00:00, 19446.33it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_cosine_sparsified_0.9.csv  saved.
done
	euclidean Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for euclidean...: 100%|██████████| 110231/110231 [00:04<00:00, 25896.01it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_euclidean_sparsified_0.9.csv  saved.
done
	jaccard Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for jaccard...: 100%|██████████| 110231/110231 [00:19<00:00, 5595.61it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_jaccard_sparsified_0.9.csv  saved.
done
	kl_divergence Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for kl_divergence...: 100%|██████████| 110231/110231 [00:14<00:00, 7626.92it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_kl_divergence_sparsified_0.9.csv  saved.
done
	jensen_shannon_divergence Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for jensen_shannon_divergence...: 100%|██████████| 110231/110231 [00:35<00:00, 3137.46it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_jensen_shannon_divergence_sparsified_0.9.csv  saved.
done
	wasserstein Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for wasserstein...: 100%|██████████| 110231/110231 [02:57<00:00, 620.46it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_wasserstein_sparsified_0.9.csv  saved.
done
0.8
	cosine Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for cosine...: 100%|██████████| 110231/110231 [00:05<00:00, 19593.86it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_cosine_sparsified_0.8.csv  saved.
done
	euclidean Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for euclidean...: 100%|██████████| 110231/110231 [00:04<00:00, 25997.85it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_euclidean_sparsified_0.8.csv  saved.
done
	jaccard Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for jaccard...: 100%|██████████| 110231/110231 [00:19<00:00, 5521.22it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_jaccard_sparsified_0.8.csv  saved.
done
	kl_divergence Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for kl_divergence...: 100%|██████████| 110231/110231 [00:14<00:00, 7576.21it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_kl_divergence_sparsified_0.8.csv  saved.
done
	jensen_shannon_divergence Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for jensen_shannon_divergence...: 100%|██████████| 110231/110231 [00:35<00:00, 3111.81it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_jensen_shannon_divergence_sparsified_0.8.csv  saved.
done
	wasserstein Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for wasserstein...: 100%|██████████| 110231/110231 [02:57<00:00, 619.71it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_wasserstein_sparsified_0.8.csv  saved.
done
0.7
	cosine Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for cosine...: 100%|██████████| 110231/110231 [00:05<00:00, 19504.98it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_cosine_sparsified_0.7.csv  saved.
done
	euclidean Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for euclidean...: 100%|██████████| 110231/110231 [00:04<00:00, 26001.50it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_euclidean_sparsified_0.7.csv  saved.
done
	jaccard Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for jaccard...: 100%|██████████| 110231/110231 [00:19<00:00, 5569.29it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_jaccard_sparsified_0.7.csv  saved.
done
	kl_divergence Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for kl_divergence...: 100%|██████████| 110231/110231 [00:14<00:00, 7530.24it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_kl_divergence_sparsified_0.7.csv  saved.
done
	jensen_shannon_divergence Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for jensen_shannon_divergence...: 100%|██████████| 110231/110231 [00:35<00:00, 3082.92it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_jensen_shannon_divergence_sparsified_0.7.csv  saved.
done
	wasserstein Starting mean shift and metrics computation...
Running temporal PageRank computation...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Temporal PageRank computation completed.
Sorting timestamps...
Sorting completed.
Calculating mean shifts and distance metrics...


Processing for wasserstein...: 100%|██████████| 110231/110231 [02:59<00:00, 614.51it/s]


Mean shifts and metrics calculation completed.
Sorting by the selected metric completed.
back to sparsify_data file....
data sampling successful.
/scratch/hmnshpl/sparsified_data/wikipedia_wasserstein_sparsified_0.7.csv  saved.
done


In [None]:
import pandas as pd
sampled_df=pd.read_csv(filename)
# Remove columns with 'Unnamed:' in their name
sampled_df = sampled_df.loc[:, ~sampled_df.columns.str.contains('^Unnamed')]
sampled_df.head()

In [14]:
def generate_shell_script(result_path, save_folder, email, dataset_name, model_name, patch_size,
                        max_input_sequence_length, num_runs, gpu, sparsify, strategy, sampling_upto,
                        num_cpus, num_gpus, gnode_name):
    
    # set Default variables
    result_path = "/home2/hmnshpl/projects/results" if result_path is None else result_path
    save_folder = "DygLib" if save_folder is None else save_folder
    email = "himanshu.pal@research.iiit.ac.in" if email is None else email
    dataset_name = "wikipedia" if dataset_name is None else dataset_name
    model_name = "TGN" if model_name is None else model_name
    patch_size = 2 if patch_size is None else patch_size
    max_input_sequence_length = 64 if max_input_sequence_length is None else max_input_sequence_length
    num_runs = 5 if num_runs is None else num_runs
    gpu = 0 if gpu is None else gpu
    sparsify = True if sparsify is None else sparsify
    strategy = "ts_tpr_remove_cosine" if strategy is None else strategy
    sampling_upto = 0.7 if sampling_upto is None else sampling_upto
    num_cpus = 9 if num_cpus is None else num_cpus
    num_gpus = 1 if num_gpus is None else num_gpus
    if gnode_name is None or 'gnode' not in gnode_name:
        raise ValueError("Please provide a valid gnode.")

    # Generate shell script content
    script_content = f"""#!/bin/bash
#SBATCH -A research
#SBATCH -n {num_cpus}
#SBATCH --gres=gpu:{num_gpus}
#SBATCH --mem-per-cpu=2G
#SBATCH --output={result_path}/{save_folder}/Link_Prediciton_{strategy}_{sampling_upto}.txt
#SBATCH --nodelist {gnode_name}
#SBATCH --time=96:00:00
#SBATCH --mail-user={email}
#SBATCH --mail-type=ALL

source ~/.bashrc

conda activate tg

python train_link_prediction.py --dataset_name {dataset_name} --model_name {model_name} --patch_size {patch_size} --max_input_sequence_length {max_input_sequence_length} --num_runs {num_runs} --gpu {gpu} --sparsify {sparsify} --strategy {strategy} --sampling_upto {sampling_upto}
    """

    # Specify the output filename dynamically
    output_filename = f"../LP_{strategy}_{sampling_upto}.sh"

    # Write to file
    with open(output_filename, "w") as file:
        file.write(script_content)

    print(f"Shell script '{output_filename}' has been successfully generated.")

# Example usage
# generate_shell_script("/home2/hmnshpl/projects/results", "DygLib", "himanshu.pal@research.iiit.ac.in",
#                     "wikipedia", "TGN", 2, 64, 5, 0, True, "ts_tpr_remove_wasserstein", 0.7, 9, 1, 'gnode085')


In [15]:
available_gnodes =  ['gnode074', 'gnode078', 'gnode067']
strategies = ['ts_tpr_remove_MSS', 'ts_tpr_remove_mss_2', 'ts_tpr_remove_kl_divergence', 'ts_tpr_remove_jensen_shannon_divergence',
            'ts_tpr_remove_cosine', 'ts_tpr_remove_euclidean', 'ts_tpr_remove_jaccard']
samplings = [0.7, 0.8, 0.9]

# Mapping of strategies to gnodes
strategy_to_gnode = {
    'ts_tpr_remove_wasserstein': 'gnode074',
    'ts_tpr_remove_kl_divergence': 'gnode078',
    'ts_tpr_remove_jensen_shannon_divergence': 'gnode067',
    'ts_tpr_remove_MSS':'gnode074',
    'ts_tpr_remove_mss_2':'gnode078',
    'ts_tpr_remove_cosine': 'gnode067',
    'ts_tpr_remove_euclidean': 'gnode074',
    'ts_tpr_remove_jaccard': 'gnode078', 
}

for strategy in strategies:
    for sampling in samplings:
        gnode = strategy_to_gnode[strategy]
        generate_shell_script("/home2/hmnshpl/projects/results", "DygLib", "himanshu.pal@research.iiit.ac.in",
                    "wikipedia", "TGN", 2, 64, 5, 0, True, strategy, sampling, 9, 1, gnode)

Shell script '../LP_ts_tpr_remove_MSS_0.7.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_MSS_0.8.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_MSS_0.9.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_mss_2_0.7.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_mss_2_0.8.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_mss_2_0.9.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_kl_divergence_0.7.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_kl_divergence_0.8.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_kl_divergence_0.9.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_jensen_shannon_divergence_0.7.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_jensen_shannon_divergence_0.8.sh' has been successfully generated.
Shell script '../LP_ts_tpr_remove_jensen_shannon_di

In [13]:
tmp_graph = train_graph_df.copy(deep=True)
# upto=0.7
list_of_upto = [0.9, 0.8, 0.7]

for upto in list_of_upto:
    tmp_graph = tmp_graph.sort_values(by=['u', 'i', 'ts'])

    # Exclude the first and last rows based on 'u' and 'i'
    grouped = tmp_graph.groupby(['u', 'i'])
    modified_df = grouped.apply(lambda x: x.iloc[1:-1]).reset_index(drop=True)

    mean_shifts = mean_shift_removal2(tmp_graph)

    print('back to sparsify_data file....')

    threshold_index = int(len(mean_shifts) * (1-upto))
    top_mean_shifts = mean_shifts[:threshold_index]

    top_x_percent_timestamps = [ts for ts, _ in top_mean_shifts]

    sampled_df = modified_df[~modified_df['ts'].isin(top_x_percent_timestamps)]

    filename = f'{scratch_location}/sparsified_data/{dataset_name}_mss2_sparsified_{upto}.csv'
    sampled_df.drop(['Unnamed: 0'], axis=1).to_csv(filename)
    print(filename, ' saved.')

in mss removal method....
running temporal page rank method...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Done.
sorting started....
sorting Completed.
Before calc


running mean shift: 100%|██████████| 110231/110231 [00:04<00:00, 24970.11it/s]


Done.
mean shift calc done....
back to sparsify_data file....
/scratch/hmnshpl/sparsified_data/wikipedia_mss2_sparsified_0.9.csv  saved.
in mss removal method....
running temporal page rank method...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Done.
sorting started....
sorting Completed.
Before calc


running mean shift: 100%|██████████| 110231/110231 [00:04<00:00, 26891.33it/s]


Done.
mean shift calc done....
back to sparsify_data file....
/scratch/hmnshpl/sparsified_data/wikipedia_mss2_sparsified_0.8.csv  saved.
in mss removal method....
running temporal page rank method...
	 inside tpr heap method
	 heapify successful
	 out of loop.
Done.
sorting started....
sorting Completed.
Before calc


running mean shift: 100%|██████████| 110231/110231 [00:04<00:00, 26944.18it/s]


Done.
mean shift calc done....
back to sparsify_data file....
/scratch/hmnshpl/sparsified_data/wikipedia_mss2_sparsified_0.7.csv  saved.
