## Imports 

In [1]:
import os
import numpy as np
import pandas as pd
import yaml

import cufflinks as cf
import plotly.offline
# Put cufflinks into offline mode for this session; presumably this is what lets
# DataFrame.iplot() (used by visualize below) render without a plotly cloud account.
cf.go_offline()
# NOTE(review): this writes offline=False to the cufflinks config, which looks at
# odds with go_offline() above — confirm which setting is actually intended.
cf.set_config_file(offline=False, world_readable=True)

In [6]:
from sklearn.manifold import SpectralEmbedding, TSNE, LocallyLinearEmbedding, MDS
from sklearn.decomposition import PCA

# Read the project configuration. The context manager closes the file handle
# as soon as parsing is done instead of leaving it open for the kernel's lifetime.
with open('../../conf/conf.yaml') as conf_file:
    config = yaml.safe_load(conf_file)
# Directory holding the supernode embedding files read by load_emb_data.
data_path=os.path.join(config['DATA_PATH'],'embedding_results')

# load supernode embedding and metadata:
def load_emb_data(tag, data_path=data_path):
    """Load one supernode embedding together with its label table.

    Parameters
    ----------
    tag : str
        Suffix selecting the file pair ``dcc_supernodes_<tag>.csv`` and
        ``dcc_supernodes_labels_<tag>.txt`` inside ``data_path``.
    data_path : str
        Directory containing both files.

    Returns
    -------
    tuple
        ``(emb, meta)``: ``emb`` is the array read by ``np.loadtxt`` and
        ``meta`` is the tab-separated, latin1-encoded label file as a DataFrame.
    """
    emb_file = os.path.join(data_path, 'dcc_supernodes_{}.csv'.format(tag))
    meta_file = os.path.join(data_path, 'dcc_supernodes_labels_{}.txt'.format(tag))

    emb = np.loadtxt(emb_file)
    meta = pd.read_csv(meta_file, sep='\t', encoding='latin1')
    return emb, meta

### generate visualization
def visualize(emb,meta,method='TSNE',dim=2, n_iter=300, title="", random_state=None):
    """Reduce an embedding to 2 or 3 dimensions and draw an interactive scatter plot.

    Parameters
    ----------
    emb : array-like
        Embedding matrix handed to the reducer's ``fit_transform``.
    meta : pandas.DataFrame
        Table with one row per embedded point. Its ``Title`` and ``Conference``
        columns are passed to ``iplot`` as ``text`` and ``categories``.
        Columns ``x``, ``y`` (and ``z`` when ``dim == 3``) are written to this
        frame in place.
    method : str
        One of ``'TSNE'``, ``'PCA'``, ``'SpectralEmbedding'``, ``'MDS'``, ``'LLE'``.
    dim : int
        Number of output dimensions; only 2 and 3 can be plotted.
    n_iter : int
        Number of t-SNE optimisation iterations. Ignored by the other methods.
    title : str
        Plot title.
    random_state : int or None
        Optional seed forwarded to the reducer so that a run can be reproduced.
        ``None`` (the default) leaves the reducer unseeded.

    Returns
    -------
    None
        The plot is rendered as a side effect. For an unknown ``method`` or an
        unsupported ``dim`` a message is printed and nothing is computed.
    """
    import inspect

    # Validate both arguments before any work is done, so a bad request neither
    # pays for a fit nor leaves half-written columns in ``meta``.
    if method not in ('TSNE', 'PCA', 'SpectralEmbedding', 'MDS', 'LLE'):
        print('Unknown method')
        return
    if dim not in (2, 3):
        # Without this early check dim=1 would fail with an IndexError on
        # results[:,1] instead of reporting the problem.
        print('Dimension is not right')
        return

    if method == 'TSNE':
        # Newer scikit-learn releases renamed TSNE's ``n_iter`` to ``max_iter``;
        # pick whichever keyword the installed version accepts.
        tsne_params = inspect.signature(TSNE).parameters
        iter_kw = 'max_iter' if 'max_iter' in tsne_params else 'n_iter'
        reducer = TSNE(n_components=dim, verbose=1, perplexity=5, init='pca',
                       random_state=random_state, **{iter_kw: n_iter})
    elif method == 'PCA':
        reducer = PCA(n_components=dim, random_state=random_state)
    elif method == 'SpectralEmbedding':
        reducer = SpectralEmbedding(n_components=dim, random_state=random_state)
    elif method == 'MDS':
        reducer = MDS(n_components=dim, random_state=random_state)
    else:  # 'LLE'
        reducer = LocallyLinearEmbedding(n_components=dim, random_state=random_state)

    results = reducer.fit_transform(emb)

    meta['x']=results[:,0]
    meta['y']=results[:,1]
    if dim == 2:
        meta.iplot(kind='scatter',x="x",y="y", text="Title", categories="Conference", title=title)
    else:
        meta['z']=results[:,2]
        meta.iplot(kind='scatter3d',x="x",y="y",z="z", text="Title", categories="Conference", title=title)

## Visualizing Embeddings of Single Modalities

In [7]:
# Text2Graph: 2-D t-SNE view of the 't2g' supernode embedding (500 iterations)
emb, meta = load_emb_data(tag='t2g')
visualize(
    emb,
    meta,
    method='TSNE',
    dim=2,
    n_iter=500,
    title='Text2Graph (TSNE)',
)

[t-SNE] Computing 16 nearest neighbors...
[t-SNE] Indexed 410 samples in 0.001s...
[t-SNE] Computed neighbors for 410 samples in 0.006s...
[t-SNE] Computed conditional probabilities for sample 410 / 410
[t-SNE] Mean sigma: 0.442954
[t-SNE] KL divergence after 250 iterations with early exaggeration: 82.101074
[t-SNE] KL divergence after 500 iterations: 0.958247


In [8]:
# Image2Graph: 2-D t-SNE view of the 'i2g' supernode embedding (default iteration count)
emb, meta = load_emb_data(tag='i2g')
visualize(
    emb,
    meta,
    method='TSNE',
    dim=2,
    title='Image2Graph (TSNE)',
)

[t-SNE] Computing 16 nearest neighbors...
[t-SNE] Indexed 213 samples in 0.001s...
[t-SNE] Computed neighbors for 213 samples in 0.004s...
[t-SNE] Computed conditional probabilities for sample 213 / 213
[t-SNE] Mean sigma: 0.603516
[t-SNE] KL divergence after 250 iterations with early exaggeration: 72.060509
[t-SNE] KL divergence after 300 iterations: 0.882869


In [9]:
# Code2Graph (on subset of repositories): 2-D t-SNE view of the 'c2g' embedding
emb, meta = load_emb_data(tag='c2g')
visualize(
    emb,
    meta,
    method='TSNE',
    dim=2,
    n_iter=500,
    title='Code2Graph (TSNE)',
)

[t-SNE] Computing 16 nearest neighbors...
[t-SNE] Indexed 32 samples in 0.000s...
[t-SNE] Computed neighbors for 32 samples in 0.002s...
[t-SNE] Computed conditional probabilities for sample 32 / 32
[t-SNE] Mean sigma: 4.091255
[t-SNE] KL divergence after 250 iterations with early exaggeration: 68.172462
[t-SNE] KL divergence after 500 iterations: 0.705384


## Visualizing Embedding of the Combined Graph 

In [10]:
# Combined graph: 2-D t-SNE view of the 'combo' supernode embedding
emb, meta = load_emb_data(tag='combo')
visualize(
    emb,
    meta,
    method='TSNE',
    dim=2,
    title='Combined Graph (TSNE)',
)

[t-SNE] Computing 16 nearest neighbors...
[t-SNE] Indexed 654 samples in 0.001s...
[t-SNE] Computed neighbors for 654 samples in 0.012s...
[t-SNE] Computed conditional probabilities for sample 654 / 654
[t-SNE] Mean sigma: 0.000000
[t-SNE] KL divergence after 250 iterations with early exaggeration: 71.061691
[t-SNE] KL divergence after 300 iterations: 1.110954


In [11]:
# PCA view of the combined graph. This cell loads nothing itself: it reuses the
# emb and meta variables left in the kernel by an earlier cell.
# Alternative projection, currently disabled:
#   visualize(emb, meta, method='SpectralEmbedding', dim=2, title='Combined Graph (Spectral Embedding)')
visualize(
    emb,
    meta,
    method='PCA',
    dim=2,
    title='Combined Graph (PCA)',
)