# Visualizing the shared followers at Brainhack Warsaw

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from preprocess_data import *

	"grid.linewidth: white
"
	in file "/home/mboos/.config/matplotlib/stylelib/mb.mplstyle"
	Key grid.linewidth: Could not convert "white" to float
  styles = read_style_directory(stylelib_path)


## Data loading
Our data comes as a dictionary in which each key is the username of a follower of the Brainhack Warsaw account and the corresponding value is a list of that user's own followers.
To obtain the simplest similarity measure between Twitter users, we compute a matrix of binary indicators: each entry indicates whether a user (given by the row index) is followed by another user (the _followers_ are given by the column index).

In [21]:
import json  # imported explicitly so the cell does not rely on the star import exposing it

# Load the follower lists collected so far (username -> list of that user's followers).
with open('data/data_so_far.json', 'r') as fl:
    raw_followers = json.load(fl)

# Drop users whose follower list is missing (None) and normalise every follower
# entry to `str`. The lists are materialised on purpose: `map(str, val)` is a
# one-shot lazy iterator on Python 3, so any second pass over a user's followers
# would silently see an empty sequence.
data_dict = {
    key: [str(follower) for follower in val]
    for key, val in raw_followers.items()
    if val is not None
}

# `compute_sparse_matrix_of_followers` comes from `preprocess_data`.
# NOTE(review): presumably returns a users x followers binary scipy sparse matrix
# plus the follower vocabulary -- confirm against preprocess_data.
sparse_mat, vocabulary = compute_sparse_matrix_of_followers(data_dict)

## The simplest similarity
We now compute how many followers are shared between any two users by a simple matrix multiplication.

In [22]:
# Entry (i, j) of the product is the dot product of the follower-indicator rows
# of users i and j, i.e. the number of followers they share (for binary rows the
# diagonal is each user's own follower count).
# `.toarray()` yields a plain ndarray; `.todense()` would yield the deprecated
# `np.matrix`, which recent scikit-learn input validation rejects.
shared_followers = sparse_mat.dot(sparse_mat.T).toarray()

## Embedding it in a space
We now embed all followers of Brainhack Warsaw in a two-dimensional space.
For this we use dimensionality reduction to project individual users into a space that places users with a high number of shared followers closer together than users with a low number.

In [27]:
import umap

# `metric='precomputed'` makes UMAP interpret its input as pairwise *distances*,
# but `shared_followers` holds *similarities* (more shared followers = more alike).
# Feeding it in directly would place the most similar users farthest apart, so we
# first turn the counts into distances with a monotone flip: the pair with the
# most shared followers gets the smallest distance.
shared_counts = np.asarray(shared_followers, dtype=float)
follower_distance = shared_counts.max() - shared_counts
# A user is at distance zero from itself.
np.fill_diagonal(follower_distance, 0.0)

# Fixed seed so that Restart & Run All reproduces the same layout.
shared_embedding = umap.UMAP(
    n_components=2,
    min_dist=0.1,
    metric='precomputed',
    n_neighbors=8,
    random_state=0,
).fit_transform(follower_distance)

## Visualizing the space
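We now visualize the embedding as an interactive Bokeh scatter plot. Each point is one user; hovering over it shows the username, and the marker size grows with the user's own number of followers.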

In [28]:
import matplotlib.pyplot as plt
import numpy as np
import mpld3
from sklearn.preprocessing import minmax_scale


from bokeh.io import output_file, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, LinearColorMapper
from bokeh.palettes import plasma
from bokeh.plotting import figure
from bokeh.transform import transform

# Usernames shown in the hover tooltip.
# NOTE(review): this assumes the rows of the embedding follow the sorted
# usernames -- confirm against compute_sparse_matrix_of_followers.
labels = sorted(data_dict.keys())

# The diagonal of the shared-follower matrix is each user's overlap with
# themselves, i.e. their own follower count for binary indicator rows.
follower_counts = np.asarray(np.diag(shared_followers))

source = ColumnDataSource(data=dict(
    x=shared_embedding[:, 0],
    y=shared_embedding[:, 1],
    # Marker size in screen units, rescaled to 5..30 so every point stays visible.
    z=minmax_scale(follower_counts, feature_range=(5, 30)),
    # Raw counts, used for colouring (same scale as the colour mapper below).
    followers=follower_counts,
    desc=labels,
))
hover = HoverTool(tooltips=[
    ("index", "$index"),
    ("(x,y)", "(@x, @y)"),
    ('desc', '@desc'),
])
mapper = LinearColorMapper(palette=plasma(256),
                           low=follower_counts.min(), high=follower_counts.max())

p = figure(plot_width=400, plot_height=400, tools=[hover], title="Shared followers")
# Colour by follower count: the mapper's low/high are follower counts, so it must
# be applied to the `followers` column rather than to the `y` embedding
# coordinate, whose range is unrelated.
p.circle('x', 'y', size='z', source=source,
         fill_color=transform('followers', mapper))

output_notebook()
show(p)



In [None]:

# Stand-alone Bokeh smoke test: plots a noisy sine curve with hover tooltips and
# a colour scale driven by the y value, and writes the result to `test.html`.
# `random` and `sin` are imported here because no earlier cell visibly provides
# them; without these imports the cell raises NameError on a fresh kernel.
from math import sin
from random import random

from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, HoverTool, LinearColorMapper
from bokeh.palettes import plasma
from bokeh.plotting import figure
from bokeh.transform import transform

list_x = list(range(100))
# Slow sine wave plus uniform noise in [0, 1).
list_y = [random() + sin(i / 20) for i in range(100)]
desc = [str(i) for i in list_y]

source = ColumnDataSource(data=dict(x=list_x, y=list_y, desc=desc))
hover = HoverTool(tooltips=[
    ("index", "$index"),
    ("(x,y)", "(@x, @y)"),
    ('desc', '@desc'),
])
# Here the colour scale and the mapped column agree: both are the y values.
mapper = LinearColorMapper(palette=plasma(256), low=min(list_y), high=max(list_y))

p = figure(plot_width=400, plot_height=400, tools=[hover], title="Belgian test")
p.circle('x', 'y', size=10, source=source,
         fill_color=transform('y', mapper))

# Note: this sends Bokeh output to an HTML file rather than to the notebook.
output_file('test.html')
show(p)