In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib as mpl

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

/kaggle/input/us-politicians-twitter-dataset/dataset.csv


In [8]:
# Shell escape: list the contents of the notebook's current working directory.
!ls

__notebook_source__.ipynb


In [9]:
# Alternative source kept for reference (presumably a zipped tweet archive):
# tweets = pd.read_csv("/kaggle/input/us-politician-tweets/politics_tweets.zip")
# NOTE(review): the preview printed below shows per-politician metadata columns
# (Name, Twitter_username, Political_party, ...) and no id/username/tweet columns,
# so this file may not be the tweet table the later cells expect — confirm the path.
tweets = pd.read_csv("../input/us-politicians-twitter-dataset/dataset.csv")
tweets.head()  # last expression of the cell, so the first rows are displayed

Unnamed: 0,Name,Twitter_username,Account_start_time,Account_ID,Sex,Birthplace,Birthday,Age,Instagram_username,Political_party
0,A. Donald McEachin,RepMcEachin,2017-01-03T00:00:00Z,816181091673448448,male,Germany,1961-10-10T00:00:00Z,59.0,repmceachin,Democratic Party
1,Aaron Michlewitz,RepMichlewitz,2010-06-27T00:00:00Z,160246973,male,United States of America,1978-01-01T00:00:00Z,42.0,,Democratic Party
2,Aaron Peskin,AaronPeskin,2010-11-13T00:00:00Z,215369273,male,United States of America,1964-06-17T00:00:00Z,56.0,apeskin52,Democratic Party
3,Aaron Peña,AaronPena,2007-10-31T00:00:00Z,9843332,male,United States of America,1959-06-08T00:00:00Z,61.0,,Republican Party
4,Aaron Schock,aaronschock,2009-03-12T00:00:00Z,23951197,male,United States of America,1981-05-28T00:00:00Z,39.0,aaronschock,Republican Party


In [10]:
# Narrow the frame to the three columns used downstream, then keep only the
# rows whose username is an actual string.
tweets = tweets.loc[:, ["id", "username", "tweet"]]
username_is_text = list(map(lambda handle: isinstance(handle, str), tweets["username"]))
tweets = tweets[username_is_text]

KeyError: "None of [Index(['id', 'username', 'tweet'], dtype='object')] are in the [columns]"

In [None]:
# Distinct account handles present in the tweet table (set-based, so the order is arbitrary).
politicians = [*set(tweets["username"])]

In [None]:
# Start from an empty edge table with the four columns the later cells fill in.
edge_columns = ('Source', 'Target', 'Type', 'weight')
network_graph = pd.DataFrame(data={column: [] for column in edge_columns})

In [None]:
!pip install flair
from flair.models import TextClassifier
from flair.data import Sentence
# Load the classifier registered under the name 'en-sentiment'; flair_prediction
# below reads it through the global name `sia`.
sia = TextClassifier.load('en-sentiment')
def flair_prediction(x):
    """Classify the sentiment of a piece of text with the global `sia` model.

    Args:
        x: The text to classify.

    Returns:
        "pos" when the first predicted label is POSITIVE, "neg" when it is
        NEGATIVE, and "neu" for any other label or when the model attached no
        label at all (e.g. for empty text).
    """
    sentence = Sentence(x)
    sia.predict(sentence)

    # Nothing was predicted: report neutral instead of failing on labels[0].
    if not sentence.labels:
        return "neu"

    # Compare the label's class name itself. The string form of a label can
    # carry more than the class name (such as the sentence text), so a
    # substring search on it can be fooled by the wording of the input.
    verdict = str(sentence.labels[0].value).upper()
    if verdict == "POSITIVE":
        return "pos"
    if verdict == "NEGATIVE":
        return "neg"
    return "neu"

In [None]:
# Build the mention network: one row per (author, mentioned handle) pair whose
# weight is the running sum of tweet polarities (-1 for a negative tweet,
# +1 otherwise).
#
# Totals are accumulated in a plain dict and turned into a DataFrame once at
# the end: growing a DataFrame row by row is quadratic, and DataFrame.append is
# not available in pandas 2.x.
edge_weights = {}
for _idx, row in tqdm(tweets.iterrows(), total=tweets.shape[0]):
    text = row["tweet"]
    if not isinstance(text, str):
        # Non-string values (e.g. NaN for a missing tweet) cannot be searched.
        continue
    polarity = None  # classified lazily, at most once per tweet
    for pol in politicians:
        # Plain substring match on the handle (no '@' prefix or word boundary required).
        if pol in text:
            if polarity is None:
                polarity = -1 if flair_prediction(text) == "neg" else 1
            # Key on the real author/target pair so repeated mentions add up on one edge.
            edge = (row["username"], pol)
            edge_weights[edge] = edge_weights.get(edge, 0) + polarity

network_graph = pd.DataFrame(
    [
        {'Source': source, 'Target': target, 'Type': "Undirected", 'weight': weight}
        for (source, target), weight in edge_weights.items()
    ],
    columns=['Source', 'Target', 'Type', 'weight'],
)

In [None]:
# Persist the edge list first, then end the cell with the preview: only the
# last expression of a cell is rendered, so head() has to come last to be shown.
network_graph.to_csv("adjacency_graph.csv", index=False)
network_graph.head()

In [None]:
# Reload the edge list from the CSV file in the working directory.
network_graph = pd.read_csv(filepath_or_buffer="adjacency_graph.csv")

In [None]:
# Discard every edge whose target is the POTUS or VP account.
excluded_targets = ["POTUS", "VP"]
network_graph = network_graph[~network_graph["Target"].isin(excluded_targets)]

In [3]:
import networkx as nx
# Turn the edge table into a networkx graph, carrying each row's weight over as
# an edge attribute.
# NOTE(review): with the default graph type, rows (A, B) and (B, A) presumably
# land on the same undirected edge — confirm how their weights should combine.
G = nx.from_pandas_edgelist(
    df=network_graph,
    source="Source",
    target="Target",
    edge_attr="weight",
)

NameError: name 'network_graph' is not defined

In [None]:
# Read the politician table and keep just the handle and party columns.
usernames = pd.read_csv(
    "/kaggle/input/us-politicians-twitter-dataset/dataset.csv"
)[["Twitter_username", "Political_party"]]

In [None]:
# Map each Twitter handle to its party string, and keep a separate copy of that
# mapping under `parties`.
usernames = dict(zip(usernames["Twitter_username"], usernames["Political_party"]))
parties = dict(usernames)

In [None]:
# Encode party membership numerically: 0 when the party string contains
# "democratic" (case-insensitive), 1 for everything else.
#
# The codes are derived from `parties` (the handle -> party-string copy)
# instead of rewriting `usernames` value by value, so re-running the cell gives
# the same result. str() keeps non-string entries (e.g. NaN for a missing
# party) from raising on .lower(); they fall into the "everything else" group.
usernames = {
    handle: 0 if "democratic" in str(party).lower() else 1
    for handle, party in parties.items()
}

### Base Pyvis Visualization

In [None]:
!pip install pyvis
from pyvis.network import Network

In [None]:
# Render the networkx graph G as a pyvis network inside the notebook.
net = Network(notebook=True)
net.from_nx(G)  # populate the pyvis network from G
net.save_graph("graph.html")  # write the visualization to an HTML file
net.show('graph.html')  # last expression of the cell

### Bokeh Weighted Visualization

In [None]:
!pip install bokeh
from bokeh.io import output_notebook, show, save
from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine
from bokeh.plotting import figure
from bokeh.plotting import from_networkx
from bokeh.palettes import Blues8, Reds8, Purples8, Oranges8, Viridis8, Spectral8, RdBu8, turbo
import matplotlib.cm as cm
# Configure Bokeh to display its output inline in the notebook.
output_notebook()

In [None]:
# Record each node's degree on the graph as a node attribute named 'degree'.
degrees = {node: degree for node, degree in G.degree()}
nx.set_node_attributes(G, degrees, 'degree')

In [None]:
# Offset every degree by a constant to get the marker size, then attach the
# size and the two party mappings to the graph as node attributes.
number_to_adjust_by = 5
adjusted_node_size = {
    node: degree + number_to_adjust_by for node, degree in G.degree()
}
nx.set_node_attributes(G, adjusted_node_size, 'adjusted_node_size')
nx.set_node_attributes(G, usernames, 'party_name')
nx.set_node_attributes(G, parties, 'party_string')

In [None]:
# Give code 0 to any graph node that has no party code yet, so the mapping
# covers every node in G.
for node in G.nodes():
    usernames.setdefault(node, 0)
# This one account is assigned a code of its own.
# NOTE(review): the reason for the special value 10 isn't stated — confirm.
usernames["rosadelauro"] = 10

In [None]:
import community
# Detect communities, starting from the party codes as the initial partition.
# NOTE(review): weight is the string 'None', not the None object — presumably a
# way to ignore the edge weights; confirm that is the intent.
parts = community.best_partition(G, partition=usernames, weight='None')
# (node, community id) pairs in graph node order.
values = list(zip(G.nodes(), map(parts.get, G.nodes())))

In [None]:
# Store each node's community id on the graph under the 'communities' attribute.
community_by_node = dict(values)
nx.set_node_attributes(G, community_by_node, 'communities')

In [None]:
# linear_cmap is needed here but is not among the Bokeh names imported by the
# notebook's import cell, so it is imported next to its only use.
from bokeh.transform import linear_cmap

maxval = max(parts.values())  # highest community id

# NOTE(review): `radii` and `colors` are not referenced by the plotting cell
# below; they are kept in case something else in the notebook reads them.
radii = np.random.random(size=maxval + 1) * 1.5
colors = [
    "#%02x%02x%02x" % (int(r), int(g), int(b)) for r, g, b, _ in 255*mpl.cm.viridis(mpl.colors.Normalize()(radii))
]

# Colour mapper over the 'communities' node attribute: ids 0..maxval are spread
# across a turbo palette with one colour per id.
fcm = linear_cmap('communities', palette=turbo(maxval+1), low=0, high=maxval)

In [None]:
# Node attributes used for sizing and colouring. A fixed size (e.g. 10) or a
# fixed colour (e.g. 'skyblue') would also be accepted by the glyph.
size_by_this_attribute = 'adjusted_node_size'
color_by_this_attribute = 'party_name'

# Palette choice — Blues8, Reds8, Purples8, Oranges8, Viridis8, ...
color_palette = RdBu8

# Plot title; also used as the name of the saved HTML file.
title = 'US Politics Graph'

# Fields shown when hovering over a node. Every value needs the '@' prefix to
# be looked up in the node data; without it the literal text is displayed.
HOVER_TOOLTIPS = [
    ("Person", "@index"),
    ("Mentions", "@degree"),
    ("Real Party", "@party_string"),
]

# Create the figure: dimensions, toolbar and title.
plot = figure(tooltips = HOVER_TOOLTIPS,
              tools="pan,wheel_zoom,save,reset", active_scroll='wheel_zoom',
            x_range=Range1d(-10.1, 10.1), y_range=Range1d(-10.1, 10.1), title=title)

# Create the network renderer using a spring layout scaled to fit the axis ranges.
# https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.drawing.layout.spring_layout.html
network_vis = from_networkx(G, nx.spring_layout, scale=10, center=(0, 0))

# Node size comes from the chosen size attribute; node colour comes from the
# community colour mapper `fcm`.
# NOTE(review): color_by_this_attribute, color_palette, the two *_value_color
# bounds and `cmap` are not used by the glyph below. cm.get_cmap may also be
# unavailable in recent matplotlib releases — confirm before upgrading.
minimum_value_color = 0
maximum_value_color = 1
cmap = cm.get_cmap('viridis', max(parts.values()) + 1)
network_vis.node_renderer.glyph = Circle(size=size_by_this_attribute, fill_color=fcm)

# Edge opacity and width.
network_vis.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

plot.renderers.append(network_vis)

show(plot)
save(plot, filename=f"{title}.html")