In [1]:
import sys
print(sys.version)
import numpy as np
print(np.__version__)
import pandas as pd
print(pd.__version__)
import json
import seaborn as sns
import holoviews as hv
from holoviews import dim, opts
from bokeh.models import Title
import bokeh.io
from bokeh.io import output_file, save, show

hv.extension('bokeh')
hv.output(size=300)

%matplotlib inline

3.8.5 (default, Sep  4 2020, 02:22:02) 
[Clang 10.0.0 ]
1.19.2
1.1.3


In [2]:
# Load PIRUS Data
pirus = pd.read_csv('../data/clean_data/pirus_deep_clean_Final.csv')

# Load PIRUS Codebook
with open('../data/clean_data/pirus_codebook.json') as f:
    pirus_codebook = json.load(f)

In [3]:
# FILTER PIRUS DATA HERE!
def filter_pirus_by_terrorist_group():
    uni_terrorist = pirus['terrorist_group_name1'].value_counts()
    uni_terrorist.drop('-88', axis = 0, inplace = True)
    uni_terrorist = uni_terrorist.loc[uni_terrorist > 3]
    print(uni_terrorist.index.to_list())
    
    while True:
        inpt = input('Please enter the name of your desired terrorist org: ')
        if inpt in uni_terrorist:
            return pirus.loc[(pirus['terrorist_group_name1'] == inpt)]
        else:
            print('Oops! Invalid terrorist group. Please Try Again!')
df = pirus
# df = filter_pirus_by_terrorist_group()

In [4]:
# Isolate core data frame from pirus
chord_data = pd.DataFrame(df[['loc_plot_state1',
            'loc_plot_state1_us_dummy', 
            'loc_habitation_state1', 
            'loc_habitation_state1_us_dummy',
            'violent', 'subject_id'
           ]])

# Drop all NaNs from data
chord_data.dropna(inplace = True)

# Ensure all location data points are in the US and are Violent instances (1050 rows remaining)
mask = ((chord_data['loc_plot_state1_us_dummy'] == 1) & 
        (chord_data['loc_habitation_state1_us_dummy'] == 1) & 
        (chord_data['violent'] == 1))

chord_data = chord_data.loc[mask]

# Drop Dummies and Violent variables
drop_columns = ['loc_plot_state1_us_dummy', 
                'loc_habitation_state1_us_dummy', 
                'violent']
chord_data.drop(drop_columns,axis = 1, inplace = True)

chord_data

Unnamed: 0,loc_plot_state1,loc_habitation_state1,subject_id
0,Illinois,Florida,1000
3,New York,New Jersey,1005
4,New York,New Jersey,1006
5,New York,New York,1010
6,New York,New York,1013
...,...,...,...
1898,Pennsylvania,Pennsylvania,7476
1900,District of Columbia,New York,7761
1901,South Carolina,South Carolina,7161
1902,Utah,Utah,8341


In [5]:
# Get a list of all the unique states in the filtered dataframe
states = pd.concat([chord_data['loc_plot_state1'], chord_data['loc_habitation_state1']], axis = 0).unique()

# Transform states into a dict object with id codes as the value
state_dict = {states[i] : i for i in range(len(states))}

# Transform state_dict into a dataframe which will be the base for our node data frame
node_base = {i : states[i] for i in range(len(states))}
node_base = pd.DataFrame(list(state_dict.items()),columns = ['loc_habitation_state1','id'])

node_base_data = chord_data.subject_id.groupby([chord_data['loc_habitation_state1'], chord_data['loc_plot_state1']]).count().to_frame(name = 'plots').reset_index()
node_base_data

Unnamed: 0,loc_habitation_state1,loc_plot_state1,plots
0,Alabama,Alabama,13
1,Alabama,Georgia,2
2,Alaska,Alaska,4
3,Alaska,Florida,1
4,Alaska,Indiana,1
...,...,...,...
181,West Virginia,West Virginia,6
182,Wisconsin,Illinois,1
183,Wisconsin,New York,1
184,Wisconsin,Wisconsin,9


In [6]:
# Separate self inflicted instances and travel instances
self_inflicted = node_base_data.loc[node_base_data['loc_habitation_state1'] == node_base_data['loc_plot_state1']]
self_inflicted.columns = ['loc_habitation_state1', 'loc_plot_state1',  'self inflicted plots']
node_base = node_base.merge(self_inflicted, on='loc_habitation_state1', how = 'left')

# Isolate travel instances
travel_plots = node_base_data.loc[~(node_base_data['loc_habitation_state1'] == node_base_data['loc_plot_state1'])]

# Get the sum of incoming flows, the number of instances where the state was the plot state
incoming = travel_plots.plots.groupby(travel_plots['loc_plot_state1']).sum().to_frame(name = '# incoming flows').reset_index()
incoming.columns = ['loc_habitation_state1', 'incoming flows']
node_base = node_base.merge(incoming, on='loc_habitation_state1', how = 'left')

# Get the sum of outgoing flows, the number of instances where the state was the habitation state
outgoing = travel_plots.plots.groupby(travel_plots['loc_habitation_state1']).sum().to_frame(name = '# outgoing flows').reset_index()
outgoing.columns = ['loc_habitation_state1', 'outgoing flows']
node_base = node_base.merge(outgoing, on='loc_habitation_state1', how = 'left')

# Remove unnecessary columns & rename columns
node_base.drop('loc_plot_state1', axis = 1, inplace = True)
node_base.columns = ['state','id','self inflicted plots','incoming flows', 'outgoing flows']

# Transform NaNs to 0s
node_base = node_base.fillna(0)

# Get Total Plots Against for each state
node_base["total plots against"] = node_base["self inflicted plots"] + node_base["incoming flows"]

# Get Total Radicals Produced from 1948 - 2018 for each state
node_base["total radicals produced"] = node_base["self inflicted plots"] + node_base["outgoing flows"]

# Get Total Self Inflicted percentage of whole data
total_plots = node_base_data.plots.sum()
total_self_inflicted = self_inflicted['self inflicted plots'].sum()

# Get Total Interstate Plots percentage of whole data
total_interstate = outgoing['outgoing flows'].sum()

# Calc percentages
percent_self_inflicted = (total_self_inflicted/total_plots)*100
percent_interstate_plots = (total_interstate/total_plots)*100

In [7]:
# Change string state variables to id codes in state_dict
chord_data['loc_plot_state1'] = chord_data['loc_plot_state1'].apply(lambda x:state_dict[x])
chord_data['loc_habitation_state1'] = chord_data['loc_habitation_state1'].apply(lambda x:state_dict[x])

In [8]:
# Build Color Map
face = '#EEAD9A'
KKK = '#E7E3DF'
shirt = '#3186A9'
horn = '#83684C'
sleeve = '#66759A'
forehead = '#EBD79F'
jacket = '#DBB03B'
gun = '#6D6E72'
hand = '#965341'
background = '#8F917F'
beard = '#010503'
shoulder = '#bf4a41'
collar = '#4d6456'

cmap = [beard, background, shirt,
        sleeve, shoulder, gun,
        forehead, horn, collar, 
        face, KKK, jacket,
        hand]

# Print color map
sns.set_palette(sns.color_palette(cmap))
sns.color_palette(cmap)

In [9]:
# Build out final datasets
route_counts = chord_data.subject_id.groupby([chord_data['loc_habitation_state1'], chord_data['loc_plot_state1']]).count().reset_index().sort_values('subject_id')
nodes = hv.Dataset(node_base.sort_values('outgoing flows'),['id'])

# Build bases chord
chord = hv.Chord((route_counts, nodes)).select(value = (7,None))

# Adjust chord optics
chord.opts(
    opts.Chord(cmap=cmap, edge_cmap=cmap, edge_color=dim('loc_habitation_state1').str(), 
               labels='state',node_color=dim('id').str(), height=300, width=300))

# Render chord (prep for adding titles)
print_chord = hv.render(chord)

# Define titles
title = 'Violent Interstate & Self Inflicted Plots by State'
subtitle1 = '%.2f' %percent_self_inflicted + '% Self Inflicted Plots'
subtitle2 = '%.2f' %percent_interstate_plots + '% Interstate Plots'

# Add Titles
print_chord.add_layout(Title(text=subtitle2, text_font_style="italic", text_color = collar), 'above')
print_chord.add_layout(Title(text=subtitle1, text_font_style="italic",text_color = collar), 'above')
print_chord.add_layout(Title(text=title, text_font_size="16pt", text_font_style="bold",text_color = beard), 'above')

# Show Plot
bokeh.io.show(print_chord)

In [10]:
# Print chord to html
renderer = hv.renderer('bokeh')

# Using renderer save
renderer.save(chord, 'graph.html')

# Convert to bokeh figure then save using bokeh
plot = renderer.get_plot(chord).state
save(plot, 'graph.html')

  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")


'/Users/hannahgross/Desktop/Project2_Nesbitt_Gross/Final Jupyter Notebooks/graph.html'