In [89]:
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from load_data import *

pd.set_option('display.max_columns', None)

GTD = load_GTD()
PPTUS_data, PPTUS_sources = load_PPTUS()

GTD pickle file found, loading...
PPTUS pickle files found, loading...


In [156]:
# Merge PPTUS and GTD 
print('Original number of attacks in GTD: ', GTD.shape[0])
df = PPTUS_data.merge(GTD, left_on='ORGNAME', right_on='gname', how= 'inner')
print('Number of attacks for which we know the organization ideology in GTD: ', df.shape[0])

Original number of attacks in GTD:  214666
Number of attacks for which we know the organization ideology in GTD:  7131


### Data preparation for packing circles

Need to have the colmns of major goal per each attack. <br<
SHOULD LOOK LIKE THIS:
<p align="center">
  <img src="../plots/sketches/SketchCircles.jpg" 
  style="width: 500px; "/>
</p>

Therefore you have the dominat ideology, and major goals counting one per each group (how?) and then per each sub group have the sub goal.

In [157]:
pol_goal = df.columns[df.columns.str.startswith('G_POL') & ~df.columns.str.endswith('CI') & ~df.columns.str.endswith('SOURCE') & ~df.columns.str.endswith('TXT')]
soc_goal = df.columns[df.columns.str.startswith('G_SOC') & ~df.columns.str.endswith('CI') & ~df.columns.str.endswith('SOURCE') & ~df.columns.str.endswith('TXT')]
economic_goal = df.columns[df.columns.str.startswith('G_ECO') & ~df.columns.str.endswith('CI') & ~df.columns.str.endswith('SOURCE') & ~df.columns.str.endswith('TXT')]
rel_goal = df.columns[df.columns.str.startswith('G_REL') & ~df.columns.str.endswith('CI') & ~df.columns.str.endswith('SOURCE') & ~df.columns.str.endswith('TXT')]


In [158]:
# create a coloums G_POL that is 1 if pol_goal labels are one and 0 otherwise
df['G_POL'] = df[pol_goal].sum(axis=1) != 0.0
df['G_SOC'] = df[soc_goal].sum(axis=1) != 0.0
df['G_ECO'] = df[economic_goal].sum(axis=1) != 0.0
df['G_REL'] = df[rel_goal].sum(axis=1) != 0.0

df['DOM_I'] = df['DOM_I'].replace(
    {1: 'Extreme right', 
     2: 'Extreme left', 
     3: 'Religious', 
     4: 'Ethno-nationalist', 
     5: 'Single issue', 
     -99: 'Uncertain'})

In [159]:
# replace -99 values with 0 in all the df
df = df.replace(-99, 0)

In [160]:
# keep only the columns we need and the columns in pol_goal, soc_goal, economic_goal, rel_goal
df = df[['G_POL', 'G_SOC', 'G_ECO', 'G_REL', 'DOM_I', 'gname'] + list(pol_goal) + list(soc_goal) + list(economic_goal) + list(rel_goal)]
df

Unnamed: 0,G_POL,G_SOC,G_ECO,G_REL,DOM_I,gname,G_POL_1,G_POL_2,G_POL_3,G_POL_4,G_POL_5,G_POL_OTH,G_SOC_1,G_SOC_2,G_SOC_OTH,G_ECO_1,G_ECO_2,G_ECO_OTH,G_REL_1,G_REL_2,G_REL_3,G_REL_OTH
0,False,False,False,False,Extreme left,Action Squad,0,0,0,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0.0
1,True,False,True,True,Religious,Al-Qaida,1,1,1,1,0,0.0,0,0,0.0,0,1,0.0,1,0,1,1.0
2,True,False,True,True,Religious,Al-Qaida,1,1,1,1,0,0.0,0,0,0.0,0,1,0.0,1,0,1,1.0
3,True,False,True,True,Religious,Al-Qaida,1,1,1,1,0,0.0,0,0,0.0,0,1,0.0,1,0,1,1.0
4,True,False,True,True,Religious,Al-Qaida,1,1,1,1,0,0.0,0,0,0.0,0,1,0.0,1,0,1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7126,True,True,True,False,Extreme left,"Weather Underground, Weathermen",1,1,1,0,0,1.0,1,1,1.0,1,1,0.0,0,0,0,0.0
7127,True,True,True,False,Extreme left,White Panther Party,1,1,0,0,0,1.0,1,1,1.0,1,1,0.0,0,0,0,0.0
7128,True,True,True,True,Extreme left,Young Cuba,0,0,0,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0.0
7129,True,True,True,True,Extreme left,Young Cuba,0,0,0,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0,0.0


In [161]:
# rename DOM_I with dominant_ideology
df = df.rename(columns={'DOM_I': 'dominant_ideology'})

In [162]:
# take a political dataframe where G_POL is 1
df_pol = df[df['G_POL'] == 1]
df_soc = df[df['G_SOC'] == 1]
df_econ = df[df['G_ECO'] == 1]
df_rel = df[df['G_REL'] == 1]

# now melt the dataframe so that we have a column for each goal and keep the dominnat ideology
df_pol = df_pol.melt(id_vars=['dominant_ideology'], value_vars=pol_goal, var_name='sub_goal', value_name='goal_value')
df_soc = df_soc.melt(id_vars=['dominant_ideology'], value_vars=soc_goal, var_name='sub_goal', value_name='goal_value')
df_econ = df_econ.melt(id_vars=['dominant_ideology'], value_vars=economic_goal, var_name='sub_goal', value_name='goal_value')
df_rel = df_rel.melt(id_vars=['dominant_ideology'], value_vars=rel_goal, var_name='sub_goal', value_name='goal_value')

df_pol['Major_goal'] = 'Political'
df_soc['Major_goal'] = 'Social'
df_econ['Major_goal'] = 'Economic'
df_rel['Major_goal'] = 'Religious'

# put then together by concatenation
df_new = pd.concat([df_pol, df_soc, df_econ, df_rel], axis=0)
# replace the nan values with 0
df_new = df_new.replace(np.nan, 0)

df_new

Unnamed: 0,dominant_ideology,sub_goal,goal_value,Major_goal
0,Religious,G_POL_1,1.0,Political
1,Religious,G_POL_1,1.0,Political
2,Religious,G_POL_1,1.0,Political
3,Religious,G_POL_1,1.0,Political
4,Religious,G_POL_1,1.0,Political
...,...,...,...,...
22251,Ethno-nationalist,G_REL_OTH,0.0,Religious
22252,Single issue,G_REL_OTH,1.0,Religious
22253,Single issue,G_REL_OTH,1.0,Religious
22254,Extreme left,G_REL_OTH,0.0,Religious


In [172]:
# create a tree of the type tree = {'name': 'flare', 'children': []} where children is a list of dictionaries of each ideology and their goals and  their value
# for example: {'name': 'Extreme right', 'children': [{'name': 'G_POL', 'childern': [{'name': 'G_POL1', 'value': 1}, {'name': 'G_POL2', 'value': 2}]}]}

tree = {'name': 'flare', 'children': []}
for ideology in df_new['dominant_ideology'].unique():
    tree['children'].append({'name': ideology, 'children': []})
    for major_goal in df_new['Major_goal'].unique():
        tree['children'][-1]['children'].append({'name': major_goal, 'children': []})
        for sub_goal in df_new['sub_goal'].unique():
            tree['children'][-1]['children'][-1]['children'].append({'name': sub_goal, 'value': df_new[(df_new['dominant_ideology'] == ideology) & (df_new['Major_goal'] == major_goal) & (df_new['sub_goal'] == sub_goal)]['goal_value'].sum()})

tree

{'name': 'flare',
 'children': [{'name': 'Religious',
   'children': [{'name': 'Political',
     'children': [{'name': 'G_POL_1', 'value': 2718.0},
      {'name': 'G_POL_2', 'value': 2716.0},
      {'name': 'G_POL_3', 'value': 2704.0},
      {'name': 'G_POL_4', 'value': 2704.0},
      {'name': 'G_POL_5', 'value': 4.0},
      {'name': 'G_POL_OTH', 'value': 4.0},
      {'name': 'G_SOC_1', 'value': 0.0},
      {'name': 'G_SOC_2', 'value': 0.0},
      {'name': 'G_SOC_OTH', 'value': 0.0},
      {'name': 'G_ECO_1', 'value': 0.0},
      {'name': 'G_ECO_2', 'value': 0.0},
      {'name': 'G_ECO_OTH', 'value': 0.0},
      {'name': 'G_REL_1', 'value': 0.0},
      {'name': 'G_REL_2', 'value': 0.0},
      {'name': 'G_REL_3', 'value': 0.0},
      {'name': 'G_REL_OTH', 'value': 0.0}]},
    {'name': 'Social',
     'children': [{'name': 'G_POL_1', 'value': 0.0},
      {'name': 'G_POL_2', 'value': 0.0},
      {'name': 'G_POL_3', 'value': 0.0},
      {'name': 'G_POL_4', 'value': 0.0},
      {'name': 'G_P

In [174]:
# remove from tree the children that have value 0
for ideology in tree['children']:
    for major_goal in ideology['children']:
        for sub_goal in major_goal['children']:
            if sub_goal['value'] == 0:
                major_goal['children'].remove(sub_goal)
        if len(major_goal['children']) == 0:
            ideology['children'].remove(major_goal)
    if len(ideology['children']) == 0:
        tree['children'].remove(ideology)

tree

{'name': 'flare',
 'children': [{'name': 'Religious',
   'children': [{'name': 'Political',
     'children': [{'name': 'G_POL_1', 'value': 2718.0},
      {'name': 'G_POL_2', 'value': 2716.0},
      {'name': 'G_POL_3', 'value': 2704.0},
      {'name': 'G_POL_4', 'value': 2704.0},
      {'name': 'G_POL_5', 'value': 4.0},
      {'name': 'G_POL_OTH', 'value': 4.0},
      {'name': 'G_SOC_2', 'value': 0.0},
      {'name': 'G_ECO_1', 'value': 0.0},
      {'name': 'G_ECO_OTH', 'value': 0.0},
      {'name': 'G_REL_2', 'value': 0.0},
      {'name': 'G_REL_OTH', 'value': 0.0}]},
    {'name': 'Social',
     'children': [{'name': 'G_POL_2', 'value': 0.0},
      {'name': 'G_POL_4', 'value': 0.0},
      {'name': 'G_POL_OTH', 'value': 0.0},
      {'name': 'G_SOC_1', 'value': 12.0},
      {'name': 'G_SOC_2', 'value': 13.0},
      {'name': 'G_SOC_OTH', 'value': 8.0},
      {'name': 'G_ECO_2', 'value': 0.0},
      {'name': 'G_REL_1', 'value': 0.0},
      {'name': 'G_REL_3', 'value': 0.0}]},
    {'name': 

In [173]:
# save thhe dictionary tree to a JSON file
with open('../data/maps/major_goals.json', 'w') as fp:
    json.dump(tree, fp, indent=4)

In [None]:
df.rename(columns={'DOM_I': 'dominant_ideology'}, inplace=True)
# Keep only columns eventid, gname, attacktype1_txt, targtype1_txt and dominant_ideology
df = df[['eventid', 'gname', 'attacktype1_txt', 'targtype1_txt', 'dominant_ideology']]

# Remove all rows with uncertain dominant ideology
df = df[df['dominant_ideology'] != 'uncertain']

# Create a tree of dictionaries with keys {'name', 'children', 'value'} with layers ideology, attacktype1_txt, targtype1_txt from the df dataframe. Each layer has name, value = number of attacks and children = the next layer
tree = {'name': 'flare', 'children': []}

    

    

# Save ideology_dict as json
with open('../maps/sunburst_tree.json', 'w') as fp:
    json.dump(tree, fp, indent=4)

In [5]:
# Labels for dominant ideologies
DOM_labels = {
    'I_ETHNO': 'Ethno-nationalist',
    'I_REL': 'Religious',
    'I_RACE': 'Racist', 
    'I_LEFT': 'Extreme left',
    'I_RIGHT': 'Extreme right',
    'I_SI': 'Single issue',
}

# Labels for sub-ideologies
POL_labels = {'G_POL_1': 'Protest government policies/laws', 'G_POL_2': 'Protest government ruling party', 'G_POL_3': 'Seek regime type', 
              'G_POL_4': 'Territorial ', 'G_POL_5': 'Influence election', 'G_POL_OTH': 'Other'}
SOC_labels = {'G_SOC_1': 'Protest social discrimination/social justice issues', 'G_SOC_2': 'Protest public & private institutions', 
              'G_SOC_OTH': 'Other'}
ECONOMIC_labels = {'G_ECO_1': 'Protest economic discrimination', 'G_ECO_2': 'Protest government/corporate economic policies', 
                   'G_ECO_OTH': 'Other'}
REL_labels = {'G_REL_1': 'Protest religious discrimination', 'G_REL_2': 'Protest religious institutions', 'G_REL_3': 'Killing infidels/Non-believers', 
              'G_REL_OTH': 'Other'}

GOALS_pairs = {
    'POLITICAL': POL_labels, 
    'SOCIAL': SOC_labels,
    'ECONOMIC': ECONOMIC_labels, 
    'RELIGIOUS': REL_labels
}

In [None]:
# Create a column of Ideologies 
df['Ideologies'] = df.apply(lambda row: [DOM_labels[ideology_ID] for ideology_ID in DOM_labels.keys() if row[ideology_ID] == 1], axis=1)

# Create a column 'Sub-ideologies' that contains a list of sub-ideologies for which the corresponding I_TERMS is 1
df['BIGMajorGoals'] = df.apply(lambda row: ['POLITICAL' if row[sub_ideology] == 1 and (row['G_POL_1'] == 1 or row['G_POL_2'] == 1 or row['G_POL_3'] == 1 or row['G_POL_4'] == 1 or row['G_POL_5'] == 1 or row['G_POL_OTH'] == 1)] +
                                        ['SOCIAL' if row[sub_ideology] == 1 and (row['G_SOC_1'] == 1 or row['G_SOC_2'] == 1 or row['G_SOC_OTH'] == 1)] +
                                        ['ECONOMIC' if row[sub_ideology] == 1 and (row['G_ECO_1'] == 1 or row['G_ECO_2'] == 1 or row['G_ECO_OTH'] == 1)] +
                                        ['RELIGIOUS' if row[sub_ideology] == 1 and  (row['G_REL_1'] == 1 or row['G_REL_2'] == 1 or row['G_REL_3'] == 1 or row['G_REL_OTH'] == 1)],axis=1)

df['MajorGoals'] = df.apply(lambda row: [POL_labels[sub_ideology] for sub_ideology in POL_labels.keys() if row[sub_ideology] == 1 and (row['G_POL_1'] == 1 or row['G_POL_2'] == 1 or row['G_POL_3'] == 1 or row['G_POL_4'] == 1 or row['G_POL_5'] == 1 or row['G_POL_OTH'] == 1)] +
                                        [SOC_labels[sub_ideology] for sub_ideology in SOC_labels.keys() if row[sub_ideology] == 1 and (row['G_SOC_1'] == 1 or row['G_SOC_2'] == 1 or row['G_SOC_OTH'] == 1)] +
                                        [ECONOMIC_labels[sub_ideology] for sub_ideology in ECONOMIC_labels.keys() if row[sub_ideology] == 1 and (row['G_ECO_1'] == 1 or row['G_ECO_2'] == 1 or row['G_ECO_OTH'] == 1)] +
                                        [REL_labels[sub_ideology] for sub_ideology in REL_labels.keys() if row[sub_ideology] == 1 and  (row['G_REL_1'] == 1 or row['G_REL_2'] == 1 or row['G_REL_3'] == 1 or row['G_REL_OTH'] == 1)],axis=1)

In [None]:
df['BIGMajorGoals'] = df.apply(lambda row: ['POLITICAL' if row['sub_ideology'] == 1 and (row['G_POL_1'] == 1 or row['G_POL_2'] == 1 or row['G_POL_3'] == 1 or row['G_POL_4'] == 1 or row['G_POL_5'] == 1 or row['G_POL_OTH'] == 1)] +
                                        ['SOCIAL' if row['sub_ideology'] == 1 and (row['G_SOC_1'] == 1 or row['G_SOC_2'] == 1 or row['G_SOC_OTH'] == 1)] +
                                        ['ECONOMIC' if row['sub_ideology'] == 1 and (row['G_ECO_1'] == 1 or row['G_ECO_2'] == 1 or row['G_ECO_OTH'] == 1)] +
                                        ['RELIGIOUS' if row['sub_ideology'] == 1 and  (row['G_REL_1'] == 1 or row['G_REL_2'] == 1 or row['G_REL_3'] == 1 or row['G_REL_OTH'] == 1)], axis=1)

In [None]:

df['MajorGoals'] = df['MajorGoals'].apply(lambda x: x if len(x)>0 else None)

