In [48]:
#import statements
import pandas as pd

import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px

df = px.data.iris()
all_dims = ['sepal_length', 'sepal_width', 
            'petal_length', 'petal_width']

app = dash.Dash(__name__)

app.layout = html.Div([
    dcc.Dropdown(
        id="dropdown",
        options=[{"label": x, "value": x} 
                 for x in all_dims],
        value=all_dims[:2],
        multi=True
    ),
    dcc.Graph(id="splom"),
])

@app.callback(
    Output("splom", "figure"), 
    [Input("dropdown", "value")])
def update_bar_chart(dims):
    fig = px.scatter_matrix(
        df, dimensions=dims, color="species")
    return fig

app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


SystemExit: 1

In [69]:
#parsing data from Stata file into pandas dataframe

dtafile = r"C:\Users\abhii\Desktop\Github\Calculating-a-Revolution\dataverse_files\NAVCO2-1_ForPublication.dta"
#df, meta = pyreadstat.read_dta(dtafile)

df = pd.read_stata(dtafile)

In [70]:
#cleaning data to ensure that variables of interest are only represented numerically
df['id'] = pd.to_numeric(df['id'], errors = 'coerce')
df['year'] = pd.to_numeric(df['year'], errors = 'coerce').astype(int)
df['progress'] = pd.to_numeric(df['progress'], errors = 'coerce')
df['total_part'] = pd.to_numeric(df['total_part'], errors = 'coerce')
df['prim_meth'] = pd.to_numeric(df['prim_meth'], errors = 'coerce')
df['success'] = pd.to_numeric(df['success'], errors = 'coerce')


#dropping unknown values from variables of interest
df = df[df['prim_meth'] != '-99']
df = df[df['success'] != '-99']
df = df[df['total_part'] != '-99']
df = df[df['camp_confl_intensity'] != '-99']

In [37]:
#################################################################

In [38]:
df['prim_meth'].corr(df['success'])

0.26418966049117737

This data indicates that there is a very low degree of correllation between whether the campaign is successful and whether their campaign is violent or nonviolent.

In [18]:
#################################################################

In [83]:
print(df['camp_name'].value_counts()) #identifying the longest-running campaign

Myanmar Regime Change Campaign             68
Myanmar Territorial Secession Campaign     66
Naga Rebellion                             63
West Papuan self-determination campaign    50
Palestinian Liberation                     50
                                           ..
Anti-Somoza Strikes                         1
Maoist Anti-Government Protests             1
Anti-Ravalomanana Movement                  1
Anti-Tsiranana Campaign                     1
NLA Insurgency                              1
Name: camp_name, Length: 382, dtype: int64


In [84]:
df_mrcc = df[df['camp_name'] == 'Myanmar Regime Change Campaign']
df_mrcc['Degree of unity amongst opposition groups'] = df_mrcc['camp_confl_intensity']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [106]:
px.scatter(
  df_mrcc,
  x = 'year',
  y = "progress",
    color = 'Degree of unity amongst opposition groups',
title = 'Progress of the Myanmar Regime Change Campaign from 1948-2013')

# Degree of unity amongst opposition groups KEY
0 - seemingly united
1 - cooperation with moderate disunity (i.e. ideological/policy disagreement)
2 - verbal/active competition among discrete groups, short of physical violence
3 - active competition among groups with violence

In [43]:
#############################################################

In [71]:
#identifying the names of the 5 campaigns with the highest total participation within a given year
df_sorted = (df.sort_values(by = ['total_part'], ascending = False))
top_5 = df_sorted.iloc[0:5]

top_5_data = top_5[['camp_name', 'total_part', 'target', 'year','target_country']]

top_5_data["Campaign"] = top_5_data["camp_name"] + " (" + top_5_data["year"].astype(str) + ")"
top_5_data['Total participation'] = top_5_data['total_part']
top_5_data['Target'] = top_5_data["target"] + " (" + top_5_data["target_country"].astype(str) + ")"

print(top_5_data)

                             camp_name  total_part            target  year  \
232     Cultural Revolution Red Guards    22000000      Anti-Maoists  1967   
1485                        Solidarity    10000000  Communist regime  1980   
1486                        Solidarity    10000000  Communist regime  1981   
1488                        Solidarity     9500000  Communist regime  1983   
112   Argentina pro-democracy movement     9000000    military junta  1983   

     target_country                                 Campaign  \
232           China    Cultural Revolution Red Guards (1967)   
1485         Poland                        Solidarity (1980)   
1486         Poland                        Solidarity (1981)   
1488         Poland                        Solidarity (1983)   
112       Argentina  Argentina pro-democracy movement (1983)   

      Total participation                      Target  
232              22000000        Anti-Maoists (China)  
1485             10000000   Commun



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [72]:
px.bar(
  top_5_data,
  x = 'Campaign',
  y = "Total participation",
    color = 'Target',
  title = 'Total Campaign Membership of the 5 Largest Campaign Years'
)