In [37]:
import pandas as pd
import pyreadstat
import plotly.express as px

In [38]:
#parsing data from Stata file into pandas dataframe

dtafile = 'NAVCO2-1_ForPublication.dta'
df, meta = pyreadstat.read_dta(dtafile)

In [39]:
#cleaning data to ensure that variables of interest are only represented numerically
df['id'] = pd.to_numeric(df['id'], errors = 'coerce')
df['year'] = pd.to_numeric(df['year'], errors = 'coerce').astype(int)
df['progress'] = pd.to_numeric(df['progress'], errors = 'coerce')
df['total_part'] = pd.to_numeric(df['total_part'], errors = 'coerce')
df['prim_meth'] = pd.to_numeric(df['prim_meth'], errors = 'coerce')
df['success'] = pd.to_numeric(df['success'], errors = 'coerce')


#dropping unknown values from variables of interest
df = df[df['prim_meth'] != '-99']
df = df[df['success'] != '-99']
df = df[df['total_part'] != '-99']
df = df[df['camp_confl_intensity'] != '-99']

In [40]:
#################################################################

In [41]:
df['prim_meth'].corr(df['success'])

0.2641896604911782

In [42]:
#This data indicates that there is a very low degree of correllation between whether the campaign is successful and whether their campaign is violent or nonviolent.

In [43]:
#################################################################

In [44]:
print(df['camp_name'].value_counts()) #identifying the longest-running campaign

Myanmar Regime Change Campaign             68
Myanmar Territorial Secession Campaign     66
Naga Rebellion                             63
West Papuan self-determination campaign    50
Palestinian Liberation                     50
                                           ..
Anti-Somoza Strikes                         1
Maoist Anti-Government Protests             1
Anti-Ravalomanana Movement                  1
Anti-Tsiranana Campaign                     1
NLA Insurgency                              1
Name: camp_name, Length: 382, dtype: int64


In [45]:
df_mrcc = df[df['camp_name'] == 'Myanmar Regime Change Campaign']
df_mrcc['Degree of unity amongst opposition groups'] = mrcc_data['camp_confl_intensity']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [46]:
px.defaults.color_continuous_scale = px.colors.sequential.Blackbody
px.scatter(
  df_mrcc,
  x = 'year',
  y = "progress",
    color = 'Degree of unity amongst opposition groups',
  title = 'Progress of the Myanmar Regime Change Campaign from 1948-2013'
)

In [167]:
#############################################################

In [12]:
#identifying the names of the 5 campaigns with the highest total participation within a given year
df_sorted = (df.sort_values(by = ['total_part'], ascending = False))
top_5 = df_sorted.iloc[0:5]

top_5_data = top_5[['camp_name', 'total_part', 'target', 'year','target_country']]

top_5_data["Campaign"] = top_5_data["camp_name"] + " (" + top_5_data["year"].astype(str) + ")"
top_5_data['Total participation'] = top_5_data['total_part']
top_5_data['Target'] = top_5_data["target"] + " (" + top_5_data["target_country"].astype(str) + ")"

print(top_5_data)

                             camp_name  total_part            target  year  \
232     Cultural Revolution Red Guards    22000000      Anti-Maoists  1967   
1485                        Solidarity    10000000  Communist regime  1980   
1486                        Solidarity    10000000  Communist regime  1981   
1488                        Solidarity     9500000  Communist regime  1983   
112   Argentina pro-democracy movement     9000000    military junta  1983   

     target_country                                 Campaign  \
232           China    Cultural Revolution Red Guards (1967)   
1485         Poland                        Solidarity (1980)   
1486         Poland                        Solidarity (1981)   
1488         Poland                        Solidarity (1983)   
112       Argentina  Argentina pro-democracy movement (1983)   

      Total participation                      Target  
232              22000000        Anti-Maoists (China)  
1485             10000000   Commun



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [13]:
px.bar(
  top_5_data,
  x = 'Campaign',
  y = "Total participation",
    color = 'Target',
  title = 'Total Campaign Membership of the 5 Largest Campaign Years'
)