In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Plot styling for the rest of the notebook: seaborn's "white" theme with fonts scaled up by 1.2.
sns.set_theme(font_scale=1.2, style='white')

# Read the raw African crises CSV from the local project checkout.
df = pd.read_csv(
    r"""C:\Users\palmi\data301\data301_project\course-project-solo_319\data\raw\african_crises.csv"""
)

### Preliminary Section:
#### This section cleans, wrangles, and processes my raw data one step at a time, so that the same steps can later be combined into method chains.

In [2]:
# Display the raw frame as loaded, to check which columns are available
# (the rendered table is truncated to the first and last rows).
df

Unnamed: 0,case,cc3,country,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,gdp_weighted_default,inflation_annual_cpi,independence,currency_crises,inflation_crises,banking_crisis
0,1,DZA,Algeria,1870,1,0.052264,0,0,0.0,3.441456,0,0,0,crisis
1,1,DZA,Algeria,1871,0,0.052798,0,0,0.0,14.149140,0,0,0,no_crisis
2,1,DZA,Algeria,1872,0,0.052274,0,0,0.0,-3.718593,0,0,0,no_crisis
3,1,DZA,Algeria,1873,0,0.051680,0,0,0.0,11.203897,0,0,0,no_crisis
4,1,DZA,Algeria,1874,0,0.051308,0,0,0.0,-3.848561,0,0,0,no_crisis
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1054,70,ZWE,Zimbabwe,2009,1,354.800000,1,1,0.0,-7.670000,1,1,0,crisis
1055,70,ZWE,Zimbabwe,2010,0,378.200000,1,1,0.0,3.217000,1,0,0,no_crisis
1056,70,ZWE,Zimbabwe,2011,0,361.900000,1,1,0.0,4.920000,1,0,0,no_crisis
1057,70,ZWE,Zimbabwe,2012,0,361.900000,1,1,0.0,3.720000,1,0,0,no_crisis


In [4]:
# Remove the columns that are not relevant to my research interests.
# The result is stored under a new name, so the raw frame `df` is left as loaded.
df1 = df.drop(
    columns=[
        'case',
        'cc3',
        'gdp_weighted_default',
        'independence',
        'currency_crises',
        'inflation_crises',
        'banking_crisis',
    ]
)
df1

Unnamed: 0,country,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,inflation_annual_cpi
0,Algeria,1870,1,0.052264,0,0,3.441456
1,Algeria,1871,0,0.052798,0,0,14.149140
2,Algeria,1872,0,0.052274,0,0,-3.718593
3,Algeria,1873,0,0.051680,0,0,11.203897
4,Algeria,1874,0,0.051308,0,0,-3.848561
...,...,...,...,...,...,...,...
1054,Zimbabwe,2009,1,354.800000,1,1,-7.670000
1055,Zimbabwe,2010,0,378.200000,1,1,3.217000
1056,Zimbabwe,2011,0,361.900000,1,1,4.920000
1057,Zimbabwe,2012,0,361.900000,1,1,3.720000


In [5]:
# Now to drop any rows that contain NaN values.
# dropna() returns a new DataFrame instead of modifying df1 in place, so the
# result has to be assigned back; otherwise the later cells would keep working
# on the uncleaned frame.
df1 = df1.dropna()
df1

Unnamed: 0,country,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,inflation_annual_cpi
0,Algeria,1870,1,0.052264,0,0,3.441456
1,Algeria,1871,0,0.052798,0,0,14.149140
2,Algeria,1872,0,0.052274,0,0,-3.718593
3,Algeria,1873,0,0.051680,0,0,11.203897
4,Algeria,1874,0,0.051308,0,0,-3.848561
...,...,...,...,...,...,...,...
1054,Zimbabwe,2009,1,354.800000,1,1,-7.670000
1055,Zimbabwe,2010,0,378.200000,1,1,3.217000
1056,Zimbabwe,2011,0,361.900000,1,1,4.920000
1057,Zimbabwe,2012,0,361.900000,1,1,3.720000


In [8]:
# Now I will drop all rows that contain data from before the year 2000.
# reset_index() returns a new DataFrame, so it is chained into the assignment:
# without that, only the displayed copy is renumbered and df1 itself keeps the
# original, gapped row labels. Filtering with `>= 2000` is idempotent, so this
# cell can safely be re-run.
df1 = df1[df1.year >= 2000].reset_index(drop=True)
df1

Unnamed: 0,country,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,inflation_annual_cpi
0,Algeria,2000,0,75.3428,0,0,0.300
1,Algeria,2001,0,77.8196,0,0,4.200
2,Algeria,2002,0,79.7234,0,0,1.430
3,Algeria,2003,0,72.6128,0,0,4.259
4,Algeria,2004,0,72.6137,0,0,3.972
...,...,...,...,...,...,...,...
188,Zimbabwe,2009,1,354.8000,1,1,-7.670
189,Zimbabwe,2010,0,378.2000,1,1,3.217
190,Zimbabwe,2011,0,361.9000,1,1,4.920
191,Zimbabwe,2012,0,361.9000,1,1,3.720


## Task 3: Method Chaining and Python Programs

### Step 1: Build and test your method chain(s):

In [2]:
import pandas as pd
import numpy as np

# Read the raw African crises CSV from the local project checkout.
data = pd.read_csv(
    r"""C:\Users\palmi\data301\data301_project\course-project-solo_319\data\raw\african_crises.csv"""
)

# Method Chain 1 (cleaning): start from the listed raw columns, discard the ones
# I do not need, remove rows with missing values, and renumber the rows.
df = (
    pd.DataFrame(
        data=data,
        columns=[
            'case',
            'cc3',
            'country',
            'year',
            'systemic_crisis',
            'exch_usd',
            'domestic_debt_in_default',
            'sovereign_external_debt_default',
            'gdp_weighted_default',
            'inflation_annual_cpi',
            'independence',
            'currency_crises',
            'inflation_crises',
            'banking_crisis',
        ],
    )
    .drop(
        columns=[
            'case',
            'cc3',
            'gdp_weighted_default',
            'independence',
            'currency_crises',
            'inflation_crises',
            'banking_crisis',
        ]
    )
    .dropna()
    .reset_index(drop=True)
)

# Method Chain 2 (row filter): keep only the years 2000 and later, then renumber
# the rows so the index starts from 0 again.
df1 = (
    df.loc[lambda frame: frame['year'] >= 2000]
    .reset_index(drop=True)
)

df1

Unnamed: 0,country,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,inflation_annual_cpi
0,Algeria,2000,0,75.3428,0,0,0.300
1,Algeria,2001,0,77.8196,0,0,4.200
2,Algeria,2002,0,79.7234,0,0,1.430
3,Algeria,2003,0,72.6128,0,0,4.259
4,Algeria,2004,0,72.6137,0,0,3.972
...,...,...,...,...,...,...,...
188,Zimbabwe,2009,1,354.8000,1,1,-7.670
189,Zimbabwe,2010,0,378.2000,1,1,3.217
190,Zimbabwe,2011,0,361.9000,1,1,4.920
191,Zimbabwe,2012,0,361.9000,1,1,3.720


### Step 2: Wrap your method chain(s) in a function:

In [7]:
def clean_and_wrangle(path, min_year=2000):
    """Load the African crises CSV and return the cleaned, filtered frame.

    The steps mirror the two method chains built in Step 1:

    1. read the CSV found at ``path``;
    2. drop the columns that are not used in the analysis;
    3. drop every row that still contains a missing value;
    4. keep only rows whose ``year`` is at least ``min_year``;
    5. renumber the rows so the index starts from 0.

    Parameters
    ----------
    path : str or path-like
        Location of the raw CSV file; passed straight to ``pandas.read_csv``.
    min_year : int, default 2000
        Earliest year to keep (inclusive).

    Returns
    -------
    pandas.DataFrame
        The cleaned data with a fresh 0-based index.
    """
    # Columns that are irrelevant to the research questions.
    unused_columns = [
        'case',
        'cc3',
        'gdp_weighted_default',
        'independence',
        'currency_crises',
        'inflation_crises',
        'banking_crisis',
    ]

    return (
        # Read from the `path` argument. The function must not depend on a
        # notebook-level `data` variable, which may not exist on a fresh kernel
        # and would make the argument meaningless.
        pd.read_csv(path)
        # Drop the unused columns first so that missing values in them do not
        # cause otherwise complete rows to be removed by dropna().
        .drop(columns=unused_columns)
        .dropna()
        .loc[lambda frame: frame['year'] >= min_year]
        .reset_index(drop=True)
    )

In [10]:
# Call the wrapped pipeline with the raw CSV location and display the frame it returns.
clean_and_wrangle(r"""C:\Users\palmi\data301\data301_project\course-project-solo_319\data\raw\african_crises.csv""")

Unnamed: 0,country,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,inflation_annual_cpi
0,Algeria,2000,0,75.3428,0,0,0.300
1,Algeria,2001,0,77.8196,0,0,4.200
2,Algeria,2002,0,79.7234,0,0,1.430
3,Algeria,2003,0,72.6128,0,0,4.259
4,Algeria,2004,0,72.6137,0,0,3.972
...,...,...,...,...,...,...,...
188,Zimbabwe,2009,1,354.8000,1,1,-7.670
189,Zimbabwe,2010,0,378.2000,1,1,3.217
190,Zimbabwe,2011,0,361.9000,1,1,4.920
191,Zimbabwe,2012,0,361.9000,1,1,3.720
