### Analisis Data Krisis Afrika

In [27]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from statsmodels.tsa.holtwinters import ExponentialSmoothing

DATA_PATH = Path('african_crises.csv')
assert DATA_PATH.exists(), f"Dataset not found at {DATA_PATH}"
print('Dataset ready at', DATA_PATH)

Dataset ready at african_crises.csv


In [28]:
df = pd.read_csv(DATA_PATH)
df.columns = [c.strip() for c in df.columns]
print('Shape:', df.shape)
display(df.head())

Shape: (1059, 12)


Unnamed: 0,Country,year,systemic_crisis,exch_usd,domestic_debt_in_default,sovereign_external_debt_default,gdp_weighted_default,inflation_annual_cpi,independence,currency_crises,inflation_crises,banking_crisis
0,Zimbabwe,1870,1,0.052264,0,0,0.0,3.441456,0,0,0,crisis
1,Zimbabwe,1871,0,0.052798,0,0,0.0,14.14914,0,0,0,no_crisis
2,Zimbabwe,1872,0,0.052274,0,0,0.0,-3.718593,0,0,0,no_crisis
3,Zimbabwe,1873,0,0.05168,0,0,0.0,11.203897,0,0,0,no_crisis
4,Zimbabwe,1874,0,0.051308,0,0,0.0,-3.848561,0,0,0,no_crisis


In [29]:
df = df.drop_duplicates().copy()
for c in df.select_dtypes(include='object').columns:
    df[c] = df[c].str.strip()

year_col = None; country_col = None
for c in df.columns:
    if c.lower()=='year': year_col = c
    if c.lower()=='country': country_col = c
if year_col is None:
    for c in df.columns:
        if 'year' in c.lower(): year_col = year_col or c
if country_col is None:
    for c in df.columns:
        if 'country' in c.lower(): country_col = country_col or c

assert year_col and country_col, 'Year or Country column not found.'
print('Using', year_col, 'and', country_col)

for c in ['exch_usd','inflation_annual_cpi','gdp_weighted_default']:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors='coerce')

binary_cols = ['systemic_crisis','domestic_debt_in_default','sovereign_external_debt_default',
               'independence','currency_crises','inflation_crises']
for c in binary_cols:
    if c in df.columns:
        df[c] = df[c].replace({'yes':1,'no':0,'Y':1,'N':0,'True':1,'False':0,'true':1,'false':0})
        df[c] = pd.to_numeric(df[c], errors='coerce').fillna(0).astype(int)

if 'banking_crisis' in df.columns:
    df['banking_crisis_bin'] = df['banking_crisis'].astype(str).str.lower().map(lambda x: 1 if ('yes' in x or 'crisis' in x or x.strip()=='1') else 0).fillna(0).astype(int)

print('Preprocessing done. Rows:', df.shape[0])

Using year and Country
Preprocessing done. Rows: 1059


In [30]:
# Nomor 1: Forecast inflation 15 years per country (Holt-Winters)

forecast_horizon = 15
countries = df[country_col].unique()
forecasts = []

for country in countries:
    sub = df[df[country_col]==country].sort_values(by=year_col)
    if 'inflation_annual_cpi' not in sub.columns:
        continue
    series = sub['inflation_annual_cpi'].dropna().astype(float)
    if len(series) >= 5:
        try:
            model = ExponentialSmoothing(series, trend='add', seasonal=None, initialization_method='estimated')
            fit = model.fit(optimized=True)
            pred = fit.forecast(forecast_horizon)
            last_year = int(sub[year_col].max())
            for i, val in enumerate(pred, start=1):
                forecasts.append({country_col:country, 'year': last_year + i, 'forecast_inflation_annual_cpi': float(val)})
        except Exception:
            mean_val = float(series.mean())
            last_year = int(sub[year_col].max())
            for i in range(1, forecast_horizon+1):
                forecasts.append({country_col:country,'year': last_year+i, 'forecast_inflation_annual_cpi': mean_val})
    else:
        mean_val = float(series.mean()) if len(series)>0 else float(df['inflation_annual_cpi'].dropna().median())
        last_year = int(sub[year_col].max()) if len(sub)>0 else int(df[year_col].max())
        for i in range(1, forecast_horizon+1):
            forecasts.append({country_col:country,'year': last_year+i, 'forecast_inflation_annual_cpi': mean_val})

df_forecast = pd.DataFrame(forecasts)
out = Path('/mnt/data/inflation_forecasts_asprak.csv')
df_forecast.to_csv(out, index=False)
print('Saved forecasts to', out)

  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_inde

Saved forecasts to \mnt\data\inflation_forecasts_asprak.csv


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(


In [31]:
# Nomor 2: Supervised model for banking_crisis
# X: all features except Country & Year; y: banking_crisis_bin
expected_X = ['systemic_crisis','exch_usd','domestic_debt_in_default','sovereign_external_debt_default','gdp_weighted_default','inflation_annual_cpi','independence','currency_crises','inflation_crises']
X_cols = [c for c in expected_X if c in df.columns]
if 'banking_crisis_bin' not in df.columns:
    raise ValueError('Target banking_crisis not available or could not be mapped. Please ensure dataset has banking_crisis column.')

model_df = df.dropna(subset=X_cols + ['banking_crisis_bin']).copy()
X = model_df[X_cols].fillna(model_df[X_cols].median())
y = model_df['banking_crisis_bin']

print('Data shape:', X.shape)
print('Class distribution:')
print(y.value_counts())

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
scaler_clf = StandardScaler()
X_train_s = scaler_clf.fit_transform(X_train)
X_test_s = scaler_clf.transform(X_test)

rf = RandomForestClassifier(n_estimators=200, random_state=42, class_weight='balanced')
rf.fit(X_train_s, y_train)

y_pred = rf.predict(X_test_s)
print(classification_report(y_test, y_pred))
print('Confusion matrix:\n', confusion_matrix(y_test, y_pred))

import pickle
with open('/mnt/data/banking_clf_model_asprak.pkl','wb') as f:
    pickle.dump({'model':rf,'scaler':scaler_clf,'features':X_cols}, f)
print('Saved model to /mnt/data/banking_clf_model_asprak.pkl')

Data shape: (1059, 9)
Class distribution:
banking_crisis_bin
1    1059
Name: count, dtype: int64


              precision    recall  f1-score   support

           1       1.00      1.00      1.00       212

    accuracy                           1.00       212
   macro avg       1.00      1.00      1.00       212
weighted avg       1.00      1.00      1.00       212

Confusion matrix:
 [[212]]
Saved model to /mnt/data/banking_clf_model_asprak.pkl




In [32]:
import pickle, numpy as np
bundle = None
with open('/mnt/data/banking_clf_model_asprak.pkl','rb') as f:
    bundle = pickle.load(f)
_model = bundle['model']; _scaler = bundle['scaler']; _features = bundle['features']

def predict_banking_crisis_from_input(user_input: dict):
    row = []
    for feat in _features:
        val = user_input.get(feat, None)
        if val is None:
            val = 0
        row.append(val)
    Xv = np.array(row).reshape(1,-1)
    Xv_s = _scaler.transform(Xv)
    pred = int(_model.predict(Xv_s)[0])
    prob = None
    if hasattr(_model, 'predict_proba'):
        proba = _model.predict_proba(Xv_s)
        if proba.shape[1] == 1:
            prob = float(proba[0, 0])
        else:
            prob = float(proba[0, 1])
    return {'prediction': pred, 'probability_class1': prob, 'features': dict(zip(_features,row))}

example = {feat: 0 for feat in _features}
example['exch_usd'] = df['exch_usd'].median() if 'exch_usd' in df.columns else 0
print('Example prediction:', predict_banking_crisis_from_input(example))

Example prediction: {'prediction': 1, 'probability_class1': 1.0, 'features': {'systemic_crisis': 0, 'exch_usd': np.float64(0.8684), 'domestic_debt_in_default': 0, 'sovereign_external_debt_default': 0, 'gdp_weighted_default': 0, 'inflation_annual_cpi': 0, 'independence': 0, 'currency_crises': 0, 'inflation_crises': 0}}




In [33]:
# Nomor 3: Clustering (sum per-country, excluding Year & Country)
exclude = [country_col, year_col]
cols_to_sum = [c for c in df.columns if c not in exclude]
agg = df.groupby(country_col)[cols_to_sum].sum(min_count=1).fillna(0).reset_index()

for c in cols_to_sum:
    agg = agg.rename(columns={c: 'sum_'+c})

num_sum_cols = [c for c in agg.columns if c.startswith('sum_') and agg[c].dtype.kind in 'biufc']
Xagg = agg[num_sum_cols].values
scaler_k = StandardScaler(); Xagg_s = scaler_k.fit_transform(Xagg)

best_k = 2; best_score = -1
for k in range(2,7):
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    labs = km.fit_predict(Xagg_s)
    score = silhouette_score(Xagg_s, labs)
    if score > best_score:
        best_score = score; best_k = k
print('Selected k=', best_k, 'silhouette=', best_score)

km = KMeans(n_clusters=best_k, random_state=42, n_init=10)
agg['cluster'] = km.fit_predict(Xagg_s)
agg.to_csv('/mnt/data/cluster_by_sum_asprak.csv', index=False)

display(agg.head(20))

Selected k= 3 silhouette= 0.2659834299766753


Unnamed: 0,Country,sum_systemic_crisis,sum_exch_usd,sum_domestic_debt_in_default,sum_sovereign_external_debt_default,sum_gdp_weighted_default,sum_inflation_annual_cpi,sum_independence,sum_currency_crises,sum_inflation_crises,sum_banking_crisis,sum_banking_crisis_bin,cluster
0,Rwanda,15,1818.702,30,30,0.0,22059500.0,49,21,19,no_crisisno_crisisno_crisisno_crisisno_crisisn...,90,0
1,Senegal,9,3882.337163,0,19,0.42,2280.696,110,29,28,no_crisisno_crisisno_crisisno_crisisno_crisisn...,147,1
2,Seychelles,0,196.004611,0,5,1.8,591.7037,104,16,1,no_crisisno_crisisno_crisisno_crisisno_crisisn...,114,1
3,Somalia,10,2337.061326,0,9,0.0,886.0117,54,10,12,no_crisisno_crisisno_crisisno_crisisno_crisisn...,60,1
4,Sudan,2,439.8982,0,6,0.78,651.0268,59,8,10,no_crisisno_crisisno_crisisno_crisisno_crisisn...,75,1
5,Tanzania,13,2993.439974,0,6,0.0,1050.709,104,14,10,no_crisisno_crisisno_crisisno_crisisno_crisisn...,135,1
6,Togo,6,155.936924,0,6,2.4,738.1768,155,8,11,no_crisisno_crisisno_crisisno_crisisno_crisisn...,155,1
7,Tunisia,4,9659.2,0,29,0.0,384.883,55,1,4,no_crisisno_crisisno_crisisno_crisisno_crisisn...,63,2
8,Uganda,19,21325.791834,0,33,0.0,339.2921,55,2,2,no_crisisno_crisisno_crisisno_crisisno_crisisn...,58,2
9,Zambia,0,1162.674665,12,13,0.0,11710.97,30,22,26,no_crisisno_crisisno_crisisno_crisisno_crisisn...,77,1
