In [108]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import statsmodels.formula.api as smf
import statsmodels.api as sm
import joblib
from math import ceil
from scipy import stats
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingClassifier, GradientBoostingClassifier
from sklearn.metrics import mean_absolute_error, mean_squared_error, root_mean_squared_error, r2_score, accuracy_score, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_curve, roc_auc_score, auc, precision_recall_curve
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline

In [109]:
def loading_df():
    data_path = r"D:\DDI\terrorism\data\globalterrorismdb_0718dist.csv"
    df = pd.read_csv(data_path)

    # Rename columns for ML use
    df = df.rename(columns={
        'iyear': 'Year',
        'imonth': 'Month',
        'region_txt': 'Region',
        'attacktype1_txt': 'Attack_Type',
        'weaptype1_txt': 'Weapon_Type',
        'natlty1_txt': 'Nationality'
    })

    # Military target mapping (do this before selecting columns)
    df['ismilitary'] = (df['targtype1_txt'] == 'Military').astype(int)

    # Select only relevant columns
    df = df[['Year', 'Month', 'Region', 'Attack_Type',
             'Weapon_Type', 'ismilitary', 'Nationality', 'success']]
    return df

df = loading_df()

  df = pd.read_csv(data_path)


In [23]:
df['Year'].value_counts().sort_index().sample(5)
# 1970 - 2017 -> sliders

Year
1974      581
1971      471
1991     4683
2015    14965
2014    16903
Name: count, dtype: int64

In [24]:
df['Month'].value_counts().sort_index()
# 0 - unknown, 1- Jan to 12 - Dec -> Sliders

Month
0        20
1     14936
2     13879
3     15257
4     15152
5     16875
6     15359
7     16268
8     15800
9     14180
10    15563
11    14906
12    13496
Name: count, dtype: int64

In [25]:
df['Region'].value_counts()
# Select Box

Region
Middle East & North Africa     50474
South Asia                     44974
South America                  18978
Sub-Saharan Africa             17550
Western Europe                 16639
Southeast Asia                 12485
Central America & Caribbean    10344
Eastern Europe                  5144
North America                   3456
East Asia                        802
Central Asia                     563
Australasia & Oceania            282
Name: count, dtype: int64

In [26]:
df['Weapon_Type'].value_counts()
# Select Box

Weapon_Type
Explosives                                                                     92426
Firearms                                                                       58524
Unknown                                                                        15157
Incendiary                                                                     11135
Melee                                                                           3655
Chemical                                                                         321
Sabotage Equipment                                                               141
Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)      136
Other                                                                            114
Biological                                                                        35
Fake Weapons                                                                      33
Radiological                                         

In [27]:
nationality = df['Nationality'].value_counts().unique()
print(len(nationality))  # Should print 205
print(nationality[:10])  # Preview first 10

147
[24113 13900 12098 10931  7922  6685  5840  5212  5065  4636]


In [None]:
def input_to_model(df):
    """
    Generate a random attack scenario from the dataset for model input.
    df: your loaded dataframe
    """
    
    # Get unique options for categorical columns
    regions = df['Region'].unique()
    attack_types = df['Attack_Type'].unique()
    weapon_types = df['Weapon_Type'].unique()
    nationalities = df['Nationality'].unique()
    
    # Generate random input
    input_data = pd.DataFrame([{
        'Year': np.random.randint(1971, 2018),  # 2017 inclusive
        'Month': np.random.randint(0, 13),      # 1â€“12
        'Region': np.random.choice(regions),
        'Attack_Type': np.random.choice(attack_types),
        'Weapon_Type': np.random.choice(weapon_types),
        'ismilitary': np.random.choice([0, 1]),
        'Nationality': np.random.choice(nationalities)

    
    }])
    return input_data

random_data = input_to_model(df)

### MVP Testing:

    bm_terrorist_success_rate.joblib
    rfc_terrorist_success_rate.joblib
    lg_terrorist_success_rate.joblib

In [180]:
print(saved_data.keys())

dict_keys(['model', 'coefficients', 'metadata'])


In [193]:
randm_data = input_to_model(df)

saved_data = joblib.load('../model/bm_terrorist_success_rate.joblib')
model = saved_data['model']
metadata = saved_data['metadata']
coefficients = saved_data['coefficients']

prediction = model.predict(randm_data)
confidence = model.predict_proba(randm_data)
print(randm_data)
# print(prediction)
# print("Confidence probabilities:", confidence[0])
print(f"\n--- Insert Model Coefficients ---")
print(coefficients)
print("Accuracy:", metadata['accuracy'])
# Human-readable interpretation
interpretation = "Likely to succeed" if prediction[0] == 1 else "Likely to fail"
confidence_score = confidence[0][prediction[0]]
print(f"Prediction: {interpretation} ({confidence_score*100:.2f} Confidence)")


   Year  Month                       Region  \
0  1972      1  Central America & Caribbean   

                           Attack_Type   Weapon_Type  ismilitary Nationality  
0  Hostage Taking (Barricade Incident)  Radiological           0     Germany  

--- Insert Model Coefficients ---
                               Feature  Importance
1                                Month    0.264221
0                                 Year    0.253141
26              Weapon_Type_Explosives    0.052204
16           Attack_Type_Assassination    0.038341
2                           ismilitary    0.035600
..                                 ...         ...
201               Nationality_Slovenia    0.000000
211             Nationality_St. Martin    0.000000
226           Nationality_Turkmenistan    0.000000
236                Nationality_Vanuatu    0.000000
240  Nationality_Virgin Islands (U.S.)    0.000000

[248 rows x 2 columns]
Accuracy: 0.89
Prediction: Likely to fail (80.00 Confidence)
