# Conflict in Lebanon

In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import geopandas as gpd

from datetime import datetime

# from acled_country_analysis import analysis
# from acled_country_analysis import visuals

import bokeh
from bokeh.models import Tabs, Panel, TabPanel
from bokeh.core.validation.warnings import EMPTY_LAYOUT, MISSING_RENDERERS
from bokeh.plotting import show, output_notebook

## Assignment

Lebanon has seen multiple conflicts since before its [inception](https://www.cfr.org/global-conflict-tracker/conflict/political-instability-lebanon). The assignment is to observe conflict trends using at least two data sources. Following this, the team will draw on research completed by Novta & Pugacheva, 2020, to estimate the cost of conflict.

## Data

### Armed Conflict Location & Event Data

The Armed Conflict Location & Event Data Project (ACLED) is a disaggregated data collection, analysis, and crisis mapping project. ACLED collects information on the dates, actors, locations, fatalities, and types of all reported political violence and protest events around the world. Access to this data is via a contract between the World Bank and ACLED, and the data can be extracted by any World Bank employee upon registering for an API key.


### UCDP Conflict Data

The team used a secondary conflict dataset based on [UCDP Conflict Analysis](https://www.pcr.uu.se/research/ucdp/methodology/), which primarily uses news data to generate geospatially-located conflict statistics. These are available on [SharePoint](https://worldbankgroup.sharepoint.com.mcas.ms/teams/DevelopmentDataPartnershipCommunity-WBGroup/Shared%20Documents/Forms/AllItems.aspx?csf=1&web=1&e=Yvwh8r&cid=fccdf23e%2D94d5%2D48bf%2Db75d%2D0af291138bde&FolderCTID=0x012000CFAB9FF0F938A64EBB297E7E16BDFCFD&id=%2Fteams%2FDevelopmentDataPartnershipCommunity%2DWBGroup%2FShared%20Documents%2FProjects%2FData%20Lab%2FLebanon%20Economic%20Analytics%2FData%2Fconflicts&viewid=80cdadb3%2D8bb3%2D47ae%2D8b18%2Dc1dd89c373c5). 


## Methodology and Implementation

ACLED and UCDP data were analysed and aggregated to admin levels gathered from [HDX](https://data.humdata.org/). The number of fatalities and conflicts were then shown by different event types and different admin regions. The results from both sources were compared.

Write the raw data to a file and read it back from that file. This avoids calling the API again when the kernel has to be restarted or the analysis is continued later.

In [3]:
# The raw ACLED extract is cached on disk so the API does not have to be
# queried again after a kernel restart.
# acled.to_csv('../../data/acled/acled_lebanon_2016_2023.csv')
acled_csv_path = "../../data/conflict/2012-01-01-2024-08-26-Lebanon.csv"
acled = pd.read_csv(acled_csv_path)

In [126]:
# UCDP georeferenced events export, read from the local data folder.
ucdp_csv_path = "../../data/ucdp_conflict/gedevents-2023-08-23.csv"
ucdp = pd.read_csv(ucdp_csv_path)

In [4]:
import re


def extract_second_number(input_string):
    """Return the second number found in ``input_string`` as a float.

    Numbers may be integers or decimals and may carry a leading ``+`` or ``-``
    sign, e.g. ``"POINT (35.5 -33.9)"`` yields ``-33.9``.

    Args:
        input_string: Text to scan for numbers. ``None`` and ``NaN`` are
            treated as missing values.

    Returns:
        The second number as a ``float``, or ``None`` when the value is
        missing or contains fewer than two numbers.
    """
    # Missing values (None / NaN) can reach this function through
    # ``Series.apply``; NaN is the only value that is not equal to itself.
    if input_string is None or input_string != input_string:
        return None

    # The optional sign is grouped with BOTH alternatives so that signed
    # integers such as "-33" keep their sign, not only signed decimals.
    numbers = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", input_string)

    return float(numbers[1]) if len(numbers) >= 2 else None

In [127]:
# The raw "longitude" column is parsed as text (presumably a coordinate string
# -- TODO confirm against the CSV): its second number is extracted, and the
# two coordinate column names are then swapped.
# NOTE: this cell is not idempotent -- the swap is applied on every run, so
# reload `ucdp` from disk before re-running it.
ucdp["longitude"] = ucdp["longitude"].apply(extract_second_number)
ucdp = ucdp.rename(columns={"latitude": "longitude", "longitude": "latitude"})

# Total deaths per event; a missing component counts as zero.
ucdp["deaths_total"] = (
    ucdp[["deaths_a", "deaths_b", "deaths_civilians", "deaths_unknown"]]
    .fillna(0)
    .sum(axis=1)
)

# Vectorised parse instead of a row-wise datetime.strptime call.
ucdp["date_start"] = pd.to_datetime(ucdp["date_start"], format="%m/%d/%Y %H:%M:%S")

In [5]:
# Administrative boundaries for levels 0-3, read in one pass and unpacked in
# level order.
lebanon_adm0, lebanon_adm1, lebanon_adm2, lebanon_adm3 = (
    gpd.read_file(shapefile_path)
    for shapefile_path in (
        "../../data/shapefiles/lbn_adm_cdr_20200810/lbn_admbnda_adm0_cdr_20200810.shp",
        "../../data/shapefiles/lbn_adm_cdr_20200810/lbn_admbnda_adm1_cdr_20200810.shp",
        "../../data/shapefiles/lbn_adm_cdr_20200810/lbn_admbnda_adm2_cdr_20200810.shp",
        "../../data/shapefiles/lbn_adm_cdr_20200810/lbn_admbnda_adm3_cdr_20200810.shp",
    )
)

In [6]:
# acled = gpd.GeoDataFrame(acled, geometry=gpd.points_from_xy(acled['longitude'], acled['latitude']))
# Parse dates written as day, full month name and year (format "%d %B %Y").
# pd.to_datetime is vectorised and, unlike a row-wise datetime.strptime, does
# not fail when this cell is re-run on an already-parsed column.
acled["event_date"] = pd.to_datetime(acled["event_date"], format="%d %B %Y")

In [28]:
from shapely.geometry import Point


def convert_to_gdf(df):
    """Build a point GeoDataFrame in EPSG:4326 from ``df``.

    Each row's ``longitude`` and ``latitude`` values become the x and y of a
    ``Point`` in the geometry column.
    """
    points = list(map(Point, zip(df.longitude, df.latitude)))
    return gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=points)

def get_acled_by_admin(
    adm,
    acled,
    columns=["ADM4_EN", "ADM3_EN", "ADM2_EN", "ADM1_EN"],
    nearest=False,
    event_date="event_date",
    fatalities="fatalities",
    freq="M"
):
    """Join events to admin polygons and aggregate them per period.

    The events are turned into points with ``convert_to_gdf`` (so ``acled``
    needs ``longitude`` and ``latitude`` columns), spatially joined to ``adm``
    and grouped by period and ``columns``.

    Args:
        adm: GeoDataFrame of polygons to join the events to.
        acled: Event table with coordinates plus the ``event_date`` and
            ``fatalities`` columns named below.
        columns: Names of columns in the joined frame to group by. Any
            sequence is accepted; it is copied and never modified.
        nearest: When truthy, use ``sjoin_nearest(max_distance=2000)`` instead
            of ``sjoin``. NOTE(review): ``max_distance`` is expressed in CRS
            units and the points are built in EPSG:4326 (degrees), so 2000
            looks effectively unbounded -- confirm the intent.
        event_date: Name of the datetime column used for the period grouping.
        fatalities: Name of the column that is summed and counted.
        freq: Offset alias handed to ``pd.Grouper``. Recent pandas versions
            warn that ``"M"`` is deprecated in favour of ``"ME"``.

    Returns:
        DataFrame with an ``index`` column (left by the final
        ``reset_index``), the period column (named after ``event_date``), the
        grouping ``columns``, ``fatalities`` (sum) and ``nrEvents`` (count of
        non-null values of the ``fatalities`` column).
    """
    # Work on a private list so tuples are accepted and the shared default
    # list can never be modified.
    group_columns = list(columns)
    events = convert_to_gdf(acled)

    # Only the join differs between the two modes; the aggregation is shared.
    if nearest:
        joined = adm.sjoin_nearest(events, max_distance=2000)
    else:
        joined = adm.sjoin(events)

    aggregated = (
        joined[[event_date, fatalities] + group_columns]
        .groupby([pd.Grouper(key=event_date, freq=freq)] + group_columns)[fatalities]
        .agg(["sum", "count"])
        .reset_index()
        .rename(columns={"sum": "fatalities", "count": "nrEvents"})
    )

    # The second reset_index is kept on purpose: it adds the "index" column
    # that existing outputs of this function already contain.
    return aggregated.reset_index()


In [16]:
import bokeh
from bokeh.layouts import column
from bokeh.models import Legend, TabPanel

from bokeh.core.validation.warnings import EMPTY_LAYOUT, MISSING_RENDERERS

bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)
from bokeh.plotting import figure


# Categorical palette used for the line plots; one colour per category.
color_palette = [
    "#4E79A7",  # Blue
    "#F28E2B",  # Orange
    "#E15759",  # Red
    "#76B7B2",  # Teal
    "#59A14F",  # Green
    "#EDC948",  # Yellow
    "#B07AA1",  # Purple
    "#FF9DA7",  # Pink
    "#9C755F",  # Brown
    "#BAB0AC",  # Gray
    "#7C7C7C",  # Dark gray
    "#6B4C9A",  # Violet
    "#D55E00",  # Orange-red
    "#CC61B0",  # Magenta
    "#0072B2",  # Bright blue
    "#329262",  # Peacock green
    "#9E5B5A",  # Brick red
    "#636363",  # Medium gray
    "#CD9C00",  # Gold
    "#5D69B1",  # Medium blue
]




def get_line_plot(
    ooklaUsers,
    title,
    source,
    earthquakes=False,
    subtitle=None,
    measure="conflictIndex",
    category="DT",
    event_date="event_date",
):
    """Build a titled Bokeh line chart with one line per category value.

    Args:
        ooklaUsers: DataFrame holding the ``category``, ``event_date`` and
            ``measure`` columns.
        title: Heading shown above the chart.
        source: Text shown below the chart (e.g. the data source).
        earthquakes: Currently unused; kept so existing callers keep working.
        subtitle: Optional title placed on the chart itself.
        measure: Name of the column plotted on the y axis.
        category: Name of the column whose unique values each get a line.
        event_date: Name of the datetime column plotted on the x axis.

    Returns:
        A Bokeh ``column`` layout stacking the heading, the chart and the
        source note.
    """
    p2 = figure(x_axis_type="datetime", width=800, height=500, toolbar_location="above")

    p2.add_layout(Legend(), "right")

    for idx, group in enumerate(ooklaUsers[category].unique()):
        series = ooklaUsers.loc[
            ooklaUsers[category] == group, [event_date, measure]
        ].reset_index(drop=True)
        p2.line(
            series[event_date],
            series[measure],
            line_width=2,
            # Wrap around so more categories than palette entries reuse
            # colours instead of raising IndexError.
            line_color=color_palette[idx % len(color_palette)],
            # Legend labels must be strings; category values may not be.
            legend_label=str(group),
        )

    p2.legend.click_policy = "hide"
    if subtitle is not None:
        p2.title = subtitle

    # Empty figures are used purely as text rows above and below the chart.
    title_fig = figure(
        title=title,
        toolbar_location=None,
        width=800,
        height=40,
    )
    title_fig.title.align = "left"
    title_fig.title.text_font_size = "14pt"
    title_fig.border_fill_alpha = 0
    title_fig.outline_line_width = 0

    sub_title = figure(
        title=source,
        toolbar_location=None,
        width=800,
        height=40,
    )
    sub_title.title.align = "left"
    sub_title.title.text_font_size = "10pt"
    sub_title.title.text_font_style = "normal"
    sub_title.border_fill_alpha = 0
    sub_title.outline_line_width = 0

    return column(title_fig, p2, sub_title)


In [9]:
# Aggregate the ACLED events at each administrative level; deeper levels are
# grouped by their parent names as well.
admin_name_columns = ["admin0Name", "admin1Name", "admin2Name", "admin3Name"]

acled_adm1 = get_acled_by_admin(lebanon_adm1, acled, columns=["admin1Name"])
acled_adm0 = get_acled_by_admin(lebanon_adm0, acled, columns=["admin0Name"])
acled_adm2 = get_acled_by_admin(lebanon_adm2, acled, columns=admin_name_columns[:3])
acled_adm3 = get_acled_by_admin(lebanon_adm3, acled, columns=admin_name_columns[:4])

  .groupby([pd.Grouper(key=event_date, freq="M")] + columns)[fatalities]
  .groupby([pd.Grouper(key=event_date, freq="M")] + columns)[fatalities]
  .groupby([pd.Grouper(key=event_date, freq="M")] + columns)[fatalities]
  .groupby([pd.Grouper(key=event_date, freq="M")] + columns)[fatalities]


In [129]:
ucdp_adm0 = get_acled_by_admin(
    lebanon_adm0,
    ucdp,
    columns=["admin0Name"],
    event_date="date_start",
    fatalities="deaths_total",
)

In [15]:
# output_notebook()
# bokeh.core.validation.silence(EMPTY_LAYOUT, True)
# bokeh.core.validation.silence(MISSING_RENDERERS, True)

# tabs = []
# titles = [
#     "Number of conflict events at a national level",
#     "Number of fatalities at a national level",
# ]

# # df = get_acled_by_admin(myanmar_adm1, acled, columns = ['country'])
# for idx, type in enumerate(["nrEvents", "fatalities"]):
#     tabs.append(
#         Panel(
#             child=get_line_plot(
#                 ucdp_adm0,
#                 f"{titles[idx]}",
#                 "Source: UCDP",
#                 earthquakes=False,
#                 subtitle="",
#                 category="admin0Name",
#                 measure=type,
#                 event_date="date_start",
#             ),
#             title=type.capitalize(),
#         )
#     )

# tabs = Tabs(tabs=tabs, sizing_mode="scale_both")
# show(tabs, warn_on_missing_glyphs=False)

In [18]:
#output_file("bokeh_plot.html")
output_notebook()
bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)

# (measure column, chart heading) pairs -- one tab per measure. Iterating over
# pairs avoids index look-ups and a loop variable named `type`, which would
# shadow the builtin for the rest of the notebook.
national_measures = [
    ("nrEvents", "Number of conflict events at a national level"),
    ("fatalities", "Number of fatalities at a national level"),
]

tabs = Tabs(
    tabs=[
        TabPanel(
            child=get_line_plot(
                acled_adm0,
                heading,
                "Source: ACLED",
                earthquakes=False,
                subtitle="",
                category="admin0Name",
                measure=measure_name,
            ),
            title=measure_name.capitalize(),
        )
        for measure_name, heading in national_measures
    ],
    sizing_mode="scale_both",
)
show(tabs, warn_on_missing_glyphs=False)

In [22]:
output_notebook()
bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)

# (measure column, chart heading) pairs -- one tab per measure. Iterating over
# pairs avoids index look-ups and a loop variable named `type`, which would
# shadow the builtin for the rest of the notebook.
admin1_measures = [
    ("nrEvents", "Number of conflict events"),
    ("fatalities", "Number of fatalities"),
]

tabs = Tabs(
    tabs=[
        TabPanel(
            child=get_line_plot(
                acled_adm1,
                f"{heading} by admin 1",
                "Source: ACLED",
                subtitle="",
                category="admin1Name",
                measure=measure_name,
            ),
            title=measure_name.capitalize(),
        )
        for measure_name, heading in admin1_measures
    ],
    sizing_mode="scale_both",
)
show(tabs, warn_on_missing_glyphs=False)

In [33]:
# National totals broken down by event type: default (monthly) frequency and
# daily frequency.
event_type_columns = ["event_type"]
acled_events = get_acled_by_admin(lebanon_adm0, acled, columns=event_type_columns)
acled_events_daily = get_acled_by_admin(
    lebanon_adm0, acled, columns=event_type_columns, freq="D"
)

  .groupby([pd.Grouper(key=event_date, freq=freq)] + columns)[fatalities]


In [20]:
output_notebook()
bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)

# (measure column, chart heading) pairs -- one tab per measure. Iterating over
# pairs avoids index look-ups and a loop variable named `type`, which would
# shadow the builtin for the rest of the notebook.
event_type_measures = [
    ("nrEvents", "Number of conflict events"),
    ("fatalities", "Number of fatalities"),
]

tabs = Tabs(
    tabs=[
        TabPanel(
            child=get_line_plot(
                acled_events,
                f"{heading} by event type",
                "Source: ACLED",
                subtitle="",
                category="event_type",
                measure=measure_name,
            ),
            title=measure_name.capitalize(),
        )
        for measure_name, heading in event_type_measures
    ],
    sizing_mode="scale_both",
)
show(tabs, warn_on_missing_glyphs=False)

## Updated Findings (As of August 2024)

* The number of events in the 'Explosions/Remote Violence' category has shot up in 2024. There were more than 800 events of remote violence in the beginning of 2024. 
* The trend of high explosions has continued throughout 2024 although it has started to decline. This decline could also be because the most recent data has not yet been updated by ACLED. 
* The total number of events in 2024 is nearing the previous all-time high in 2020. 


<!-- ## Previous Findings (As of October 2023)

* The UCDP and ACLED datasets cover different time periods and types of events. ACLED data also includes protests, which are not included in the UCDP data, which may explain why the total numbers of events do not appear comparable. Further analysis is needed to determine whether the datasets are more comparable when ACLED data are filtered for violent conflict. 
* The Beirut explosion occurred in 2020, which is reflected by the protest spikes in the ACLED data.
* In 2021, two power stations shut down due to fuel shortage resulting in a power cut, which is reflected by protest and riot spikes in the ACLED data.
* Conflict-related fatalities were highest in 2017, resulting from "battle". 
* Mount Lebanon, which is near Beirut, also saw a higher number of conflicts compared to the other admin regions.
* The number of deaths from conflict was highest in the Baalbek el Hermel province, which could be because of clan violence that occurred in the region before 2018. -->

In [22]:
# (aggregated frame, columns to keep, destination file) for every CSV export,
# written in the listed order.
csv_exports = [
    (
        acled_adm1,
        ["event_date", "admin1Name", "fatalities", "nrEvents"],
        "../../data/conflict/acled_admin1.csv",
    ),
    (
        acled_adm2,
        ["event_date", "admin1Name", "admin2Name", "fatalities", "nrEvents"],
        "../../data/conflict/acled_admin2.csv",
    ),
    (
        acled_events,
        ["event_date", "event_type", "fatalities", "nrEvents"],
        "../../data/conflict/acled_events.csv",
    ),
    (
        acled_adm3,
        ["event_date", "admin1Name", "admin2Name", "admin3Name", "fatalities", "nrEvents"],
        "../../data/conflict/acled_admin3.csv",
    ),
]

for frame, export_columns, csv_path in csv_exports:
    frame[export_columns].to_csv(csv_path)

In [35]:
# Daily event-type aggregates with the "Protests" rows left out.
is_not_protest = acled_events_daily["event_type"].ne("Protests")
acled_events_daily.loc[is_not_protest].to_csv(
    "../../data/conflict/acled_events_without_protests.csv"
)

In [25]:
# Export the raw 2024 events for manual review. The lower bound is inclusive
# so that events dated 1 January 2024 are part of the extract.
acled_2024_columns = [
    "event_date",
    "event_type",
    "sub_event_type",
    "actor1",
    "actor2",
    "notes",
    "fatalities",
]
acled.loc[acled["event_date"] >= "2024-01-01", acled_2024_columns].to_excel(
    "../../data/conflict/acled_2024.xlsx"
)

## Limitations

ACLED is a crowdsourced dataset. Despite being verified through local sources, it does not capture all of the conflicts that occur in the region. 

## References

Novta, Natalija, and Evgenia Pugacheva. "The macroeconomic costs of conflict." Journal of Macroeconomics 68 (2021): 103286.