# Conflict in Lebanon

In [110]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import geopandas as gpd

from datetime import datetime

from acled_country_analysis import analysis
from acled_country_analysis import visuals

import bokeh
from bokeh.models import Tabs, Panel
from bokeh.core.validation.warnings import EMPTY_LAYOUT, MISSING_RENDERERS
from bokeh.plotting import show, output_notebook

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Assignment

Lebanon has seen multiple conflicts since before its [inception](https://www.cfr.org/global-conflict-tracker/conflict/political-instability-lebanon). The assignment is to observe conflict trends using at least two data sources. Following this, the team will use research completed by Novta & Pugacheva, 2020, to estimate the cost of conflict.

## Data

### Armed Conflict Location & Event Data

The Armed Conflict Location & Event Data Project (ACLED) is a disaggregated data collection, analysis, and crisis mapping project. ACLED collects information on the dates, actors, locations, fatalities, and types of all reported political violence and protest events around the world. Access to this data is via a contract between the World Bank and ACLED and can be extracted by any World Bank employee upon registering for an API key.


### UCDP Conflict Data

The team used a secondary conflict dataset based on [UCDP Conflict Analysis](https://www.pcr.uu.se/research/ucdp/methodology/), which primarily uses news data to generate geospatially-located conflict statistics. These are available on [SharePoint](https://worldbankgroup.sharepoint.com.mcas.ms/teams/DevelopmentDataPartnershipCommunity-WBGroup/Shared%20Documents/Forms/AllItems.aspx?csf=1&web=1&e=Yvwh8r&cid=fccdf23e%2D94d5%2D48bf%2Db75d%2D0af291138bde&FolderCTID=0x012000CFAB9FF0F938A64EBB297E7E16BDFCFD&id=%2Fteams%2FDevelopmentDataPartnershipCommunity%2DWBGroup%2FShared%20Documents%2FProjects%2FData%20Lab%2FLebanon%20Economic%20Analytics%2FData%2Fconflicts&viewid=80cdadb3%2D8bb3%2D47ae%2D8b18%2Dc1dd89c373c5). 


## Methodology and Implementation

ACLED and UCDP data were analysed and aggregated to admin levels gathered from [HDX](https://data.humdata.org/). The number of fatalities and conflicts were then shown by different event types and different admin regions. The results from both sources were compared.

The raw data is written to a file and read back from it, so that the API does not have to be queried again when the kernel is restarted or the analysis is resumed.

In [15]:
# Load the ACLED extract from a local CSV so the API does not have to be
# called again after a kernel restart.
# The export call below is kept commented out for reference only.
# NOTE(review): it writes to a different file name than the one read here —
# confirm which extract is the current one.
# acled.to_csv('../../data/acled/acled_lebanon_2016_2023.csv')
acled = pd.read_csv("../../data/acled/2012-01-01-2023-08-21-Lebanon.csv")

In [126]:
# Load the UCDP conflict events export from a local CSV.
ucdp = pd.read_csv("../../data/ucdp_conflict/gedevents-2023-08-23.csv")

In [67]:
import re


def extract_second_number(input_string):
    """Return the second number found in ``input_string`` as a float.

    Numbers may be integers or decimals and may carry a leading ``+`` or
    ``-`` sign, so negative coordinates keep their sign.

    Parameters
    ----------
    input_string : str or None
        Text to scan. ``None`` and float NaN (how pandas represents a
        missing cell) are treated as "no data".

    Returns
    -------
    float or None
        The second number in the text, or ``None`` when the input is missing
        or contains fewer than two numbers.

    Raises
    ------
    TypeError
        If ``input_string`` is a non-missing value that is not text (raised
        by ``re.findall``).
    """
    # A missing cell has nothing to parse. Real non-string values are left to
    # raise so that running this on an already-parsed column is not silent.
    if input_string is None or (
        isinstance(input_string, float) and input_string != input_string
    ):
        return None

    # The sign is grouped with BOTH alternatives; without the group it would
    # only attach to decimals and "-33" would be read as 33.
    numbers = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", input_string)

    if len(numbers) >= 2:
        return float(numbers[1])
    return None

In [127]:
# NOTE: run this cell once per load of the UCDP CSV. It rewrites columns of
# `ucdp`, so re-running it without reloading would apply the coordinate
# parsing and the label swap a second time.

# Keep only the second number found in each raw "longitude" value.
ucdp["longitude"] = ucdp["longitude"].apply(extract_second_number)
# Swap the two coordinate labels (presumably they are mislabeled in the
# export — TODO confirm against the source file).
ucdp = ucdp.rename(columns={"latitude": "longitude", "longitude": "latitude"})

# Total deaths per event = sum of the four death counts, missing treated as 0.
ucdp["deaths_total"] = (
    ucdp[["deaths_a", "deaths_b", "deaths_civilians", "deaths_unknown"]]
    .fillna(0)
    .sum(axis=1)
)

# Vectorised parse; unlike a row-wise strptime it does not fail on missing
# dates (they become NaT).
ucdp["date_start"] = pd.to_datetime(ucdp["date_start"], format="%m/%d/%Y %H:%M:%S")

In [56]:
# Read the Lebanon administrative boundary shapefiles, one per admin level
# (0 to 3), in that order.
lebanon_adm0, lebanon_adm1, lebanon_adm2, lebanon_adm3 = (
    gpd.read_file(shapefile_path)
    for shapefile_path in (
        "../../data/shapefiles/lbn_adm_cdr_20200810/lbn_admbnda_adm0_cdr_20200810.shp",
        "../../data/shapefiles/lbn_adm_cdr_20200810/lbn_admbnda_adm1_cdr_20200810.shp",
        "../../data/shapefiles/lbn_adm_cdr_20200810/lbn_admbnda_adm2_cdr_20200810.shp",
        "../../data/shapefiles/lbn_adm_cdr_20200810/lbn_admbnda_adm3_cdr_20200810.shp",
    )
)

In [17]:
# Parse ACLED's "day month-name year" date strings in one vectorised call.
# pd.to_datetime also accepts a column that is already datetime, so this cell
# can be re-run without reloading the CSV.
acled["event_date"] = pd.to_datetime(acled["event_date"], format="%d %B %Y")

In [59]:
# Run the project helper once per admin level, pairing each boundary layer
# with the admin-name columns requested from it. Call order is adm1, adm0,
# adm2, adm3.
acled_adm1, acled_adm0, acled_adm2, acled_adm3 = (
    analysis.get_acled_by_admin(boundaries, acled, columns=name_columns)
    for boundaries, name_columns in (
        (lebanon_adm1, ["admin1Name"]),
        (lebanon_adm0, ["admin0Name"]),
        (lebanon_adm2, ["admin0Name", "admin1Name", "admin2Name"]),
        (lebanon_adm3, ["admin0Name", "admin1Name", "admin2Name", "admin3Name"]),
    )
)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [129]:
# Reuse the ACLED helper for the UCDP events at admin level 0, overriding the
# date and fatality column names with the UCDP ones ("date_start" and the
# "deaths_total" column computed earlier in this notebook).
ucdp_adm0 = analysis.get_acled_by_admin(
    lebanon_adm0,
    ucdp,
    columns=["admin0Name"],
    event_date="date_start",
    fatalities="deaths_total",
)

In [132]:
output_notebook()
bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)

# One tab per measure: (column to plot, plot title).
measure_titles = [
    ("nrEvents", "Number of conflict events at a national level"),
    ("fatalities", "Number of fatalities at a national level"),
]

# The loop variable is `measure`, not `type`, so the builtin `type` is not
# rebound in the kernel namespace for later cells.
panels = [
    Panel(
        child=visuals.get_line_plot(
            ucdp_adm0,
            title,
            "Source: UCDP",
            earthquakes=False,
            subtitle="",
            category="admin0Name",
            measure=measure,
            event_date="date_start",
        ),
        title=measure.capitalize(),
    )
    for measure, title in measure_titles
]

tabs = Tabs(tabs=panels, sizing_mode="scale_both")
show(tabs, warn_on_missing_glyphs=False)

In [32]:
output_notebook()
bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)

# One tab per measure: (column to plot, plot title).
measure_titles = [
    ("nrEvents", "Number of conflict events at a national level"),
    ("fatalities", "Number of fatalities at a national level"),
]

# The loop variable is `measure`, not `type`, so the builtin `type` is not
# rebound in the kernel namespace for later cells.
panels = [
    Panel(
        child=visuals.get_line_plot(
            acled_adm0,
            title,
            "Source: ACLED",
            earthquakes=False,
            subtitle="",
            category="admin0Name",
            measure=measure,
        ),
        title=measure.capitalize(),
    )
    for measure, title in measure_titles
]

tabs = Tabs(tabs=panels, sizing_mode="scale_both")
show(tabs, warn_on_missing_glyphs=False)

In [35]:
output_notebook()
bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)

# One tab per measure: (column to plot, base plot title).
measure_titles = [
    ("nrEvents", "Number of conflict events"),
    ("fatalities", "Number of fatalities"),
]

# The loop variable is `measure`, not `type`, so the builtin `type` is not
# rebound in the kernel namespace for later cells.
panels = [
    Panel(
        child=visuals.get_line_plot(
            acled_adm1,
            f"{title} by admin 1",
            "Source: ACLED",
            subtitle="",
            category="admin1Name",
            measure=measure,
        ),
        title=measure.capitalize(),
    )
    for measure, title in measure_titles
]

tabs = Tabs(tabs=panels, sizing_mode="scale_both")
show(tabs, warn_on_missing_glyphs=False)

In [21]:
# Same helper and national (admin 0) boundary as before, but keyed on the
# ACLED "event_type" column instead of an admin-name column.
acled_events = analysis.get_acled_by_admin(lebanon_adm0, acled, columns=["event_type"])

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [36]:
output_notebook()
bokeh.core.validation.silence(EMPTY_LAYOUT, True)
bokeh.core.validation.silence(MISSING_RENDERERS, True)

# One tab per measure: (column to plot, base plot title).
measure_titles = [
    ("nrEvents", "Number of conflict events"),
    ("fatalities", "Number of fatalities"),
]

# The loop variable is `measure`, not `type`, so the builtin `type` is not
# rebound in the kernel namespace for later cells.
panels = [
    Panel(
        child=visuals.get_line_plot(
            acled_events,
            f"{title} by event type",
            "Source: ACLED",
            subtitle="",
            category="event_type",
            measure=measure,
        ),
        title=measure.capitalize(),
    )
    for measure, title in measure_titles
]

tabs = Tabs(tabs=panels, sizing_mode="scale_both")
show(tabs, warn_on_missing_glyphs=False)

## Findings 

* The UCDP and ACLED datasets cover different time periods and types of events. ACLED data also includes protests, which are not included in the UCDP data; this may explain why the total numbers of events do not appear comparable. Further analysis is needed to determine whether the datasets are more comparable when ACLED data are filtered for violent conflict.
* The Beirut explosion occurred in 2020, which is reflected by the protest spikes in the ACLED data.
* In 2021, two power stations shut down due to a fuel shortage, resulting in a power cut, which is reflected by protest and riot spikes in the ACLED data.
* Conflict-related fatalities were highest in 2017, resulting from "battle". 
* Mount Lebanon, which is near Beirut, also saw a higher number of conflicts compared to the other admin regions.
* The number of deaths from conflict was highest in the Baalbek el Hermel province, which could be because of clan violence that occurred in the region before 2018.

In [60]:
# Write each aggregated ACLED table to CSV, keeping only the listed columns.
# Each entry is (frame, columns to keep, output path); the write order is
# admin1, admin2, events, admin3.
exports = (
    (
        acled_adm1,
        ["event_date", "admin1Name", "fatalities", "nrEvents"],
        "../../data/acled/acled_admin1.csv",
    ),
    (
        acled_adm2,
        ["event_date", "admin1Name", "admin2Name", "fatalities", "nrEvents"],
        "../../data/acled/acled_admin2.csv",
    ),
    (
        acled_events,
        ["event_date", "event_type", "fatalities", "nrEvents"],
        "../../data/acled/acled_events.csv",
    ),
    (
        acled_adm3,
        ["event_date", "admin1Name", "admin2Name", "admin3Name", "fatalities", "nrEvents"],
        "../../data/acled/acled_admin3.csv",
    ),
)

for frame, export_columns, csv_path in exports:
    frame[export_columns].to_csv(csv_path)

## Limitations

ACLED is a crowdsourced dataset. Despite being verified through local sources, it does not capture all of the conflicts that occur in the region.

## References

Novta, Natalija, and Evgenia Pugacheva. "The macroeconomic costs of conflict." Journal of Macroeconomics 68 (2021): 103286.