# IPC Alerts of Acute Food Insecurity
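Scrape the IPC alerts currently listed on the ipcinfo.org [alerts archive page](http://www.ipcinfo.org/ipcinfo-website/resources/alerts-archive/en/), download the PDF documents linked from each alert page, and save a metadata table describing them.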

In [3]:
# General imports
# helpers
from pathlib import Path
import urllib.request
from datetime import datetime
# import shutil
# import tqdm
import copy

# data processing
import pandas as pd
import requests
from urllib.request import Request, urlopen

from bs4 import BeautifulSoup

## Finding all alerts, then finding all related documents

In [125]:
def get_all_alerts_hyperlinks(url="http://www.ipcinfo.org/ipcinfo-website/resources/alerts-archive/en/"):
    """Collect the title and absolute url of every alert linked from the archive page.

    :param url: web url of the alerts archive page to scan
    :return: list of ``{"title": ..., "url": ...}`` dicts, one per alert link, in page order
    """
    # Send a browser-like User-Agent header with the request.
    headers = {"User-Agent": "Mozilla/5.0"}
    req = Request(url=url, headers=headers)
    with urllib.request.urlopen(req) as page:
        soup = BeautifulSoup(page.read(), "lxml")

    links = []
    for anchor in soup.find_all("a"):
        title = anchor.get("title")
        href = anchor.get("href")
        # Anchors without a title or without a target cannot be alert links.
        if title is None or href is None:
            continue
        # Thumbnails link to the same alert pages and would be collected twice.
        if title == "image thumbnail":
            continue
        if not href.startswith("/ipcinfo-website/alerts-archive/issue-"):
            continue
        # The page uses site-relative hrefs; make them absolute.
        links.append({"title": title, "url": "http://www.ipcinfo.org" + href})
    # TODO: add the alerts paragraph to the metadata
    return links

In [128]:
def get_pdf_from_url(url, output_path=Path("../../../data/sources/document.pdf")):
    """Download a pdf from a given url.
    :param url: web url from where to download the pdf
    :param output_path: a pathlib.Path object to which to write the downloaded pdf
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status code
    """
    # create the folder if not existing
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # download
    r = requests.get(url)
    # Fail loudly on an error status instead of saving the server's error page
    # under a .pdf name.
    r.raise_for_status()
    with open(output_path, "wb") as outfile:
        outfile.write(r.content)

def get_all_documents_from_alerts_references(links):
    """Download every pdf linked from the given alert pages and save their metadata.

    Reads the module-level names ``output_folder_path`` (folder receiving the pdfs)
    and ``output_metadata_path`` (csv file receiving the metadata); both must be
    defined before this function is called.

    :param links: iterable of dicts holding at least a "url" key (the alert page url);
        the dicts are left unmodified
    :return: a pandas DataFrame with one row per downloaded pdf, holding the keys of
        the originating link plus "alert_page_title", "pdf_url", "document_type"
        and "pdf_local_path"
    """
    documents = []
    for link in links:
        headers = {"User-Agent": "Mozilla/5.0"}
        req = Request(url=link["url"], headers=headers)

        with urllib.request.urlopen(req) as page:
            xml = BeautifulSoup(page.read(), "lxml")

        # A page without the expected header block yields a missing title, not a crash.
        header = xml.find("div", class_="csc-header csc-header-n2")
        alert_page_title = header.text if header is not None else None

        # TODO : Using spacy entity recognition, find the country/region + map it to ISO code

        for new_link in xml.find_all("a"):
            href = new_link.get("href")
            # Anchors without an href (e.g. named anchors) cannot point to a pdf.
            if href is None or not href.endswith(".pdf"):
                continue
            if href.startswith("/"):
                href = "http://www.ipcinfo.org" + href
            # For each download, based on the pdf's name (report/annexes/snapshots, etc...), indicate the type of file in a metadata file
            document_type = "unknown_type"
            pdf_filename = href.split("/")[-1]
            # No early exit: when several keywords occur in the name, the last listed wins.
            for potential_document_type in ["snapshot", "report", "annexes"]:
                if potential_document_type in pdf_filename.lower():
                    document_type = potential_document_type
            output_path = output_folder_path / pdf_filename
            # Build a fresh record per pdf so the caller's link dicts are not mutated.
            document = {
                **link,
                "alert_page_title": alert_page_title,
                "pdf_url": href,
                "document_type": document_type,
                "pdf_local_path": str(output_path),
            }
            # Download the items
            get_pdf_from_url(href, output_path=output_path)
            documents.append(document)

    # save the resulting csv of metadata; the folder may not exist yet when no pdf
    # was downloaded
    Path(output_metadata_path).parent.mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame.from_dict(documents)
    df.to_csv(output_metadata_path, sep=";", index=False)
    return df

## Scrape them all

In [129]:
# Scraping configuration: the archive page to crawl and where the results are stored.
ipc_alerts_hyperlink = (
    "http://www.ipcinfo.org/ipcinfo-website/resources/alerts-archive/en/"
)
output_folder_path = Path("../../../data/clean/ipc_alerts_acute_food_insecurity/")
output_metadata_path = output_folder_path / "ipc_alerts_metadata.csv"

# Find all hyperlinks for alerts on the page,
links = get_all_alerts_hyperlinks(url=ipc_alerts_hyperlink)
# Keep the returned metadata frame: the next cell previews `df`, which would be
# undefined on a fresh kernel if the result were discarded here.
df = get_all_documents_from_alerts_references(links)
df

Unnamed: 0,title,url,alert_page_title,pdf_url,document_type,pdf_local_path
0,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Afghanistan: Over 11 million people acutely fo...,http://www.ipcinfo.org/fileadmin/user_upload/i...,snapshot,..\..\..\data\clean\ipc_alerts_acute_food_inse...
1,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Afghanistan: Over 11 million people acutely fo...,http://www.ipcinfo.org/fileadmin/user_upload/i...,report,..\..\..\data\clean\ipc_alerts_acute_food_inse...
2,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Afghanistan: Over 11 million people acutely fo...,http://www.ipcinfo.org/fileadmin/user_upload/i...,report,..\..\..\data\clean\ipc_alerts_acute_food_inse...
3,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Afghanistan: Over 11 million people acutely fo...,http://www.ipcinfo.org/fileadmin/user_upload/i...,snapshot,..\..\..\data\clean\ipc_alerts_acute_food_inse...
4,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Afghanistan: Over 11 million people acutely fo...,http://www.ipcinfo.org/fileadmin/user_upload/i...,unknown_type,..\..\..\data\clean\ipc_alerts_acute_food_inse...
5,Over 21 million people in DR Congo facing high...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Over 21 million people in DR Congo facing high...,http://www.ipcinfo.org/fileadmin/user_upload/i...,report,..\..\..\data\clean\ipc_alerts_acute_food_inse...
6,Over 21 million people in DR Congo facing high...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Over 21 million people in DR Congo facing high...,http://www.ipcinfo.org/fileadmin/user_upload/i...,annexes,..\..\..\data\clean\ipc_alerts_acute_food_inse...
7,Over 21 million people in DR Congo facing high...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Over 21 million people in DR Congo facing high...,http://www.ipcinfo.org/fileadmin/user_upload/i...,snapshot,..\..\..\data\clean\ipc_alerts_acute_food_inse...
8,Over 21 million people in DR Congo facing high...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Over 21 million people in DR Congo facing high...,http://www.ipcinfo.org/fileadmin/user_upload/i...,snapshot,..\..\..\data\clean\ipc_alerts_acute_food_inse...
9,Ethiopia: 8.5 million people in urgent need of...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,Over 8.5 million people in seven regions of Et...,http://www.ipcinfo.org/fileadmin/user_upload/i...,report,..\..\..\data\clean\ipc_alerts_acute_food_inse...


In [131]:
# Preview the first five rows of `df` (expected to be the metadata frame returned
# by get_all_documents_from_alerts_references).
df.head(n=5)

Unnamed: 0,title,url,pdf_url,document_type,alert_page_title,pdf_local_path
0,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,http://www.ipcinfo.org/fileadmin/user_upload/i...,snapshot,Afghanistan: Over 11 million people acutely fo...,..\..\..\data\clean\ipc_alerts_acute_food_inse...
1,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,http://www.ipcinfo.org/fileadmin/user_upload/i...,report,Afghanistan: Over 11 million people acutely fo...,..\..\..\data\clean\ipc_alerts_acute_food_inse...
2,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,http://www.ipcinfo.org/fileadmin/user_upload/i...,report,Afghanistan: Over 11 million people acutely fo...,..\..\..\data\clean\ipc_alerts_acute_food_inse...
3,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,http://www.ipcinfo.org/fileadmin/user_upload/i...,snapshot,Afghanistan: Over 11 million people acutely fo...,..\..\..\data\clean\ipc_alerts_acute_food_inse...
4,Over 13 million people in Afghanistan likely t...,http://www.ipcinfo.org/ipcinfo-website/alerts-...,http://www.ipcinfo.org/fileadmin/user_upload/i...,unknown_type,Afghanistan: Over 11 million people acutely fo...,..\..\..\data\clean\ipc_alerts_acute_food_inse...
