# Mapping of human-wildlife conflict in India
## This script visualizes data on human-wildlife conflict in India. The conflict data comes from two sources: journal articles from PubMed Central (https://www.ncbi.nlm.nih.gov/pmc/) and the Google News portal. The process is described below.

This script takes resources from, and saves outputs to, the following Google Drive folder:
https://drive.google.com/drive/u/0/folders/1VrRL4Nc3AYb8neCeYJ_99jQ3nrC_p8vw

To run the script on a local machine, download the Google Drive folder (https://drive.google.com/drive/u/0/folders/1VrRL4Nc3AYb8neCeYJ_99jQ3nrC_p8vw) and change the folder path and output directory in the script to the location of the downloaded folder.

In [None]:
# creating a virtual environment for the project.
# NOTE(review): in IPython/Colab each `!` line presumably runs in its own
# subshell, so the `source` activation below would not carry over to the
# `!pip install` further down or to the notebook kernel — confirm before
# relying on the virtual environment.

!pip install virtualenv
!virtualenv myenv
!source myenv/bin/activate

# Installing the required libraries
# NOTE(review): `datetime` is part of the Python standard library; the PyPI
# package of that name is a different project — confirm it is really needed.
# NOTE(review): `tqdm` is imported by this script but is not in this install
# list — presumably it is already present in the runtime; verify.

!pip install pandas numpy geopandas requests datetime Biopython newspaper3k serpapi google-search-results spacy geopy folium

# Importing libraries

import csv
import datetime
import html
import json
import os
from datetime import timedelta

import folium
import geopandas as gpd
import newspaper
import numpy as np
import pandas as pd
import requests
import spacy
from Bio import Entrez
from folium.plugins import MarkerCluster
from geopy.exc import GeocoderServiceError, GeocoderTimedOut
from geopy.geocoders import Nominatim
from google.colab import drive
from serpapi import GoogleSearch
from spacy.tokens import DocBin
from spacy.training import Example
from tqdm import tqdm

# Setting up Google Drive folder to store the data
# code4Nature = https://drive.google.com/drive/u/0/folders/1VrRL4Nc3AYb8neCeYJ_99jQ3nrC_p8vw

# mount Google Drive
drive.mount('/content/drive')
# folder_path is where the result CSVs are written; output_dir is the directory
# the custom spaCy model is loaded from (see nlp_hwc below).
# NOTE(review): the two paths differ only in "MyDrive" vs "My Drive" — confirm
# that both spellings resolve to the same folder in the target runtime.
folder_path = "/content/drive/MyDrive/code4Nature"  # change the path if you are running on your local machine
output_dir = "/content/drive/My Drive/code4Nature"  # change the path if you are running on your local machine

# Define Geocoder
# (Nominatim is the OpenStreetMap geocoding service; user_agent identifies this app to it)
geolocator = Nominatim(user_agent="my-geocoding-app")

# Downloading spacy Large English model and defining a nlp object
# source:  https://spacy.io/ , https://spacy.io/usage/models

! python -m spacy download en_core_web_lg
nlp = spacy.load("en_core_web_lg") # inbuilt NER model for location

# The custom pipeline is loaded from a directory path rather than a package
# name; the code below reads the entity labels FAUNA_ENGLISH_NAME,
# FAUNA_LATIN_NAME and CONFLICT_TYPE from it.
nlp_hwc = spacy.load(output_dir) # custom NER model for human-wildlife conflict


### Content search starts here
# Searching PubMed database (https://www.ncbi.nlm.nih.gov/pmc/) to get the scholarly articles
# with keyword 'human-wildlife conflict india' in 'Title/Abstract'.
# Documentation: https://www.ncbi.nlm.nih.gov/books/NBK25500/

def search(query):
    """Search PubMed for records matching *query* in the title or abstract.

    Args:
        query: Search term sent to NCBI ESearch.

    Returns:
        The parsed ESearch response produced by ``Entrez.read``. Callers read
        the matching PubMed ids from its ``'IdList'`` entry. At most 500 ids
        are requested, sorted by relevance.
    """
    Entrez.email = 'shaurabh.anand@apu.edu.in' # Individual account only for testing purpose. Not for production and deployement.
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='500',
                            retmode='xml',
                            term=query,
                            field='Title/Abstract')
    # Close the network handle even if parsing fails, so the connection is
    # not leaked across the repeated keyword searches.
    try:
        return Entrez.read(handle)
    finally:
        handle.close()

def fetch_details(id_list):
    """Fetch the full PubMed records for the given ids.

    Args:
        id_list: Iterable of PubMed id strings. Duplicates are dropped (first
            occurrence wins) so that overlapping searches do not request the
            same record twice.

    Returns:
        The parsed EFetch response produced by ``Entrez.read``. When
        *id_list* is empty no request is made and a dict with empty
        ``'PubmedArticle'`` and ``'PubmedBookArticle'`` lists is returned.
    """
    unique_ids = list(dict.fromkeys(id_list))
    if not unique_ids:
        # Nothing to fetch: avoid sending an empty id parameter to NCBI.
        return {'PubmedArticle': [], 'PubmedBookArticle': []}

    Entrez.email = 'shaurabh.anand@apu.edu.in' # Individual account only for testing purpose. Not for production and deployement.
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=','.join(unique_ids))
    # Close the network handle even if parsing fails.
    try:
        return Entrez.read(handle)
    finally:
        handle.close()

def extract_data(papers):
    """Reduce parsed PubMed records to title, abstract text and DOI link.

    Args:
        papers: Mapping with a ``'PubmedArticle'`` list, each item exposing
            ``['MedlineCitation']['Article']``.

    Returns:
        A list of dicts with the keys ``'Title'``, ``'Abstract'`` and
        ``'Link'``. ``'Abstract'`` is always a single string (empty when the
        record has no abstract); ``'Link'`` is ``'N/A'`` when no DOI is found.
    """
    doi_base_url = 'https://doi.org/'  # DOI resolver, not a PubMed URL
    extracted_data = []
    for paper in papers['PubmedArticle']:
        article = paper['MedlineCitation']['Article']
        title = str(article['ArticleTitle'])

        # AbstractText may be a list of sections; join them so the CSV (and the
        # NLP step that reads it back) sees plain text, not a list repr.
        abstract_parts = article.get('Abstract', {}).get('AbstractText', '')
        if isinstance(abstract_parts, str):
            abstract = str(abstract_parts)
        else:
            abstract = ' '.join(str(part) for part in abstract_parts)

        # Use the first ELocationID entry tagged as a DOI (assuming only one DOI).
        link = 'N/A'
        for element in article.get('ELocationID', []):
            if getattr(element, 'attributes', {}).get('EIdType') == 'doi':
                link = f"{doi_base_url}{element}"
                break

        extracted_data.append({'Title': title, 'Abstract': abstract, 'Link': link})
    return extracted_data

def write_to_csv(papers, csv_file):
    """Write the title/abstract/link rows extracted from *papers* to *csv_file*.

    The file is (over)written as UTF-8 CSV with a header row and the columns
    ``Title``, ``Abstract`` and ``Link``.
    """
    columns = ['Title', 'Abstract', 'Link']
    # Extract first so a failure here happens before the file is opened.
    rows = extract_data(papers)

    with open(csv_file, mode='w', newline='', encoding='utf-8') as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        for record in rows:
            sheet.writerow(record)

if __name__ == '__main__':
    # Perform search and fetch details
    keywords = ['human-wildlife conflict India', 'wildlife crop loss India', 'wildlife crop damage India', 'livestock depredation India', 'wildlife conflict India']
    id_list = []

    for keyword in keywords:
        result = search(keyword)
        id_list.extend(result['IdList'])

    # The keyword searches overlap, so drop repeated ids (keeping first-seen
    # order) to avoid duplicate rows in the CSV.
    id_list = list(dict.fromkeys(id_list))

    papers = fetch_details(id_list)
    write_to_csv(papers, 'hwc_pubmed_articles.csv')

# keep_default_na=False keeps empty abstracts as "" and the 'N/A' link
# placeholder as text; with the default NA parsing both become NaN floats,
# which the NLP step cannot process.
pubmed_paper = pd.read_csv("hwc_pubmed_articles.csv", keep_default_na=False)

# Load spaCy English model
# (rebinds `nlp` to the same "en_core_web_lg" pipeline name loaded earlier in this file)
nlp = spacy.load("en_core_web_lg")

# Define Geocoder
# Rebinds `geolocator`, this time with timeout=10; the geocoding calls further
# down use this instance.
# NOTE(review): presumably the timeout is in seconds per request — confirm
# against the geopy documentation.
geolocator = Nominatim(user_agent="my-geocoding-app", timeout=10)

# Extract place names from an abstract with the general-purpose spaCy model
def nlp_location(abstract, nlp):
    """Return the texts of all ``GPE`` entities found in *abstract*.

    Args:
        abstract: Abstract text. Non-string values (e.g. the NaN pandas uses
            for an empty CSV cell) and blank strings yield an empty list.
        nlp: Callable that turns text into a document exposing ``.ents``.

    Returns:
        A list of entity texts, in document order.
    """
    if not isinstance(abstract, str) or not abstract.strip():
        return []
    doc = nlp(abstract)
    # Extract location based on spacy 'Named Entity Recognition (NER) model'
    return [ent.text for ent in doc.ents if ent.label_ == "GPE"]

# Run the GPE extractor over every abstract and store the result per row.
pubmed_paper["locations"] = pubmed_paper["Abstract"].apply(
    lambda abstract_text: nlp_location(abstract_text, nlp)
)

# Defining a function to get species data from the abstract
def nlp_species(abstract, nlp_hwc):
    """Return the animal names the custom model finds in *abstract*.

    Args:
        abstract: Abstract text. Non-string values (e.g. the NaN pandas uses
            for an empty CSV cell) and blank strings yield an empty list.
        nlp_hwc: Callable that turns text into a document exposing ``.ents``.

    Returns:
        A list of entity texts labelled ``FAUNA_ENGLISH_NAME`` or
        ``FAUNA_LATIN_NAME``, in document order.
    """
    if not isinstance(abstract, str) or not abstract.strip():
        return []
    doc = nlp_hwc(abstract)
    # Extract both English and Latin name of animal species based on custom NER model
    return [ent.text for ent in doc.ents if ent.label_ in ("FAUNA_ENGLISH_NAME", "FAUNA_LATIN_NAME")]

# Run the custom species extractor over every abstract and store the result per row.
pubmed_paper["species"] = pubmed_paper["Abstract"].apply(
    lambda abstract_text: nlp_species(abstract_text, nlp_hwc)
)

# Defining a function to get conflict info from the abstract
def nlp_conflict(abstract, nlp_hwc):
    """Return the conflict-type mentions the custom model finds in *abstract*.

    Args:
        abstract: Abstract text. Non-string values (e.g. the NaN pandas uses
            for an empty CSV cell) and blank strings yield an empty list.
        nlp_hwc: Callable that turns text into a document exposing ``.ents``.

    Returns:
        A list of entity texts labelled ``CONFLICT_TYPE``, in document order.
    """
    if not isinstance(abstract, str) or not abstract.strip():
        return []
    doc = nlp_hwc(abstract)
    return [ent.text for ent in doc.ents if ent.label_ == "CONFLICT_TYPE"]

# Run the custom conflict-type extractor over every abstract and store the result per row.
pubmed_paper["conflict_info"] = pubmed_paper["Abstract"].apply(
    lambda abstract_text: nlp_conflict(abstract_text, nlp_hwc)
)

# Function to geocode a list of location names
def geocode_location(locations):
    """Geocode every name in *locations* with the module-level ``geolocator``.

    Args:
        locations: Iterable of place-name strings.

    Returns:
        A list with one ``{"latitude": ..., "longitude": ...}`` dict per input
        name, in the same order. Both values are ``None`` when the geocoder
        finds nothing or raises a timeout/service error (which is printed).
    """
    geocoded = []
    for location_text in locations:
        latitude = longitude = None
        try:
            location = geolocator.geocode(location_text)
        # The exception classes are imported by name: the file never imports
        # the bare `geopy` package, so `geopy.exc...` would raise NameError
        # exactly when a geocoder error needs handling.
        except (GeocoderTimedOut, GeocoderServiceError) as e:
            print(f"Error geocoding location: {location_text} ({type(e)})")
        else:
            if location:
                latitude, longitude = location.latitude, location.longitude
        geocoded.append({"latitude": latitude, "longitude": longitude})
    return geocoded

# Apply geocoding to the locations column
# geocode_location receives each row's "locations" value and returns one
# {"latitude", "longitude"} dict per entry, so "geocoded" lines up with "locations".
pubmed_paper["geocoded"] = pubmed_paper["locations"].apply(geocode_location)

# Saving the final file
# Written under folder_path without the index column.
# NOTE(review): the list-valued columns are presumably stored as their string
# form in the CSV — confirm if the file is meant to be read back.
filename = "pubmed_df.csv"
pubmed_paper.to_csv(f"{folder_path}/{filename}", index=False)

# Searching GoogleNews using serpapi. Only for testing
# Documentation: https://serpapi.com/google-news-api

# The SerpApi key is a private credential and must not live in the source.
# Read it from the environment instead (the test plan supports only 100
# searches per month).
serpapi_key = os.environ.get("SERPAPI_API_KEY")
if not serpapi_key:
    raise RuntimeError(
        "Set the SERPAPI_API_KEY environment variable before running the Google News search."
    )

params = {
    "engine": "google_news",
    "q": "human-wildlife-conflict india",
    "gl": "in",
    "api_key": serpapi_key,
}

# Named news_search so the PubMed `search()` function defined earlier in this
# file is not overwritten by the client object.
news_search = GoogleSearch(params)
results = news_search.get_dict()
if "news_results" not in results:
    # Fail with the service's own message rather than a bare KeyError.
    raise RuntimeError(
        f"SerpApi returned no news results: {results.get('error', 'unknown error')}"
    )
news_results = results["news_results"]

# Convert to dataframe
news_df = pd.DataFrame(news_results)

def article_text(row):
    """Download and parse the news article referenced by ``row["link"]``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a ``"link"`` entry.

    Returns:
        The article's body text, or ``""`` when the link is missing, is not a
        non-blank string, or the download/parse fails (the failure is printed).
    """
    url = row.get("link")
    # Rows without a usable link (absent key, None, NaN, blank) cannot be
    # downloaded; skip them instead of handing a non-URL to newspaper.
    if not isinstance(url, str) or not url.strip():
        return ""
    try:
        article_obj = newspaper.Article(url)
        article_obj.download()
        article_obj.parse()
        return article_obj.text
    except (newspaper.ArticleException, ConnectionError, TimeoutError) as e:
        print(f"Error processing article: {url} ({type(e)})")
        return ""

# Fetch the body text for every news result, one row at a time.
news_df["article_text"] = news_df.apply(
    article_text,
    axis="columns",
)

# Place-name extraction for news article text
def nlp_location_article(article_text, nlp):
    """Collect the text of every ``GPE`` entity that *nlp* finds in *article_text*.

    Returns the entity texts as a list, in document order.
    """
    place_names = []
    # Keep only entities tagged as locations by spacy's Named Entity Recognition (NER) model
    for entity in nlp(article_text).ents:
        if entity.label_ != "GPE":
            continue
        place_names.append(entity.text)
    return place_names

# Run the GPE extractor over every article body and store the result per row.
news_df["locations"] = news_df["article_text"].apply(
    lambda body: nlp_location_article(body, nlp)
)

# Species extraction for news article text
def nlp_species_article(article_text, nlp_hwc):
    """Collect the animal names that *nlp_hwc* finds in *article_text*.

    Returns the texts of entities labelled ``FAUNA_ENGLISH_NAME`` or
    ``FAUNA_LATIN_NAME`` as a list, in document order.
    """
    fauna_labels = ("FAUNA_ENGLISH_NAME", "FAUNA_LATIN_NAME")
    species_names = []
    # Both the English and the Latin names come from the custom NER model
    for entity in nlp_hwc(article_text).ents:
        if entity.label_ in fauna_labels:
            species_names.append(entity.text)
    return species_names

# Run the custom species extractor over every article body and store the result per row.
news_df["species"] = news_df["article_text"].apply(
    lambda body: nlp_species_article(body, nlp_hwc)
)

# Conflict-type extraction for news article text
def nlp_conflict_article(article_text, nlp_hwc):
    """Collect the conflict-type mentions that *nlp_hwc* finds in *article_text*.

    Returns the texts of entities labelled ``CONFLICT_TYPE`` as a list, in
    document order.
    """
    conflict_mentions = []
    for entity in nlp_hwc(article_text).ents:
        if entity.label_ != "CONFLICT_TYPE":
            continue
        conflict_mentions.append(entity.text)
    return conflict_mentions

# Run the custom conflict-type extractor over every article body and store the result per row.
news_df["conflict_info"] = news_df["article_text"].apply(
    lambda body: nlp_conflict_article(body, nlp_hwc)
)

# Apply geocoding to the locations column
# geocode_location receives each row's "locations" value and returns one
# {"latitude", "longitude"} dict per entry, so "geocoded" lines up with "locations".
news_df["geocoded"] = news_df["locations"].apply(geocode_location)

# Saving the final file
# Written under folder_path without the index column; `filename` is rebound
# here from the value used for the PubMed export.
filename = "news_df.csv"
news_df.to_csv(f"{folder_path}/{filename}", index=False)

### Mapping and visualization block starts here

# Rough latitude/longitude envelope around India.
# Approximate estimate. Not for official publications
min_lat = 6.0
max_lat = 37.0
min_lon = 68.0
max_lon = 97.0

# Bounding-box membership test used when placing map markers
def is_within_india(latitude, longitude):
    """Return whether the point lies inside the bounding box above (edges included)."""
    latitude_in_range = min_lat <= latitude <= max_lat
    # `and` short-circuits, so the longitude is only compared when the
    # latitude already fits.
    return latitude_in_range and min_lon <= longitude <= max_lon

# Create the map centered on India
m = folium.Map(location=[20.5937, 78.9629], zoom_start=5)


def _add_markers(frame, title_col, link_col, color):
    """Add one marker to ``m`` for every geocoded point of *frame* inside India.

    Args:
        frame: DataFrame with ``geocoded``, ``conflict_info`` and ``species``
            columns plus the two columns named below.
        title_col: Name of the column holding the record title.
        link_col: Name of the column holding the record URL.
        color: folium icon colour for this data source.
    """
    for _, row in frame.iterrows():
        # Titles, entity texts and links come from external sources, so escape
        # them before they are placed into the popup HTML.
        title = html.escape(str(row[title_col]))
        conflict = html.escape(', '.join(row['conflict_info']))
        species = html.escape(', '.join(row['species']))

        # Only emit an anchor for a real URL (missing links may be NaN or 'N/A').
        link = row[link_col]
        if isinstance(link, str) and link.startswith(("http://", "https://")):
            link_html = f"<br><a href='{html.escape(link)}'>Link</a>"
        else:
            link_html = ""

        popup = (
            f"<b>Title:</b> {title}"
            f"<br><b>Conflict Type:</b> {conflict}"
            f"<br><b>Species:</b> {species}"
            f"{link_html}"
        )

        for loc in row["geocoded"]:
            lat, lon = loc["latitude"], loc["longitude"]
            # Failed lookups are stored as None; test for that explicitly
            # rather than by truthiness.
            if lat is None or lon is None:
                continue
            if is_within_india(lat, lon):
                folium.Marker(
                    location=[lat, lon],
                    popup=popup,
                    tooltip=title,
                    icon=folium.Icon(color=color),
                ).add_to(m)


# Add markers for PubMed data
_add_markers(pubmed_paper, title_col='Title', link_col='Link', color='red')

# Add markers for News data
_add_markers(news_df, title_col='title', link_col='link', color='blue')

# displays the map
m

### Mapping and visualization block ends here

