In [2]:
import sys
sys.path.append('/home/antoine/Documents/GitHub/Link-Encyclo')
sys.path.append('/home/antoine/Documents/GitHub/datas')
from EncycloObject import Article, Book
from WikidataTools import WikidataObject as wdo
import os
import folium
import pickle
import requests

In [3]:
with open('sample_france.pkl', 'rb') as f:
    sample_france = pickle.load(f)

**Input**
- `sample` : a `Book` instance of articles, each having the attribute `preds`
- `sample_name` : a string such as `champagne_france`: the name of the sample we are working on
- `id_preds` the Key to look or inside the predictions, so we can reach them with `article.preds['id_preds']`
- optional: `bbox` : the bounding box to plot
```python
folium.Rectangle(
bounds=[[1, 2], [8, 9]],
color='green',
fill=False,
)
```

Structure de preds :
- Liste de dict 
- avec comme keys : 'rank' (1,2, 3...), 'pred_id' (qids), 'pred_coords'(tuple), 'acc@10' (booléen)



**Output** 
- html maps are saved under `output/maps_{id_preds}/sample_name`
- `final_map`: a Folium map

**problèmes**
- si je veux plotter (champagne, france) ET (suabe, allemagne) --> 2 commus

In [4]:
def retrieve_infos(qid,
                   tupling = True,
                   solr_instance = 'http://localhost:8983/solr/frenchtapioca5/query'):
    """Retrieve the coordinates of a qid
    Given the Solr collection available @ solr_instance"""
    params = {
        'q': 'id:{}'.format(qid),
        'wt': 'json',
        'fl': 'label,coordinates,desc'
    }
    try:
        r = requests.post(solr_instance, data=params)
        r.raise_for_status()
        response_json = r.json()
        if len(response_json['response']['docs']) == 0:
            return None, None, None
        coords = response_json['response']['docs'][0].get('coordinates')
        label = response_json['response']['docs'][0].get('label')
        desc = response_json['response']['docs'][0].get('desc')
        if tupling:
            return tuple(map(float, coords[0].split(','))), label, desc
        else:
            return coords, label, desc
    except (requests.RequestException, KeyError, IndexError) as e:
        return None

retrieve_infos(qid = 'Q142', tupling=True)

In [4]:
def preds2map(sample, sample_name, expe_id, bbox=None):
    """mapping"""
    
    m = folium.Map(location=[48.8566, 2.3522], zoom_start=5)
    
    for article in sample:
        
        # are there any predictions ?
        predictions = article.preds[expe_id]
        if not predictions:
            continue

        # get the best prediction
        best_candidate = predictions.pop(0)
        if best_candidate['pred_id'] == 'Q0': # Entity linking failed
            continue
        ressource = wdo(best_candidate.get('pred_id'))
        label = ressource._get_label(lang='en')
        desc = ressource._get_description(lang='en')

        # we have gold coordinates 
        if hasattr(article, 'gold_nugues'):
            gap = best_candidate['acc@10']
            gold = wdo(article.gold_nugues)
            gold_label = gold._get_label(lang='en')
            gold_desc = gold._get_description(lang='en')
            marker_color = 'green' if gap else 'red'
            html = f"""
            <html>
                <body>
                    <a href="{article.artfl}" target="_blank"><strong>{article.hash}</strong></a>
                    <p style="color: #555;">{article.text[:150]}...</p>
                    <p><strong>GOLD :</strong> <a href="{gold.link}" target="_blank">{gold_label}</a>, {gold_desc}, {gold.uri}</p>
                    <p><strong>PRED n°{best_candidate['rank']} : <a href="{ressource.link}" target="_blank">{label}</a>, {desc}, {ressource.uri}</p>
                </body>
            </html>
            """
            # delete wdo objects
            del gold
            del ressource

        # we don't have gold coordinates <p><strong> Best prediction :</strong> <a href="{wdo(best_candidate['pred_id']).link}" target="_blank">{best_candidate['pred_id']}</a></p>
        else:    
            marker_color = 'beige'            
            html = f"""
            <!DOCTYPE html>
            <html>
            <head>
            <title>Marker Popup</title>
            </head>
            <body>
            <a href="{article.artfl}" target="_blank"><strong>{article.hash}</strong></a>
            <p style="color: #555;">{article.text[:150]}...</p>
            <p> <strong>PRED n°{best_candidate['rank']} : <a href="{ressource.link}" target="_blank">{label}</a>, {desc}, {ressource.uri}</p>
            <form action="http://localhost:5000/save_annotation" method="post" target="hidden_iframe">
                <label><input type="checkbox" name="correct" value="correct"> Correct</label><br>
                <label><input type="checkbox" name="wrong" value="wrong"> Wrong</label><br>
                <label><input type="checkbox" name="abberant" value="abberant"> Abberant</label><br>
                <label for="annotation">Annotation suggérée : </label><br>
                <textarea id="annotation" name="annotation" rows="1" cols="30"></textarea><br>
                <input type="hidden" id="hash" name="hash" value="{article.hash}">
                <label for="note">Remarque : </label><br>
                <textarea id="note" name="note" rows="1" cols="30"></textarea><br>
                <input type="submit" value="Sauvegarder">
            <p><strong> Other predictions :</strong> </p>
            """

            for pred in predictions :
                ressource = wdo(pred['pred_id'])
                label = ressource._get_label(lang='en')
                desc = ressource._get_description(lang='en')
                html += f"""
                <p>n°{pred['rank']} : <a href="{ressource.link}" target="_blank">{label}</a>, {desc}, {ressource.uri}</p>
                """

            html += """
            </form>
            <iframe name="hidden_iframe" style="display:none;"></iframe>
            </body>
            </html>
            """

            del ressource

        # Add the marker to the map
        folium.Marker(
            location=best_candidate['pred_coords'],
            popup=folium.Popup(html, max_width=500),
            icon=folium.Icon(color=marker_color)
        ).add_to(m)

        # add the bbox to the map
        if bbox:
            bbox.add_to(m)

    # save the map and return it
    # if no foler output/maps_{preds_id} exists, create it
    if not os.path.exists(f'outputs/maps_{expe_id}'):
        os.makedirs(f'outputs/maps_{expe_id}')
    m.save(f'outputs/maps_{expe_id}/{sample_name}.html')
    return m


In [5]:
m = preds2map(sample = sample_france[500:550], sample_name = 'test_france', expe_id = 'test_france')

In [5]:
import json
print(json.dumps(sample_france[0].preds, indent=2))

{
  "test_france": [
    {
      "rank": 0,
      "pred_id": "Q3390498",
      "pred_coords": [
        48.8458,
        2.37243
      ],
      "acc@10": false,
      "label": [
        "place des Combattants-en-Afrique-du-Nord"
      ],
      "desc": "place de Paris, France"
    },
    {
      "rank": 1,
      "pred_id": "Q106643503",
      "pred_coords": [
        48.880970675,
        2.42174555
      ],
      "acc@10": false,
      "label": [
        "avenue des Combattants-d'Afrique-du-Nord"
      ],
      "desc": "avenue des Lilas, en France"
    },
    {
      "rank": 2,
      "pred_id": "Q110955946",
      "pred_coords": [
        43.28810336792453,
        5.412446551886793
      ],
      "acc@10": false,
      "label": [
        "chemin de l'Arm\u00e9e d'Afrique"
      ],
      "desc": "chemin de Marseille, en France"
    },
    {
      "rank": 3,
      "pred_id": "Q113448661",
      "pred_coords": [
        48.964191957142866,
        2.524119314285714
      ],
      "acc@10

In [6]:

class Mention(object):
    """
    **adapted from
    Delpeuch, A. (2019). Opentapioca: Lightweight entity linking for wikidata. arXiv preprint arXiv:1904.09131.**
    
    A mention is a phrase which can be associated with various candidate items (tags).

    """

    def __init__(self, hash, gold, link, context, tags,
                 start = None, end = None ):
        super(Mention, self).__init__()
        self.hash = hash
        self.gold = gold
        self.link = link
        self.context = context
        self.start = start
        self.end = end
        self.tags = tags

class Tag(object) :
    """
    Here, a tag is a spatial entity candidate for a mention
    """

    def __init__(self, rank, uri, experiment_id, related_to,
                 tag_coords = None, acc = None):
        super(Tag, self).__init__()
        self.rank = rank
        self.uri = uri
        self.experiment_id = experiment_id
        self.related_to = related_to
        self.tag_coords = tag_coords
        self.acc = acc

In [7]:
paris = wdo(uri=142)
paris.expe_id = 'test_france'

In [8]:
MENTIONS = []

for article in sample_france :
    TAGS = [] #{'test_france': []}
    for pred in article.preds['test_france'] :
        TAGS.append(Tag(rank=pred['rank'],
                                       uri=pred['pred_id'],
                                       experiment_id='test_france',
                                       related_to=article.hash,
                                       tag_coords=pred['pred_coords'],
                                       acc=pred['acc@10'] if 'acc@10' in pred.keys() else None) )

    m = Mention(
        hash=article.hash, link = article.artfl, context=article.text, tags = TAGS,
        gold = article.gold_nugues if hasattr(article, 'gold_nugues') else None
        )
    
    MENTIONS.append(m)

In [9]:
MENTIONS

[<__main__.Mention at 0x72a8e92f6210>,
 <__main__.Mention at 0x72a8e92f72d0>,
 <__main__.Mention at 0x72a8e92f74d0>,
 <__main__.Mention at 0x72a8e92f7dd0>,
 <__main__.Mention at 0x72a8e92fc810>,
 <__main__.Mention at 0x72a8e92fd3d0>,
 <__main__.Mention at 0x72a8e92fd910>,
 <__main__.Mention at 0x72a8e92fe690>,
 <__main__.Mention at 0x72a8e92ff7d0>,
 <__main__.Mention at 0x72a9fc250590>,
 <__main__.Mention at 0x72a8e92be710>,
 <__main__.Mention at 0x72a8e93b4c10>,
 <__main__.Mention at 0x72a8e9277ad0>,
 <__main__.Mention at 0x72a8e93e0810>,
 <__main__.Mention at 0x72a8e9259990>,
 <__main__.Mention at 0x72a8e924b9d0>,
 <__main__.Mention at 0x72a8e9248b50>,
 <__main__.Mention at 0x72a8e9249010>,
 <__main__.Mention at 0x72a8e9676010>,
 <__main__.Mention at 0x72a8e9108110>,
 <__main__.Mention at 0x72a8e9108490>,
 <__main__.Mention at 0x72a8e9108710>,
 <__main__.Mention at 0x72a8e9108a10>,
 <__main__.Mention at 0x72a8e910a550>,
 <__main__.Mention at 0x72a8e910af90>,
 <__main__.Mention at 0x7

In [10]:
MENTIONS[0].tags

[<__main__.Tag at 0x72a8e92f6410>,
 <__main__.Tag at 0x72a8e9776dd0>,
 <__main__.Tag at 0x72a8e93cc690>,
 <__main__.Tag at 0x72a8e92f5e50>,
 <__main__.Tag at 0x72a8e92f5bd0>,
 <__main__.Tag at 0x72a8e92f6050>,
 <__main__.Tag at 0x72a8e92f61d0>,
 <__main__.Tag at 0x72a8e92f6290>,
 <__main__.Tag at 0x72a8e92f6250>,
 <__main__.Tag at 0x72a8e92f6190>]

In [28]:
m = [m for m in MENTIONS if m.hash == '1/2599/ANDONVILLE'][0]
for tag in m.tags:
    print(tag.acc)
    wd = wdo(tag.uri)
    print(wd._get_label(lang='en'), wd._get_description(lang='en'))
    print(wd._distance_to(wdo(m.gold)))


False
Andonville commune in Loiret, France
0.0


In [23]:
[m for m in MENTIONS if m.hash == '1/2599/ANDONVILLE'][0].

AttributeError: 'Mention' object has no attribute 'acc'

In [19]:
def preds2map(sample, sample_name, expe_id, bbox=None):
    """mapping"""
    
    m = folium.Map(location=[48.8566, 2.3522], zoom_start=5)
    
    for mention in sample:
        
        # are there any tags for the experiment ?
        tags = [tag for tag in mention.tags if tag.experiment_id == expe_id]
        if not tags:
            continue

        # get the best prediction
        best_candidate = [tag for tag in tags if tag.rank == 0][0]
        if best_candidate.uri == 'Q0': # Entity linking failed
            continue
        # turning it into a WikidataObject
        best_cand_wdo = wdo(uri=best_candidate.uri )
        label = best_cand_wdo._get_label(lang='en')
        desc = best_cand_wdo._get_description(lang='en')

        # in case we have gold coordinates :
        if hasattr(mention, 'gold') and not mention.gold is None:
            gap = best_candidate.acc
            gold_wdo = wdo(uri=mention.gold)
            gold_label = gold_wdo._get_label(lang='en')
            gold_desc = gold_wdo._get_description(lang='en')
            marker_color = 'green' if gap else 'red'
            html = f"""
            <html>
                <body>
                    <a href="{mention.link}" target="_blank"><strong>{mention.hash}</strong></a>
                    <p style="color: #555;">{mention.context[:150]}...</p>
                    <p><strong>GOLD :</strong> <a href="{gold_wdo.link}" target="_blank">{gold_label}</a>, {gold_desc}, {gold_wdo.uri}</p>
                    <p><strong> All predictions :</strong> </p>
            """
            for tag in tags :
                cand_wdo = wdo(tag.uri)
                label = cand_wdo._get_label(lang='en')
                desc = cand_wdo._get_description(lang='en')
                html += f"""
                <p>n°{tag.rank} : <a href="{cand_wdo.link}" target="_blank">{label}</a>, {desc}, {cand_wdo.uri}</p>
                """
                del cand_wdo

            html += f"""
            <form action="http://localhost:5000/save_annotation" method="post" target="hidden_iframe">
                <label><input type="checkbox" name="correct" value="correct"> Correct</label><br>
                <label><input type="checkbox" name="wrong" value="wrong"> Wrong</label><br>
                <label><input type="checkbox" name="abberant" value="abberant"> Abberant</label><br>
                <label for="annotation">Suggested annotation : </label><br>
                <textarea id="annotation" name="annotation" rows="1" cols="30"></textarea><br>
                <input type="hidden" id="hash" name="hash" value="{mention.hash}">
                <label for="note">Note : </label><br>
                <textarea id="note" name="note" rows="1" cols="30"></textarea><br>
                <input type="submit" value="Save">
            </form>
            <iframe name="hidden_iframe" style="display:none;"></iframe>
            </body>
            </html>
            """
            # delete wdo objects
            del gold_wdo
            del best_cand_wdo

        # in case we don't have gold coordinates
        else:    
            marker_color = 'beige'            
            html = f"""
            <!DOCTYPE html>
            <html>
            <head>
            <title>Marker Popup</title>
            </head>
            <body>
            <a href="{mention.link}" target="_blank"><strong>{mention.hash}</strong></a>
            <p style="color: #555;">{mention.context[:150]}...</p>
            <p> <strong>Best Prediction : <a href="{best_cand_wdo.link}" target="_blank">{label}</a>, {desc}, {best_cand_wdo.uri}</p>
            <form action="http://localhost:5000/save_annotation" method="post" target="hidden_iframe">
                <label><input type="checkbox" name="correct" value="correct"> Correct</label><br>
                <label><input type="checkbox" name="wrong" value="wrong"> Wrong</label><br>
                <label><input type="checkbox" name="abberant" value="abberant"> Abberant</label><br>
                <label for="annotation">Suggested annotation : </label><br>
                <textarea id="annotation" name="annotation" rows="1" cols="30"></textarea><br>
                <input type="hidden" id="hash" name="hash" value="{mention.hash}">
                <label for="note">Note : </label><br>
                <textarea id="note" name="note" rows="1" cols="30"></textarea><br>
                <input type="submit" value="Save">
            <p><strong> All predictions :</strong> </p>
            """

            for tag in tags :
                cand_wdo = wdo(tag.uri)
                label = cand_wdo._get_label(lang='en')
                desc = cand_wdo._get_description(lang='en')
                html += f"""
                <p>n°{tag.rank} : <a href="{cand_wdo.link}" target="_blank">{label}</a>, {desc}, {cand_wdo.uri}</p>
                """
                del cand_wdo

            html += """
            </form>
            <iframe name="hidden_iframe" style="display:none;"></iframe>
            </body>
            </html>
            """

            

        # Add the marker to the map
        folium.Marker(
            location=best_candidate.tag_coords,
            popup=folium.Popup(html, max_width=500),
            icon=folium.Icon(color=marker_color)
        ).add_to(m)

        # add the bbox to the map
        if bbox:
            bbox.add_to(m)

    # save the map and return it
    # if no foler output/maps_{preds_id} exists, create it
    if not os.path.exists(f'outputs/maps_{expe_id}'):
        os.makedirs(f'outputs/maps_{expe_id}')
    m.save(f'outputs/maps_{expe_id}/{sample_name}.html')
    return m


In [20]:
m = preds2map(sample = MENTIONS[:50], sample_name = 'test_france', expe_id = 'test_france')