# Projet

### Imports

In [61]:
# Imports
import glob, os
import urllib
import urllib.request

import pandas
import PIL.Image
from SPARQLWrapper import SPARQLWrapper

### WikiBaseIntegrator Imports

In [93]:
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator.wbi_config import config

# Set the USER_AGENT entry of the wikibaseintegrator configuration.
# NOTE(review): 'test' looks like a placeholder; presumably it should identify this project — confirm.
config['USER_AGENT'] = 'test'

## 1. Collecte de données

### Dataframe Functions

In [62]:
def jsonToDataframe(path):
    """Load an index-oriented JSON file into a pandas DataFrame.

    Args:
        path: Location of a UTF-8 encoded JSON file whose top-level keys are
            the row labels (``orient='index'``).

    Returns:
        The DataFrame parsed from the file.
    """
    with open(path, mode="r", encoding="utf-8") as handle:
        return pandas.read_json(handle, orient="index")

def dataframeToJson(path, dataframe):
    """Write a DataFrame to disk as index-oriented, indented JSON.

    Args:
        path: Destination file; it is created or overwritten, UTF-8 encoded.
        dataframe: The DataFrame to serialise.
    """
    serialized = dataframe.to_json(orient="index", indent=2)
    with open(path, mode="w", encoding="utf-8") as handle:
        handle.write(serialized)

### WikiBaseIntegrator Functions

In [98]:
# Shared WikibaseIntegrator client; the lookup helpers below call wbi.item.get() on it.
wbi = WikibaseIntegrator()

def GetCountryLabelInFrench(itemID):
    """Return the label of the country (property P17) attached to an item.

    The French label is preferred. When the country entity has no French
    label, the English one is returned instead of failing on ``None.value``,
    which is the same fallback GetArchitecturalStyle applies.

    Args:
        itemID: Identifier of the item to look up (e.g. ``"Q1024829"``).

    Returns:
        The country label as a string.
    """
    item_data = wbi.item.get(itemID)  # full data of the item
    # Only the first P17 claim of the item is considered.
    country_id = item_data.claims.get('P17')[0].mainsnak.datavalue['value']['id']
    country = wbi.item.get(country_id)  # full data of the country entity
    label = country.labels.get('fr')
    if label is None:
        # No French label available: fall back to the English one.
        label = country.labels.get('en')
    return label.value

def AddCountryTags(dataframe):
    """Append a ``#<country>`` tag to the ``tags`` list of every row.

    The country is resolved from the row's ``entityID`` through
    GetCountryLabelInFrench. Rows are addressed by their actual index labels
    through the public ``DataFrame.at`` accessor, so the frame does not need a
    0..n-1 index. A tag that is already present is not appended again, which
    keeps the notebook cell safe to re-run.

    Args:
        dataframe: Frame with an ``entityID`` column and a ``tags`` column
            holding one list per row. The lists are modified in place.

    Returns:
        The same ``dataframe`` object.
    """
    for row_label in dataframe.index:
        country = GetCountryLabelInFrench(dataframe.at[row_label, "entityID"])
        tag = "#" + country
        row_tags = dataframe.at[row_label, "tags"]
        if tag not in row_tags:
            row_tags.append(tag)
    return dataframe

def GetArchitecturalStyle(styleID):
    """Turn a sequence of style item ids into hashtag labels.

    For each id the entity is fetched through ``wbi``; its French label is
    used when present, otherwise its English label.

    Args:
        styleID: Sequence of item ids.

    Returns:
        A list with one ``"#<label>"`` string per id, in input order.
    """
    hashtags = []
    for style_qid in styleID:
        entity = wbi.item.get(style_qid)
        french = entity.labels.get('fr')
        chosen = entity.labels.get('en') if french is None else french
        hashtags.append("#" + chosen.value)
    return hashtags

def AddStyleTags(dataframe):
    """Append the architectural-style hashtags to the ``tags`` list of every row.

    The hashtags come from GetArchitecturalStyle applied to the row's
    ``style`` value. Rows are addressed by their actual index labels through
    the public ``DataFrame.at`` accessor, and a tag that is already present is
    not appended again, which keeps the notebook cell safe to re-run.

    Args:
        dataframe: Frame with a ``style`` column (sequence of style ids per
            row) and a ``tags`` column holding one list per row. The lists are
            modified in place.

    Returns:
        The same ``dataframe`` object.
    """
    for row_label in dataframe.index:
        row_tags = dataframe.at[row_label, "tags"]
        for style_tag in GetArchitecturalStyle(dataframe.at[row_label, "style"]):
            if style_tag not in row_tags:
                row_tags.append(style_tag)
    return dataframe

### Récupération du résultat d'une query Wikidata

In [99]:
# Number of result rows requested from Wikidata. The LIMIT applies to the raw
# rows, i.e. before rows sharing a label are merged below, so the final frame
# may contain fewer entries than this.
size = 5
dataPath = "data.json"
imagesPath = "./images/"

# Wikidata query
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
query = """
SELECT ?item ?itemLabel ?pic (YEAR(?date) as ?year) ?style WHERE {
  ?item wdt:P31 wd:Q4989906;
    wdt:P18 ?pic;
    wdt:P571 ?date;
    wdt:P149 ?style.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
ORDER BY ?item
LIMIT """
# Append the requested number of rows
query = query + str(size)

# Run the query and fetch the result as JSON
sparql.setQuery(query)
sparql.setReturnFormat("json")
results = sparql.query().convert()

# Flatten the bindings into one record per result row for pandas
array = []
for data in results["results"]["bindings"]:
    name = data["itemLabel"]["value"]
    link = data["pic"]["value"]
    # Named `extension` rather than `format` so the builtin format() is not
    # shadowed for the rest of the notebook.
    extension = link.split(".")[-1]
    path = f"{imagesPath}{name}.{extension}"
    date = data["year"]["value"]
    style = data["style"]["value"]
    array.append([name,
                  link,
                  data["item"]["value"].split(sep='/')[-1],  # entity id = last URI segment
                  extension,
                  path,
                  None,                                      # size is filled in by a later cell
                  ['#monument'],
                  date,
                  style.split(sep='/')[-1]])                 # style id = last URI segment

dataframe = pandas.DataFrame(array, columns=["label", "link", "entityID", "format", "path", "size", "tags", "date","style"])

# Merge the rows sharing a label, keeping every style of the merged rows as a list
dataframe = dataframe.groupby('label').agg({'link':'first', 'entityID':'first', 'format':'first', 'path':'first', 'size':'first', 'tags':'first', 'date':'first', 'style':list}).reset_index()
# Save the data as JSON
dataframeToJson(dataPath, dataframe)

# Make sure the images folder exists, then empty it. The folder comes from
# imagesPath so the files are removed from the same place they are saved to.
os.makedirs(imagesPath, exist_ok=True)
for stale_file in glob.glob(os.path.join(imagesPath, "*")):
    os.remove(stale_file)

dataframe


Unnamed: 0,label,link,entityID,format,path,size,tags,date,style
0,Buenos Aires Cabildo,http://commons.wikimedia.org/wiki/Special:File...,Q1024829,jpg,./images/Buenos Aires Cabildo.jpg,,[#monument],1725,[Q840829]
1,Group of Houses of the Santa Casa de Misericórdia,http://commons.wikimedia.org/wiki/Special:File...,Q10300408,jpg,./images/Group of Houses of the Santa Casa de ...,,[#monument],1612,"[Q131808, Q840829, Q1114972]"
2,Iglesia de la Concepción,http://commons.wikimedia.org/wiki/Special:File...,Q1024943,jpg,./images/Iglesia de la Concepción.jpg,,[#monument],1511,[Q37853]


### Téléchargement des Images

In [64]:
# Reload the saved metadata from dataPath rather than relying on the in-memory frame.
dataframe = jsonToDataframe(dataPath)
# Download each image to the local path recorded for it.
# urllib.request must be imported explicitly: `import urllib` alone does not
# load the submodule.
for link, path in zip(dataframe["link"], dataframe["path"]):
    urllib.request.urlretrieve(link, path)

KeyError: 'link'

### Récupération des données EXIF

In [None]:
dataframe = jsonToDataframe(dataPath)

# EXIF tag ids to extract, and the column each one is stored in
# (274 = Orientation, 315 = Artist, 306 = DateTime, 272 = Model).
wantedExifs = [274, 315, 306, 272]
# The EXIF date/time goes to its own "exifDate" column: "date" already holds
# the year that add_century_tag reads, and must not be overwritten.
columnNames = ["orientation", "artist", "exifDate", 'appareil']

for column in columnNames:
    dataframe[column] = None

# "size" may have been loaded as a numeric all-null column; use an object
# dtype so that a (width, height) tuple can be stored in a single cell.
dataframe["size"] = dataframe["size"].astype(object)

# Walk through the entries
for i in dataframe.index:
    # The context manager closes the file even if reading the metadata fails.
    with PIL.Image.open(dataframe.at[i, "path"]) as img:
        dataframe.at[i, "size"] = img.size
        # Public accessor, available on every image type (unlike _getexif()).
        exif_data = img.getexif()
        for tag, column in zip(wantedExifs, columnNames):
            if tag in exif_data:
                dataframe.at[i, column] = exif_data[tag]

# Save the data as JSON
dataframeToJson(dataPath, dataframe)

dataframe


## 2. Annotation des images

### Fonction pour récupérer et ajouter le tag siècle

In [100]:
def find_century(year):
    """Build the century hashtag for a year.

    Args:
        year: The year, as an int or anything ``int()`` accepts.

    Returns:
        ``"#Antiquité"`` for years <= 0, ``"#1er siècle"`` for years 1-100,
        otherwise ``"#<n>ème siècle"`` where a year divisible by 100 belongs
        to the century it closes (2000 -> 20, 2001 -> 21).
    """
    year_number = int(year)
    if year_number <= 0:
        return "#Antiquité"
    # Ceiling division: years 1..100 -> 1, 101..200 -> 2, ...
    century = (year_number + 99) // 100
    if century == 1:
        return "#1er siècle"
    return "#" + str(century) + "ème siècle"

def add_century_tag(dataframe):
    """Append the century hashtag of each row's ``date`` to its ``tags`` list.

    Rows are addressed by their actual index labels through the public
    ``DataFrame.at`` accessor, and a tag that is already present is not
    appended again, which keeps the notebook cell safe to re-run.

    Args:
        dataframe: Frame with a ``date`` column (a year accepted by
            find_century) and a ``tags`` column holding one list per row. The
            lists are modified in place.

    Returns:
        The same ``dataframe`` object.
    """
    for row_label in dataframe.index:
        century_tag = find_century(dataframe.at[row_label, "date"])
        row_tags = dataframe.at[row_label, "tags"]
        if century_tag not in row_tags:
            row_tags.append(century_tag)
    return dataframe

### Test Ajout du tag siècle

In [101]:
# Append the century tag to each row's "tags" list (in place) and display the returned frame.
add_century_tag(dataframe)

Unnamed: 0,label,link,entityID,format,path,size,tags,date,style
0,Buenos Aires Cabildo,http://commons.wikimedia.org/wiki/Special:File...,Q1024829,jpg,./images/Buenos Aires Cabildo.jpg,,"[#monument, #18ème siècle]",1725,[Q840829]
1,Group of Houses of the Santa Casa de Misericórdia,http://commons.wikimedia.org/wiki/Special:File...,Q10300408,jpg,./images/Group of Houses of the Santa Casa de ...,,"[#monument, #17ème siècle]",1612,"[Q131808, Q840829, Q1114972]"
2,Iglesia de la Concepción,http://commons.wikimedia.org/wiki/Special:File...,Q1024943,jpg,./images/Iglesia de la Concepción.jpg,,"[#monument, #16ème siècle]",1511,[Q37853]


### Test Ajout des tags country et style

In [102]:
# Append the country tag to each row's "tags" list (in place) and display the returned frame.
AddCountryTags(dataframe)

Unnamed: 0,label,link,entityID,format,path,size,tags,date,style
0,Buenos Aires Cabildo,http://commons.wikimedia.org/wiki/Special:File...,Q1024829,jpg,./images/Buenos Aires Cabildo.jpg,,"[#monument, #18ème siècle, #Argentine]",1725,[Q840829]
1,Group of Houses of the Santa Casa de Misericórdia,http://commons.wikimedia.org/wiki/Special:File...,Q10300408,jpg,./images/Group of Houses of the Santa Casa de ...,,"[#monument, #17ème siècle, #Brésil]",1612,"[Q131808, Q840829, Q1114972]"
2,Iglesia de la Concepción,http://commons.wikimedia.org/wiki/Special:File...,Q1024943,jpg,./images/Iglesia de la Concepción.jpg,,"[#monument, #16ème siècle, #Espagne]",1511,[Q37853]


In [103]:
# Append the style tags to each row's "tags" list (in place) and display the returned frame.
AddStyleTags(dataframe)

Unnamed: 0,label,link,entityID,format,path,size,tags,date,style
0,Buenos Aires Cabildo,http://commons.wikimedia.org/wiki/Special:File...,Q1024829,jpg,./images/Buenos Aires Cabildo.jpg,,"[#monument, #18ème siècle, #Argentine, #archit...",1725,[Q840829]
1,Group of Houses of the Santa Casa de Misericórdia,http://commons.wikimedia.org/wiki/Special:File...,Q10300408,jpg,./images/Group of Houses of the Santa Casa de ...,,"[#monument, #17ème siècle, #Brésil, #maniérism...",1612,"[Q131808, Q840829, Q1114972]"
2,Iglesia de la Concepción,http://commons.wikimedia.org/wiki/Special:File...,Q1024943,jpg,./images/Iglesia de la Concepción.jpg,,"[#monument, #16ème siècle, #Espagne, #baroque]",1511,[Q37853]


## 3. Analyse des données

In [None]:
# Création de la dataframe des utilisateurs
# Users table: starts with a single 'default' account that has no liked images.
usersPath = 'users.json'

array = [
    ['default', []],
]
usersDataframe = pandas.DataFrame(data=array, columns=['username', 'likedImages'])

# Write the table to usersPath, then show it as the cell output.
dataframeToJson(usersPath, usersDataframe)

usersDataframe