# Projet

### Imports

In [40]:
# Imports
import glob, os
import shutil
import urllib
import urllib.request

import pandas
import PIL.Image
from SPARQLWrapper import SPARQLWrapper

## 1. Collecte de données

### Récupération du résultat d'une query Wikidata

In [41]:
# Configuration: number of entries to request and where results are stored.
size = 100
dataFileName = "data.json"
imagesPath = "./images/"

# Wikidata query: items that are an instance of (P31) Q146 and have an image (P18).
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
query = """
SELECT ?item ?itemLabel ?pic WHERE {
  ?item wdt:P31 wd:Q146;
    wdt:P18 ?pic.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT """
# Append the number of entries we want.
query = query + str(size)

# Run the query and decode the JSON response.
sparql.setQuery(query)
sparql.setReturnFormat("json")
results = sparql.query().convert()

# Flatten the SPARQL bindings into one row per result for pandas.
rows = []
used_paths = set()
for binding in results["results"]["bindings"]:
    label = binding["itemLabel"]["value"]
    link = binding["pic"]["value"]
    entity = binding["item"]["value"]

    # splitext only inspects the last URL component, so a file name without
    # an extension yields "" instead of a chunk of the URL.
    extension = os.path.splitext(link)[1].lstrip(".")

    # A label may contain path separators, which would point outside the
    # images folder (or into a folder that does not exist).
    safe_label = label.replace("/", "_").replace("\\", "_")
    path = f"{imagesPath}{safe_label}.{extension}"

    # Several results can share a label; give each its own file so one
    # download does not overwrite another.
    duplicate = 1
    while path in used_paths:
        duplicate += 1
        path = f"{imagesPath}{safe_label}_{duplicate}.{extension}"
    used_paths.add(path)

    # "size" is left empty here; it is filled in once the image is on disk.
    rows.append([label, link, entity, extension, path, None])

dataframe = pandas.DataFrame(
    rows, columns=["label", "link", "wikiDataRef", "format", "path", "size"]
)

# Save the data as JSON.
dataframe.to_json(dataFileName)

# Make sure the images folder exists, then empty it. Only regular files are
# removed: os.remove raises on directories.
os.makedirs(imagesPath, exist_ok=True)
for stale_file in glob.glob(os.path.join(imagesPath, "*")):
    if os.path.isfile(stale_file):
        os.remove(stale_file)

dataframe


Unnamed: 0,label,link,wikiDataRef,format,path,size
0,senior cats,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q123185365,jpg,./images/senior cats.jpg,
1,Myka,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q28665865,jpg,./images/Myka.jpg,
2,Gli,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q28792126,jpg,./images/Gli.jpg,
3,Toffee,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q49581026,jpg,./images/Toffee.jpg,
4,Crimean Tom,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q51596094,jpg,./images/Crimean Tom.jpg,
5,Peter II,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q51827254,jpg,./images/Peter II.jpg,
6,Arcturus Aldebaran Powers,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q52150156,jpg,./images/Arcturus Aldebaran Powers.jpg,
7,Nitama,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q53216650,jpg,./images/Nitama.jpg,
8,Foss,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q54087698,jpg,./images/Foss.jpg,
9,Şero,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q61133276,jpg,./images/Şero.jpg,


### Téléchargement des Images

In [43]:
# Wikimedia's User-Agent policy asks clients to identify themselves; requests
# sent with a generic library agent may be rejected. Add contact details
# (URL or e-mail) to this string before running large downloads.
USER_AGENT = "cat-images-notebook/1.0 (educational data-collection project)"

# Download every image listed in the dataframe to its target path.
# A failed download raises, so later cells never see a partial image set.
for link, path in zip(dataframe["link"], dataframe["path"]):
    # The target folder may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)

    request = urllib.request.Request(link, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(request) as response, open(path, "wb") as target:
        # Stream to disk instead of loading the whole image in memory.
        shutil.copyfileobj(response, target)

### Récupération des données EXIF

In [44]:

# EXIF tag ids to extract, and the dataframe column each one is stored in
# (same position in both lists).
wantedExifs = [274, 315, 306]
columnNames = ["orientation", "artist", "date"]
exifColumns = dict(zip(wantedExifs, columnNames))

# Start every EXIF column empty so images without metadata keep None.
for column in columnNames:
    dataframe[column] = None

# Walk through the entries, recording image size and the wanted EXIF tags.
for row, path in dataframe["path"].items():
    # The context manager closes the file even if reading the metadata fails.
    with PIL.Image.open(path) as img:
        dataframe.at[row, "size"] = img.size
        # getexif() is the public API and returns an empty mapping when the
        # image carries no EXIF data.
        exif_data = img.getexif()
        for tag, column in exifColumns.items():
            if tag in exif_data:
                dataframe.at[row, column] = exif_data[tag]

dataframe

Unnamed: 0,label,link,wikiDataRef,format,path,size,orientation,artist,date
0,senior cats,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q123185365,jpg,./images/senior cats.jpg,"(3017, 1911)",,,2022:07:20 10:02:22
1,Myka,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q28665865,jpg,./images/Myka.jpg,"(257, 340)",1.0,,2016:12:23 23:19:41
2,Gli,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q28792126,jpg,./images/Gli.jpg,"(450, 562)",1.0,,2022:05:26 23:58:01
3,Toffee,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q49581026,jpg,./images/Toffee.jpg,"(3488, 2616)",1.0,,2006:11:26 10:59:53
4,Crimean Tom,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q51596094,jpg,./images/Crimean Tom.jpg,"(960, 736)",1.0,,2011:01:13 17:34:46
5,Peter II,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q51827254,jpg,./images/Peter II.jpg,"(902, 1000)",,,
6,Arcturus Aldebaran Powers,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q52150156,jpg,./images/Arcturus Aldebaran Powers.jpg,"(546, 842)",1.0,,2017:11:10 16:09:40
7,Nitama,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q53216650,jpg,./images/Nitama.jpg,"(1920, 2560)",1.0,,2012:01:05 11:35:19
8,Foss,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q54087698,jpg,./images/Foss.jpg,"(500, 371)",,,
9,Şero,http://commons.wikimedia.org/wiki/Special:File...,http://www.wikidata.org/entity/Q61133276,jpg,./images/Şero.jpg,"(3264, 2448)",1.0,,2015:04:16 15:47:23
