# Projet

### Imports

In [5]:
# Imports
import glob, os
import pandas
import urllib
from SPARQLWrapper import SPARQLWrapper
import PIL.Image
from datetime import datetime

## 1. Collecte de données

### Dataframe Functions

In [2]:
def jsonToDataframe(path):   
    with open(path, "r", encoding="utf-8") as f:
        dataframe = pandas.read_json(f, orient='index')
    return dataframe

def dataframeToJson(path, dataframe):   
    with open(path, "w", encoding="utf-8") as f:
        f.write(dataframe.to_json(orient='index', indent=2))

### Récupération du résultat d'une query Wikidata

In [23]:
size = 5
dataPath = "data.json"
imagesPath = "./images/"

# Query de Wikidata
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
query = """
SELECT ?item ?itemLabel ?pic (YEAR(?date) as ?year) WHERE {
  ?item wdt:P31 wd:Q4989906;
    wdt:P18 ?pic;
    wdt:P571 ?date.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT """
# On précise le nombre d'entrées que l'on veut
query = query + str(size)

# On récupère le résultat de la query
sparql.setQuery(query)
sparql.setReturnFormat("json")
results = sparql.query().convert()

# On normalise les résultat pour pandas
array = []
for data in results["results"]["bindings"]:
    name = data["itemLabel"]["value"]
    format = data["pic"]["value"].split(".")[-1]
    path = f"{imagesPath}{name}.{format}"
    date = data["year"]["value"]

    array.append([name,  
                  data["pic"]["value"], 
                  data["item"]["value"].split(sep='/')[-1],
                  format,
                  path,
                  None,
                  ['#monument'],
                  date])
                  

dataframe = pandas.DataFrame(array, columns=["label", "link", "entityID", "format", "path", "size", "tags","date"])

# On enregistre les données au format JSON
dataframeToJson(dataPath, dataframe)

# Nettoyage du dossier /images
if not os.path.isdir('./images'):
  os.mkdir('./images')

# Nettoyage du dossier /images
else:
  files = glob.glob('./images/*')
  for f in files:
      os.remove(f)



dataframe


Unnamed: 0,label,link,entityID,format,path,size,tags,date
0,National Monument on Dam Square,http://commons.wikimedia.org/wiki/Special:File...,Q473851,jpg,./images/National Monument on Dam Square.jpg,,[#monument],1956
1,Palacio Episcopal de Astorga,http://commons.wikimedia.org/wiki/Special:File...,Q507390,JPG,./images/Palacio Episcopal de Astorga.JPG,,[#monument],1889
2,Monastery of San Adrián de Sasabe,http://commons.wikimedia.org/wiki/Special:File...,Q518072,jpg,./images/Monastery of San Adrián de Sasabe.jpg,,[#monument],801
3,Plaza de toros de la Malagueta,http://commons.wikimedia.org/wiki/Special:File...,Q523311,jpg,./images/Plaza de toros de la Malagueta.jpg,,[#monument],1876
4,Biblioteca Museu Víctor Balaguer,http://commons.wikimedia.org/wiki/Special:File...,Q526170,jpg,./images/Biblioteca Museu Víctor Balaguer.jpg,,[#monument],1884


### Téléchargement des Images

In [20]:
dataframe = jsonToDataframe(dataPath)
for i in range(len(dataframe.index)):
    # On télécharge l'image
    link = dataframe._get_value(i,"link")
    path = dataframe._get_value(i,"path")
    response = urllib.request.urlretrieve(link, path)

### Récupération des Données EXIFS

In [18]:
dataframe = jsonToDataframe(dataPath)

wantedExifs = [274, 315, 306, 272]
columnNames = ["orientation", "artist", "date", 'appareil']

for i,tag in enumerate(wantedExifs):
    dataframe[columnNames[i]] = None

# On parcours les entrées
for i in range(len(dataframe.index)):
    path = dataframe._get_value(i,"path")
    img = PIL.Image.open(path)
    exif_data = img._getexif()
    img.close()

    dataframe.at[i, "size"] = img.size
    if exif_data:
      for tag in exif_data:
          if tag in wantedExifs:
              dataframe.at[i, columnNames[wantedExifs.index(tag)]] = exif_data[tag]

# On enregistre les données au format JSON
dataframeToJson(dataPath, dataframe)
        
dataframe 


ValueError: Must have equal len keys and value when setting with an iterable

## Analyse des Données

In [None]:
# Création de la dataframe des utilisateurs
usersPath = 'users.json'

array = [['default',[]]]
usersDataframe = pandas.DataFrame(array, columns=['username', 'likedImages'])

dataframeToJson(usersPath, usersDataframe)

usersDataframe