# Projet

### Imports

In [1]:
# Imports
import glob, os
import pandas
import urllib
from SPARQLWrapper import SPARQLWrapper
import PIL.Image
from skimage import io
import ipywidgets
import matplotlib.pyplot as plt
import numpy
from sklearn.cluster import KMeans
from random import randint

### WikiBaseIntegrator Imports

In [2]:
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator.wbi_config import config

config['USER_AGENT'] = 'test'

## 1. Collecte de données

### Dataframe Functions

In [3]:
def getData():   
    path = "data.json"
    with open(path, "r", encoding="utf-8") as f:
        dataframe = pandas.read_json(f, orient='index')
    return dataframe

def saveData(dataframe):   
    path = "data.json"
    with open(path, "w", encoding="utf-8") as f:
        f.write(dataframe.to_json(orient='index', indent=2))

def getUsers():   
    path = "users.json"
    with open(path, "r", encoding="utf-8") as f:
        dataframe = pandas.read_json(f, orient='index')
    return dataframe

def saveUsers(dataframe):   
    path = "users.json"
    with open(path, "w", encoding="utf-8") as f:
        f.write(dataframe.to_json(orient='index', indent=2))

### WikiBaseIntegrator Functions

In [4]:
wbi = WikibaseIntegrator()

def GetCountryLabelInFrench(itemID): #Permet de récupérer le label du pays en français à partir de son qualifier/ID WikiData
    item_data=wbi.item.get(itemID) #Récupère les données du monument
    country_id=item_data.claims.get('P17')[0].mainsnak.datavalue['value']['id'] #Récupère l'ID du pays
    country=wbi.item.get(country_id) #Récupère les données du pays
    return country.labels.get('fr').value #Retourne le label du pays en français

def AddCountryTags(dataframe): #Permet d'ajouter le tag du pays à chaque ligne du dataframe
    items_tuples=[]
    for i in range(len(dataframe.index)): # Récupère les IDs de toutes les lignes du dataframe
        itemID=dataframe._get_value(i,"entityID")
        items_tuples.append(itemID)
    for i in range(len(items_tuples)): # Ajoute le pays à chaque ligne du dataframe
        country = GetCountryLabelInFrench(items_tuples[i])
        dataframe.at[i, "tags"].append("#"+country)
    return dataframe

def GetArchitecturalStyle(styleID): #Permet de récupérer le nom du style architectural en français à partir de son qualifier/ID WikiData
    styleL=[]
    for i in range(len(styleID)): # Récupère les IDs de style architectural pour chaque ligne du dataframe
        style = wbi.item.get(styleID[i]) #Récupère les données du style architectural
        style_name = style.labels.get('fr').value #Récupère le nom du style en français
        styleL.append("#"+style_name) #Ajoute le nom du style à la liste de résultat
    return styleL #Retourne le nom du style architectural

def AddStyleTags(dataframe): #Permet d'ajouter les tags des styles architecturaux à chaque ligne du dataframe
    for k in range(len(dataframe.index)): #On parcourt chaque ligne du dataframe
        style = GetArchitecturalStyle(dataframe._get_value(k,"style"))
        for l in range(len(style)): #On parcourt chaque indice de la liste de style (un monument peut avoir plusieurs styles)
            dataframe.at[k, "tags"].append(style[l])
    return dataframe

### Récupération du résultat d'une query Wikidata

In [5]:
size = 20
imagesPath = "./images/"

# Query de Wikidata
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
query = """
SELECT ?item ?itemLabel ?pic (YEAR(?date) as ?year) ?style WHERE {
  ?item wdt:P31 wd:Q4989906;
    wdt:P18 ?pic;
    wdt:P571 ?date;
    wdt:P149 ?style.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
LIMIT """
# On précise le nombre d'entrées que l'on veut
query = query + str(size)

# On récupère le résultat de la query
sparql.setQuery(query)
sparql.setReturnFormat("json")
results = sparql.query().convert()
print(results)

# On normalise les résultats pour pandas
array = []
for trainData in results["results"]["bindings"]:
    name = trainData["itemLabel"]["value"]
    format = trainData["pic"]["value"].split(".")[-1]
    path = f"{imagesPath}{name}.{format}"
    if "year" in trainData.keys():
        date = trainData["year"]["value"]
    else:
        date = None
    style = trainData["style"]["value"]
    array.append([name,  
                  trainData["pic"]["value"], 
                  trainData["item"]["value"].split(sep='/')[-1],
                  format,
                  path,
                  (),
                  None,
                  ['#monument'],
                  date,
                  style.split(sep='/')[-1]])
                  

dataframe = pandas.DataFrame(array, columns=["label", "link", "entityID", "format", "path", "size", "sizeCategory", "tags", "date","style"])

#On fusionne les lignes avec le même label en gardant les différents styles
dataframe = dataframe.groupby('label').agg({'link':'first', 'entityID':'first', 'format':'first', 'path':'first', 'size':'first', 'sizeCategory':'first','tags':'first', 'date':'first', 'style':list}).reset_index()

# On enregistre les données au format JSON
saveData(dataframe)

# Nettoyage du dossier /images
if not os.path.isdir('./images'):
  os.mkdir('./images')

# Nettoyage du dossier /images
else:
  files = glob.glob('./images/*')
  for f in files:
      os.remove(f)

dataframe

{'head': {'vars': ['item', 'itemLabel', 'pic', 'year', 'style']}, 'results': {'bindings': [{'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q1139113'}, 'style': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q46261'}, 'pic': {'type': 'uri', 'value': 'http://commons.wikimedia.org/wiki/Special:FilePath/Estibalizko%20santutegi%20erromanikoa.jpg'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Sanctuary of Nuestra Señora de Estíbaliz'}}, {'item': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q1064212'}, 'style': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q4692'}, 'pic': {'type': 'uri', 'value': 'http://commons.wikimedia.org/wiki/Special:FilePath/SevillaPalacioDeLebrija01.JPG'}, 'itemLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Palacio de la Condesa de Lebrija'}, 'year': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'type': 'literal', 'value': '1600'}}, {'item': {'type': 'uri', 'value': 'http://www.wiki

Unnamed: 0,label,link,entityID,format,path,size,sizeCategory,tags,date,style
0,Basilica of Candelaria,http://commons.wikimedia.org/wiki/Special:File...,Q34485,jpg,./images/Basilica of Candelaria.jpg,(),,[#monument],1959.0,[Q54111]
1,Cathedral of La Laguna,http://commons.wikimedia.org/wiki/Special:File...,Q34562,jpg,./images/Cathedral of La Laguna.jpg,(),,[#monument],1515.0,"[Q54111, Q186363, Q54111, Q186363]"
2,Church of San Salvador de Priesca,http://commons.wikimedia.org/wiki/Special:File...,Q1130947,jpg,./images/Church of San Salvador de Priesca.jpg,(),,[#monument],901.0,[Q708807]
3,Hospital de los Reyes Católicos,http://commons.wikimedia.org/wiki/Special:File...,Q155443,jpg,./images/Hospital de los Reyes Católicos.jpg,(),,[#monument],1511.0,[Q577852]
4,Longoria Palace,http://commons.wikimedia.org/wiki/Special:File...,Q1164358,jpg,./images/Longoria Palace.jpg,(),,[#monument],1904.0,[Q1295040]
5,Monastery of San Miguel de Escalada,http://commons.wikimedia.org/wiki/Special:File...,Q203741,jpg,./images/Monastery of San Miguel de Escalada.jpg,(),,[#monument],901.0,[Q1951304]
6,Monument to the Unknown Hero,http://commons.wikimedia.org/wiki/Special:File...,Q1187994,jpg,./images/Monument to the Unknown Hero.jpg,(),,[#monument],1938.0,[Q54111]
7,Palacio de la Condesa de Lebrija,http://commons.wikimedia.org/wiki/Special:File...,Q1064212,JPG,./images/Palacio de la Condesa de Lebrija.JPG,(),,[#monument],1600.0,[Q4692]
8,Panathenaic Stadium,http://commons.wikimedia.org/wiki/Special:File...,Q208811,jpg,./images/Panathenaic Stadium.jpg,(),,[#monument],-328.0,[Q331273]
9,Royal Convent of La Encarnación,http://commons.wikimedia.org/wiki/Special:File...,Q205524,jpg,./images/Royal Convent of La Encarnación.jpg,(),,[#monument],1616.0,[Q840829]


### Téléchargement des Images

In [7]:
dataframe = getData()
for i in range(len(dataframe.index)):
    # On télécharge l'image
    link = dataframe._get_value(i,"link")
    path = dataframe._get_value(i,"path")
    response = urllib.request.urlretrieve(link, path)


 
img = io.imread(path)
io.imshow(img)

  dataframe = pandas.read_json(f, orient='index')


HTTPError: HTTP Error 429: Too many requests. Please comply with the User-Agent policy to get a higher rate limit: https://meta.wikimedia.org/wiki/User-Agent_policy

### Récupération des Données EXIFS

In [8]:
dataframe = getData()

wantedExifs = [274, 315, 306, 272]
columnNames = ["orientation", "artist", "dateTaken", 'appareil']

for i,tag in enumerate(wantedExifs):
    dataframe[columnNames[i]] = None
    

# On parcourt les entrées
for i in range(len(dataframe.index)):
    path = dataframe._get_value(i,"path")
    img = PIL.Image.open(path)
    exif_data = img._getexif()
    img.close()

    dataframe.at[i, "size"] = img.size
    if img.size[0] < img.size[1]:
        dataframe.at[i, "orientation"] = 0
    else:
        dataframe.at[i, "orientation"] = 1

    if exif_data:
      for tag in exif_data:
            if tag in wantedExifs:
                dataframe.at[i, columnNames[wantedExifs.index(tag)]] = exif_data[tag]
                
dataframe.astype({'orientation': 'int32'})

# On enregistre les données au format JSON
saveData(dataframe)
        
dataframe 


  dataframe = pandas.read_json(f, orient='index')


FileNotFoundError: [Errno 2] No such file or directory: './images/Stonehenge.jpg'

## 2. Annotation des images

### Fonction pour récupérer et ajouter le tag siècle

In [None]:
def find_century(year): #Permet de trouver le siècle à partir de l'année
    year = int(year)
    if (year <= 0): 
        cent_tag=("#Antiquité")
    elif (year <= 100):
        cent_tag ="#1er siècle"
    elif (year % 100 == 0):
        cent_tag="#"+str(year // 100)+"ème siècle"
    else:
        cent_tag="#"+str(year // 100 + 1)+"ème siècle"
    return cent_tag

def add_century_tag(dataframe): #Permet d'ajouter le tag siècle à chaque ligne du dataframe
    for i in range(len(dataframe.index)):
        year = dataframe._get_value(i,"date")
        if year is not None: #Prise en charge du cas où le monument n'a pas de date précise de construction
            dataframe.at[i, "tags"].append(find_century(year))
        else:
            dataframe.at[i, "tags"].append("#Inconnu")
    return dataframe

### Test Ajout du tag siècle

In [None]:
add_century_tag(dataframe)

### Test Ajout des tags country et style

In [None]:
AddCountryTags(dataframe)

In [None]:
AddStyleTags(dataframe)

## Analyse des Données

### Kmeans et Couleurs dominantes

In [9]:
dataframe = getData()
dataframe["dColors"] = None
dataframe["dProportions"] = None

nColors = 6
laziness = 100

kmeans = KMeans(n_clusters=nColors, random_state=0, n_init="auto")


for i, path in enumerate(dataframe["path"]):
    img = PIL.Image.open(path).convert('RGB')
    pixelData = numpy.array(img).reshape(-1, 3)[::laziness]
    kmeans.fit(pixelData)

    proportions = numpy.array([0 for i in range(nColors)])
    for x in kmeans.labels_:
        proportions[x] += numpy.float64(1)
    proportions = proportions / kmeans.labels_.size

    dataframe.at[i,"dColors"] = numpy.int32(kmeans.cluster_centers_)
    dataframe.at[i,"dProportions"] = proportions
    
saveData(dataframe)



  dataframe = pandas.read_json(f, orient='index')


FileNotFoundError: [Errno 2] No such file or directory: './images/Stonehenge.jpg'

In [None]:
# Création de la dataframe des utilisateurs

array = [['default',[],[]]]
usersDataframe = pandas.DataFrame(array, columns=['name', 'likes','dislikes'])

saveUsers(usersDataframe)

usersDataframe

In [None]:
def addUser(name):
    usersDf = getUsers()
    usersDf.loc[len(usersDf.index)] = [name,[],[]] 
    saveUsers(usersDf)
    

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

def getRecommandation(user):

    likedImages = getUsers()["likes"][getUsers().index["name" == user].tolist()[0]]
    dislikedImages = getUsers()["dislikes"][getUsers().index["name" == user].tolist()[0]]

    trainDataIndexes = likedImages + dislikedImages
    result = []
    for i in trainDataIndexes:
        if i in likedImages:
            result.append("likes")
        else:
            result.append("dislikes")
    
    data = numpy.array(getData()[['tags','orientation']])
    trainData = [trainData[i] for i in trainDataIndexes]
    
    for i in range(trainData.shape[0]):
        for j in range(trainData.shape[1]):
            trainData[i][j] = str(trainData[i][j])


    # Encode categorical features and labels
    label_encoders = [LabelEncoder() for _ in range(len(trainData[0]))]
    encoded_data = []
    for i, column in enumerate(zip(*trainData)):
        encoded_data.append(label_encoders[i].fit_transform(column))

    X = list(zip(*encoded_data))  # Features
    y = result  # Labels


    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    classifier = svm.SVC()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Evaluate the classifier
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")

    # prediction
    for item in data:

        sample_item = numpy.array(getData()[['tags','orientation']])[-1]
        sample_item = [str(attribute) for attribute in sample_item]

    encoded_item = [label_encoders[i].transform([val])[0] for i, val in enumerate(sample_item)]
    prediction = classifier.predict([encoded_item])[0]
    print(f"Prediction for the sample item: {prediction}")


## Visualisation des Données

In [None]:
dataframe = getData()

criteriaToColumn = {'year':'dateTaken', 'orientation':'orientation'}
criteria = ''
while criteria not in criteriaToColumn :
    criteria = input('criteria :')


categories = ['unknown']
cardinals  = [0]


for i in range(len(dataframe.index)):
    value = str(dataframe._get_value(i, criteriaToColumn[criteria]))

    if value != 'None':
        if criteria == 'year':
            value = value.split(':')[0].strip()
        elif criteria == 'orientation':
            value = {'0':"Portrait", '1':"Landscape"}[value]

        if value in categories:
            cardinals[categories.index(value)] += 1
        else:
            categories.append(value)
            cardinals.append(1)
    else:
        cardinals[0] += 1


plt.bar(categories, cardinals)

In [None]:
dataframe = getData()

imageIndex = randint(0, len(dataframe['label']))

fig, ax = plt.subplots(1,2)

ax[0].pie(dataframe.at[imageIndex,"dProportions"], colors=(numpy.array(dataframe.at[imageIndex, "dColors"])/255))
ax[1].imshow(PIL.Image.open(dataframe.at[imageIndex, "path"]))


## Application

In [None]:
wTab = ipywidgets.widgets.Tab()
currentUser = 'default'


# UsersWidget
wCurrentUser = ipywidgets.widgets.Dropdown(
    options=getUsers()['name'],
    value='default',
    description='Current User :',
    disabled=False
)
def updateCurrentUser(tmp):
    global currentUser
    currentUser = wCurrentUser.value
wCurrentUser.observe(updateCurrentUser)

def updateUserWidget():
    wCurrentUser.options = getUsers()['name']

# Create User Tab
wUsername = ipywidgets.widgets.Text(
    value='',
    placeholder='',
    description='Username:',
    disabled=False   
)
wBtnCreateUser = ipywidgets.widgets.Button(
    description='Create',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Creates a new user',
    icon='check' # (FontAwesome names without the `fa-` prefix)
)

def createUser(tmp):
    newUser = wUsername.value
    addUser(newUser)
    updateUserWidget()


wBtnCreateUser.on_click(createUser)

wBoxCreateUser = ipywidgets.HBox([wUsername,wBtnCreateUser])

# Random Images Tab
dataframe = getData()
imagePaths = dataframe['path']

wBtnGetRnd = ipywidgets.widgets.Button(description="Get new random Images")
wBtnSelect = ipywidgets.widgets.Button(description="Validate Likes")



rowArray = [wBtnGetRnd]
wCheckArray = []
wImgLayout = ipywidgets.Layout(width = '60%')
cellLayout = ipywidgets.Layout(width = '50%', border = '3px')
def getRandomImages(tmp):
    cellArray = []

    global rowArray
    global wCheckArray

    rowArray = [wBtnGetRnd]
    wCheckArray = []

    for i,path in enumerate(imagePaths[:7]):
        with open(path, "rb") as f :
            wImg   = ipywidgets.widgets.Image(value=f.read(), layout = wImgLayout)
            wCheckArray.append(ipywidgets.Checkbox())
            cellArray.append(ipywidgets.HBox([wImg, wCheckArray[-1]], layout = cellLayout))
        if (i+1)%3 == 0:
            rowArray.append(ipywidgets.HBox(cellArray))
            cellArray = []
    rowArray.append(wBtnSelect)
    updateTab(rowArray)

wBtnGetRnd.on_click(getRandomImages)


def get_likes(btn):
    likedImagePaths    = [imagePaths[i] for i, wCheck in enumerate(wCheckArray) if wCheck.value]
    dislikedImagePaths = [path for path in imagePaths if(path not in likedImagePaths)]

    likedImages    = [dataframe.index[dataframe['path'] == path].tolist()[0] for path in likedImagePaths]
    dislikedImages = [dataframe.index[dataframe['path'] == path].tolist()[0] for path in dislikedImagePaths]
    
    
    usersDf = getUsers()
    currentUserIndex = usersDf.index[usersDf['name'] == currentUser].tolist()[0]
    usersDf['likes'][currentUserIndex] = likedImages
    usersDf['dislikes'][currentUserIndex] = dislikedImages
    saveUsers(usersDf)
    print(usersDf)


wBtnSelect.on_click(get_likes)


def updateTab(rowArray):
    wTab.children = [wBoxCreateUser, ipywidgets.VBox(rowArray)]
    wTab.titles   = ['User', 'Random Images']

    return ipywidgets.VBox([wCurrentUser, wTab])


updateTab(rowArray)

