# Projet Machine Learning

## 1 : Download Images

In [1]:
import sys
from SPARQLWrapper import SPARQLWrapper, JSON

# Public SPARQL endpoint of the Wikidata Query Service
endpoint_url = "https://query.wikidata.org/sparql"

# Query: up to 150 items that are a subclass (P279) of wd:Q1420 and have an
# image (P18); one sample image and the label are returned for each item.
# NOTE(review): Q1420 is presumably the Wikidata item for cars -- confirm.
query = """SELECT DISTINCT ?item ?itemLabel (SAMPLE(?image) AS ?sampleImage)  
WHERE
{
  ?item wdt:P279 wd:Q1420;
        wdt:P18 ?image.

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],fr". }
}
GROUP BY ?item ?itemLabel
LIMIT 150
"""


def get_results(endpoint_url, query):
    """Run a SPARQL query and return the decoded JSON response.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: SPARQL query text.

    Returns:
        The converted result of the query, requested in JSON format.
    """
    # Identify the client with the running Python major.minor version
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent=f"WDQS-example Python/{major}.{minor}")
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()


array = []
results = get_results(endpoint_url, query)

print(results)

# Flatten every binding into an (item, label, sample image) tuple
array.extend(
    (
        binding["item"]["value"],
        binding["itemLabel"]["value"],
        binding["sampleImage"]["value"],
    )
    for binding in results["results"]["bindings"]
)

ModuleNotFoundError: No module named 'SPARQLWrapper'

In [37]:
import pandas as pd
# Build the table from the collected tuples, then cast the three columns to "<U200"
dataframe = pd.DataFrame(array, columns=["id", "nom", "image"]).astype(
    dtype=dict.fromkeys(["id", "nom", "image"], "<U200")
)
dataframe

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


Unnamed: 0,id,nom,image
0,http://www.wikidata.org/entity/Q4050653,Q4050653,http://commons.wikimedia.org/wiki/Special:File...
1,http://www.wikidata.org/entity/Q111208461,DS E-Tense Performance Concept,http://commons.wikimedia.org/wiki/Special:File...
2,http://www.wikidata.org/entity/Q17073503,Q17073503,http://commons.wikimedia.org/wiki/Special:File...
3,http://www.wikidata.org/entity/Q117232744,Kia Concept EV5,http://commons.wikimedia.org/wiki/Special:File...
4,http://www.wikidata.org/entity/Q113039073,Q113039073,http://commons.wikimedia.org/wiki/Special:File...
...,...,...,...
145,http://www.wikidata.org/entity/Q7976422,Wayne Lifeguard,http://commons.wikimedia.org/wiki/Special:File...
146,http://www.wikidata.org/entity/Q104599203,Q104599203,http://commons.wikimedia.org/wiki/Special:File...
147,http://www.wikidata.org/entity/Q42847657,Q42847657,http://commons.wikimedia.org/wiki/Special:File...
148,http://www.wikidata.org/entity/Q1436782,Ford Contour,http://commons.wikimedia.org/wiki/Special:File...


In [38]:
import requests
import shutil
import os

def download_image(url):
    """Download ``url`` into the local ``Images`` folder.

    The file is written to ``Images/<last path component of url>``; the folder
    is created when it does not exist yet.

    Args:
        url: Address of the image to fetch.

    Returns:
        The path of the written file, or ``None`` when the request fails
        (connection error, timeout, ...) or the server does not answer with
        HTTP status 200.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # The context manager releases the streamed connection in every case;
        # the timeout keeps one stalled server from blocking the whole batch.
        with requests.get(
            url, allow_redirects=True, headers=headers, stream=True, timeout=30
        ) as response:
            if response.status_code != 200:
                return None
            # exist_ok avoids the check-then-create race of exists() + makedirs()
            os.makedirs('Images', exist_ok=True)
            filename = os.path.join('Images', os.path.basename(url))
            with open(filename, "wb") as image:
                # Ask the raw stream to undo any content encoding before copying
                response.raw.decode_content = True
                shutil.copyfileobj(response.raw, image)
            return filename
    except requests.RequestException:
        # Same "no file" signal as a non-200 answer
        return None

In [39]:
# Download the image of every row; the displayed result is the value returned per row
dataframe["image"].apply(download_image)

0          Images\MPM%20PS%20160%20%2839851917005%29.jpg
1      Images\DS%20E-Tense%20-%20FAI%202017%20%281-3%...
2      Images\Berliet%20%27Dauphine%27%20berline%20ca...
3                 Images\Kia%20EV5%20Concept%20IMG01.jpg
4      Images\1909%20Lane%20Steam%20Cars%20Brochure%2...
                             ...                        
145    Images\Virginia%20Overland%20Bus%20365%201988%...
146    Images\Iveco%20Manticore%20MTV%20prototype%201...
147    Images\WMC%20P%20AS17%20Bentley%20EXP%2012%20S...
148           Images\1998-2000%20Ford%20Contour%20SE.jpg
149    Images\Toyota%20E-AE86%20Sprinter%20Trueno%20G...
Name: image, Length: 150, dtype: object

## On renomme les images

In [40]:
import os

# Folder that holds the images
dossier_images = "Images"

# Supported image extensions (matched against the lower-cased file name)
extensions_images = [".jpg", ".jpeg", ".png", ".gif", ".bmp"]


def renommer_images(dossier, prefixe="Image_"):
    """Rename the images of ``dossier`` to ``<prefixe><n><extension>``.

    Files whose lower-cased name ends with one of ``extensions_images`` are
    sorted by name and numbered from 1; each file keeps its own extension.

    The renaming is done in two passes (temporary names first, final names
    second) so that a final name can never overwrite an image that has not
    been processed yet. This makes the function safe to run again on a folder
    that already contains ``<prefixe><n>`` files.

    Args:
        dossier: Folder containing the images.
        prefixe: Prefix of the new file names.

    Raises:
        OSError: If the folder cannot be listed or a rename fails. Files
            already moved to a temporary name keep their image extension, so
            a later run picks them up again.
    """
    import uuid

    fichiers = sorted(
        fichier
        for fichier in os.listdir(dossier)
        if fichier.lower().endswith(tuple(extensions_images))
    )

    # Pass 1: park every image under a unique temporary name
    marqueur = f".renommage_{uuid.uuid4().hex}_"
    en_attente = []
    for compteur, fichier in enumerate(fichiers, start=1):
        extension = os.path.splitext(fichier)[1]
        temporaire = f"{marqueur}{compteur}{extension}"
        os.rename(os.path.join(dossier, fichier), os.path.join(dossier, temporaire))
        en_attente.append((temporaire, f"{prefixe}{compteur}{extension}"))

    # Pass 2: every final name is free now, give the files their new names
    for temporaire, nouveau_nom in en_attente:
        os.rename(os.path.join(dossier, temporaire), os.path.join(dossier, nouveau_nom))

    print("Images renommées")

# Rename the images of the configured folder.
# NOTE: this renames files on disk every time the cell is executed.
renommer_images(dossier_images)


Images renommées


## 2 : Extraire les métadonnées

In [51]:
import os
import json
from PIL import Image

def get_image_info(image_path):
    """Collect basic metadata for one image file.

    Args:
        image_path: Path of the image to inspect.

    Returns:
        A dict with the keys ``"taille"`` (the image size), ``"format"``,
        ``"orientation"`` (``"paysage"``, ``"portrait"`` or ``"carre"``) and
        ``"tags"`` (an empty list). When the image carries EXIF data the dict
        also has ``"date_creation"`` and ``"modele_appareil"``; either value is
        ``None`` when the corresponding tag is absent.
    """
    print(image_path)
    with Image.open(image_path) as img:
        if img.width > img.height:
            orientation = "paysage"
        elif img.height > img.width:
            orientation = "portrait"
        else:
            orientation = "carre"

        image_info = {
            "taille": img.size,
            "format": img.format,
            "orientation": orientation,
            "tags": []
        }

        # Public EXIF accessor: the private ``_getexif`` is not defined for
        # every image format (e.g. GIF) and raised AttributeError there.
        exif_info = img.getexif()
        if exif_info:
            # 36867 (DateTimeOriginal) is stored in the Exif sub-IFD (0x8769)
            image_info["date_creation"] = exif_info.get_ifd(0x8769).get(36867)
            # 272 (Model) is stored in the main IFD
            image_info["modele_appareil"] = exif_info.get(272)

    return image_info

def get_images_info(directory):
    """Gather the metadata of every image file found directly in ``directory``.

    Args:
        directory: Folder to scan (sub-folders are not visited).

    Returns:
        A dict mapping each image file name without its extension to the
        dict returned by ``get_image_info`` for that file.
    """
    infos = {}
    for nom in os.listdir(directory):
        chemin = os.path.join(directory, nom)
        # Skip anything that is not a regular file
        if not os.path.isfile(chemin):
            continue
        # Extension test on the lower-cased path
        if not chemin.lower().endswith(('.jpg', '.jpeg', '.png', '.JPG', '.gif')):
            continue
        infos[os.path.splitext(nom)[0]] = get_image_info(chemin)
    return infos


# Folder scanned for images, and JSON file receiving their metadata
# NOTE(review): later cells read "images_data.json", not this file -- confirm which one is intended.
directory = "./Images/"
output_json_file = 'images_data2.json'

# Collect the metadata of every image in the folder
images_info = get_images_info(directory)

# Write the metadata as indented JSON
with open(output_json_file, 'w') as json_file:
    json.dump(images_info, json_file, indent=4)

print("Les informations des images ont été enregistrées dans le fichier:", output_json_file)


./Images/Image_1.JPG
./Images/Image_10.jpg
./Images/Image_100.jpg
./Images/Image_101.jpg
./Images/Image_102.jpg
./Images/Image_103.jpg
./Images/Image_104.jpg
./Images/Image_105.jpg
./Images/Image_106.jpg
./Images/Image_107.jpg
./Images/Image_108.jpg
./Images/Image_109.jpg
./Images/Image_11.jpg
./Images/Image_110.jpg
./Images/Image_111.JPG
./Images/Image_112.JPG
./Images/Image_113.jpg
./Images/Image_114.jpg
./Images/Image_115.jpg
./Images/Image_116.jpg
./Images/Image_117.JPG
./Images/Image_118.JPG
./Images/Image_119.jpg
./Images/Image_12.jpg
./Images/Image_120.jpg
./Images/Image_121.jpg
./Images/Image_122.jpg
./Images/Image_123.jpg
./Images/Image_124.JPG
./Images/Image_125.jpg
./Images/Image_126.jpg
./Images/Image_127.jpg
./Images/Image_128.jpg
./Images/Image_129.JPG
./Images/Image_13.jpg
./Images/Image_130.jpg
./Images/Image_131.jpeg
./Images/Image_132.jpg
./Images/Image_133.jpg
./Images/Image_134.jpg
./Images/Image_135.jpg
./Images/Image_136.jpg
./Images/Image_137.jpg
./Images/Image_1

## 3 : Étiquetage et annotation

In [83]:
import os

# Paths of the images selected for annotation (filled by the loop below)
images = []
directory = "./Images"
# Inclusive range of image numbers to select
start_index = 101
end_index = 150

def tri_numerique(nom_image):
    """Sort key: the integer found between the first "_" and the next ".".

    Args:
        nom_image: File name such as ``"Image_12.jpg"``.

    Returns:
        The number as an ``int``, or ``float('inf')`` when the name has no
        "_" or the text after it is not an integer.
    """
    morceaux = nom_image.split("_")
    if len(morceaux) < 2:
        # No underscore at all: not a numbered image name
        return float('inf')
    numero = morceaux[1].partition(".")[0]
    try:
        return int(numero)
    except ValueError:
        return float('inf')

# Walk the folder in numeric order and keep the image files whose number is in range
for file in sorted(os.listdir(directory), key=tri_numerique):
    if not (start_index <= tri_numerique(file) <= end_index):
        continue
    image_path = os.path.join(directory, file)
    # Only regular files can be images
    if not os.path.isfile(image_path):
        continue
    extension = os.path.splitext(image_path)[1]
    if extension.lower() in ['.jpg', '.jpeg', '.png', '.gif']:
        images.append(image_path)

print(images)


['./Images\\Image_101.jpg', './Images\\Image_102.jpg', './Images\\Image_103.jpg', './Images\\Image_104.jpg', './Images\\Image_105.jpg', './Images\\Image_106.jpg', './Images\\Image_107.jpg', './Images\\Image_108.jpg', './Images\\Image_109.jpg', './Images\\Image_110.jpg', './Images\\Image_111.jpg', './Images\\Image_112.jpg', './Images\\Image_113.jpg', './Images\\Image_114.jpg', './Images\\Image_115.jpg', './Images\\Image_116.jpg', './Images\\Image_117.jpg', './Images\\Image_118.jpg', './Images\\Image_119.jpg', './Images\\Image_120.jpg', './Images\\Image_121.jpg', './Images\\Image_122.jpg', './Images\\Image_123.jpg', './Images\\Image_124.jpg', './Images\\Image_125.jpg', './Images\\Image_126.jpg', './Images\\Image_127.jpg', './Images\\Image_128.jpg', './Images\\Image_129.jpg', './Images\\Image_130.jpg', './Images\\Image_131.jpg', './Images\\Image_132.jpg', './Images\\Image_133.jpg', './Images\\Image_134.jpg', './Images\\Image_135.jpg', './Images\\Image_136.jpg', './Images\\Image_137.jpg', 

In [26]:
from ipywidgets import GridspecLayout, Image, interact, widgets
import json
paths = []

# One "Favorite" checkbox per image, in the same order as ``images``
checkboxes = [widgets.Checkbox(value=False, description='Favorite') for _ in range(len(images))]

# Two-column grid: the picture in column 0, its checkbox in column 1
layout = GridspecLayout(n_columns=2, n_rows=len(images), width='400px')
for i, (img, checkbox) in enumerate(zip(images, checkboxes)):
    # Read the bytes inside a context manager so every file handle is closed
    with open(img, "rb") as file:
        image = file.read()
    layout[i, 0] = widgets.Image(
        value=image,
        width=100,
        height=100,
    )
    layout[i, 1] = checkbox

# Button that triggers the selection
button = widgets.Button(description="Select")

# Output area shown under the button
output = widgets.Output()

def get_selected_images(btn, tag="truck"):
    """Button callback: add ``tag`` to every checked image in ``images_data.json``.

    Reads the module-level ``images`` and ``checkboxes`` lists (same order) to
    find the checked images, then rewrites ``images_data.json`` in place.
    Messages are printed into the module-level ``output`` widget.

    Args:
        btn: The clicked button, as passed by ``Button.on_click`` (unused).
        tag: Tag added to the "tags" list of each selected image when it is
            not already present.
    """
    selected_images = [
        os.path.splitext(os.path.basename(image_path))[0]
        for image_path, checkbox in zip(images, checkboxes)
        if checkbox.value
    ]
    # Send everything the callback prints to the Output widget under the button
    with output:
        print(selected_images)
        with open("images_data.json", "r+") as f:
            data = json.load(f)
            for image_name in selected_images:
                if image_name not in data:
                    # Unknown image: report it instead of aborting the whole update
                    print(f"Image absente de images_data.json : {image_name}")
                    continue
                # Create the "tags" list when missing, then add the tag once
                tags = data[image_name].setdefault("tags", [])
                if tag not in tags:
                    tags.append(tag)
            # Rewrite the file from the start and drop any leftover bytes
            f.seek(0)
            json.dump(data, f, indent=4)
            f.truncate()
        

# Register the callback that runs on each click of the button
button.on_click(get_selected_images)

# Show the image grid, the button and the output area under the cell
display(layout, button, output)

GridspecLayout(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x02\x01\x00\xb4\x00\xb4\x00\x00\xf…

Button(description='Select', style=ButtonStyle())

Output()

['Image_45']


## Extraction des couleurs de chaque image

### Insertion du champ colors dans le JSON

In [77]:
import json

# Give every image entry of the JSON file a "colors" list
with open("images_data.json", "r+") as f:
    data = json.load(f)
    for entry in data.values():
        if "colors" in entry:
            print("Déjà Présent")
            continue
        # Field missing: create it as an empty list
        entry["colors"] = []
    # Rewrite the file from the start and drop any leftover bytes
    f.seek(0)
    json.dump(data, f, indent=4)
    f.truncate()


### Fonction pour obtenir les couleurs au format rgb

In [48]:
from PIL import Image as PILImage
import numpy
import math
from scipy.spatial import KDTree
from matplotlib import pyplot as plt, image as mpimg
from sklearn.cluster import KMeans

def getColorsFromImage(self, nb_clusters=5, random_state=0):
    """Return the dominant colours of an image as RGB tuples.

    The image is converted to RGB, resized to 300x300 and its pixels are
    clustered with KMeans; each cluster centre is one returned colour.

    Args:
        self: Path of the image to analyse (despite its name this is a plain
            function parameter, not an instance).
        nb_clusters: Number of clusters, i.e. number of colours returned.
        random_state: Seed given to KMeans so that repeated runs on the same
            image return the same colours. Pass ``None`` for unseeded runs.

    Returns:
        A list of ``nb_clusters`` ``(r, g, b)`` tuples of ints, each channel
        being the cluster centre rounded up.
    """
    with PILImage.open(self) as imgfile:
        # Converting first gives exactly three channels for every source mode
        # (greyscale, palette, alpha, CMYK, ...), so the clustering below never
        # sees palette indices or a missing third channel.
        rgb_image = imgfile.convert("RGB").resize((300, 300))  # resize to reduce time
        pixels = numpy.asarray(rgb_image, dtype=numpy.uint8).reshape(-1, 3)

    clusters = KMeans(n_clusters=nb_clusters, n_init=2, random_state=random_state)
    clusters.fit(pixels)

    return [
        tuple(math.ceil(channel) for channel in centre[:3])
        for centre in clusters.cluster_centers_
    ]
        

### Fonction pour obtenir le nom d'une couleur à partir de sa valeur rgb

In [59]:
import webcolors
from webcolors import CSS3_NAMES_TO_HEX, rgb_to_name

def convert_rgb_to_names(rgb_tuple):
    """Return the CSS3 colour name closest to ``rgb_tuple``.

    Args:
        rgb_tuple: Colour as an ``(r, g, b)`` sequence.

    Returns:
        The name from ``CSS3_NAMES_TO_HEX`` whose RGB value is the nearest
        neighbour of ``rgb_tuple`` in the KD-tree built over all CSS3 colours.
    """
    names = list(CSS3_NAMES_TO_HEX)
    # Qualified call: only the ``webcolors`` module itself is imported, a bare
    # ``hex_to_rgb`` is not defined in this notebook.
    rgb_values = [
        webcolors.hex_to_rgb(color_hex) for color_hex in CSS3_NAMES_TO_HEX.values()
    ]

    _, index = KDTree(rgb_values).query(rgb_tuple)
    return names[index]


### On ajoute les noms des couleurs de chaque image dans le JSON

In [84]:
import json

with open("images_data.json", "r+") as f:
    data = json.load(f)
    for img in images:
        image_name = os.path.splitext(os.path.basename(img))[0]
        # Name each dominant colour of the image
        names = [convert_rgb_to_names(color) for color in getColorsFromImage(img)]
        print(names)
        if "colors" not in data[image_name]:
            print("Error : pas de champ colors")
            continue
        # NOTE(review): the whole list is appended as ONE element, so "colors"
        # becomes a list of lists and grows on every run -- confirm this is intended.
        data[image_name]["colors"].append(names)
    # Rewrite the file from the start and drop any leftover bytes
    f.seek(0)
    json.dump(data, f, indent=4)
    f.truncate()

[(164, 157, 154), (102, 96, 126), (54, 53, 61), (231, 217, 170), (40, 24, 159)]
['darkgrey', 'dimgray', 'darkslategray', 'palegoldenrod', 'midnightblue']
[(152, 152, 152), (25, 25, 25), (117, 117, 117), (189, 189, 189), (73, 73, 73)]
['darkgrey', 'black', 'gray', 'silver', 'darkslategray']
[(62, 53, 57), (233, 224, 218), (20, 17, 23), (114, 108, 109), (176, 164, 159)]
['darkslategray', 'gainsboro', 'black', 'dimgray', 'darkgrey']
[(104, 105, 74), (169, 180, 114), (232, 209, 205), (239, 87, 88), (55, 38, 26)]
['dimgray', 'darkkhaki', 'lightgray', 'tomato', 'darkslategray']
[(90, 82, 84), (126, 128, 131), (191, 91, 104), (57, 48, 50), (200, 190, 197)]
['dimgray', 'gray', 'indianred', 'darkslategray', 'silver']
[(85, 84, 85), (219, 224, 226), (30, 29, 30), (171, 175, 177), (122, 123, 124)]
['dimgray', 'gainsboro', 'black', 'darkgrey', 'gray']
[(150, 48, 33), (181, 178, 170), (67, 56, 49), (141, 114, 106), (252, 251, 250)]
['brown', 'darkgrey', 'darkslategray', 'gray', 'snow']
[(213, 217, 