In [38]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing import image
import numpy as np
import faiss
import requests
from PIL import Image
from io import BytesIO


In [39]:
data = pd.read_csv('datasets/books_data.csv')

In [40]:
# Charger le modèle VGG16 pré-entraîné
model = VGG16(weights='imagenet', include_top=False, pooling='avg')

In [75]:

# Fonction pour télécharger et prétraiter une image à partir d'un lien
def download_and_preprocess_image(img_url):
    response = requests.get(img_url)
    img = Image.open(BytesIO(response.content))
    img = img.resize((224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x

# Fonction pour extraire les caractéristiques d'une image
def extract_features(img_url):
    img = download_and_preprocess_image(img_url)
    features = model.predict(img)
    return features.flatten()

# Exemple d'extraction de caractéristiques pour une image à partir d'un lien
img_url = data.image[4]
# features = extract_features(img_url)
# print(features)




In [42]:
# Indexation avec FAISS
d = features.shape[0]  # Dimension des vecteurs de caractéristiques
index = faiss.IndexFlatL2(d)  # Index L2
index.add(np.array([features]))  # Ajouter les caractéristiques à l'index

In [43]:
type(features)

numpy.ndarray

In [49]:
data.image

0         http://books.google.com/books/content?id=DykPA...
1         http://books.google.com/books/content?id=IjvHQ...
2         http://books.google.com/books/content?id=2tsDA...
3         http://books.google.com/books/content?id=aRSIg...
4                                                       NaN
                                ...                        
212399    http://books.google.com/books/content?id=J7M-N...
212400    http://books.google.com/books/content?id=3n8k6...
212401                                                  NaN
212402    http://books.google.com/books/content?id=JM6YV...
212403    http://books.google.com/books/content?id=dehfP...
Name: image, Length: 212404, dtype: object

In [62]:
data = pd.read_csv('books_preprocessed.csv')

In [64]:
data = data.dropna(subset=['image'])

In [73]:
for img in data['image'].head(20).values:
    print(img)

http://books.google.com/books/content?id=IjvHQsCn_pgC&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api
http://books.google.com/books/content?id=2tsDAAAACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
http://books.google.com/books/content?id=aRSIgJlq6JwC&printsec=frontcover&img=1&zoom=1&source=gbs_api
http://books.google.com/books/content?id=kVqRaiPlx88C&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api
http://books.google.com/books/content?id=lmLqAAAACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
http://books.google.com/books/content?id=o7izAAAAIAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
http://books.google.com/books/content?id=iTueuAAACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
http://books.google.com/books/content?id=tX1IswEACAAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api
http://books.google.com/books/content?id=GgnrBwAAQBAJ&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api
http://books.google.com/books/content?id=EzxODwAAQBA

In [90]:
for img in data['image'].head(20).values:
    features = extract_features(img)
    a = index.add(np.array([features]))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 479ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 460ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 564ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 447ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 455ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 510ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 638ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 452ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 624ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 434ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 432ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [91]:
print(np.size(a))

1


In [85]:
print(data.image[22])

http://books.google.com/books/content?id=7mqCDwAAQBAJ&printsec=frontcover&img=1&zoom=1&source=gbs_api


In [86]:

# # Fonction pour ajouter des images à l'index
# def add_images_to_index(image_urls):
#     for url in image_urls:
#         features = extract_features(url)
#         index.add(np.array([features]))

# Exemple d'ajout de plusieurs images à l'index

# image_urls = [
#     'https://example.com/path/to/cover_image1.jpg',
#     'https://example.com/path/to/cover_image2.jpg',
#     'https://example.com/path/to/cover_image3.jpg'
# ]
# Fonction pour comparer une image de référence avec les images indexées
def compare_image(reference_img_url, k=3):
    reference_features = extract_features(reference_img_url)
    D, I = index.search(np.array([reference_features]), k)  # Recherche des k plus proches voisins
    return I, D

# Exemple de comparaison avec une image de référence
reference_img_url = data.image[22]
indices, distances = compare_image(reference_img_url)

print("Indices des images similaires:", indices)
print("Distances:", distances)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 530ms/step
Indices des images similaires: [[29 19 54]]
Distances: [[   0.     5440.7183 5440.7183]]
