In [35]:
import pandas as pd
import numpy as np
import tensorflow as tf
import requests
from PIL import Image
from io import BytesIO

from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing import image
from sklearn.neighbors import NearestNeighbors


In [36]:
# Load VGG16 pre-trained on ImageNet for use as a feature extractor:
# the classification head is left out and global average pooling is applied
# to the output of the last convolutional block.
model = VGG16(
    include_top=False,
    pooling='avg',
    weights='imagenet',
)

In [50]:
# Download an image from a URL and turn it into a feature vector
def download_and_preprocess_image(img_url, timeout=10):
    """Download the image at ``img_url`` and return its feature vector.

    The image is fetched over HTTP, decoded, forced to 3-channel RGB, resized
    to 224x224, run through ``preprocess_input`` and then through the
    module-level ``model``.

    Args:
        img_url: URL of the image to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        The output of ``model.predict`` for this single image, flattened to a
        1-D NumPy array.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
        PIL.UnidentifiedImageError: If the response body is not a decodable
            image.
    """
    response = requests.get(img_url, timeout=timeout)
    # Fail early on 4xx/5xx instead of handing an error page to the decoder.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    # Covers may be grayscale, palette-based or RGBA; the network expects
    # exactly three colour channels.
    img = img.convert('RGB')
    img = img.resize((224, 224))
    x = image.img_to_array(img)
    # Add the batch dimension expected by predict().
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    features = model.predict(x)
    return features.flatten()

def search_image(query_image_path, features_list, image_paths, k=5):
    """Return the labels of the indexed images closest to a query image.

    Args:
        query_image_path: URL of the query image; it is passed to
            ``download_and_preprocess_image`` to obtain its feature vector.
        features_list: Sequence of feature vectors, one per indexed image.
        image_paths: Labels returned for the matches, index-aligned with
            ``features_list``.
        k: Maximum number of neighbours to return. It is capped at the number
            of indexed vectors, because asking for more neighbours than there
            are samples makes ``kneighbors`` raise ``ValueError``.

    Returns:
        A list of up to ``k`` entries of ``image_paths``, in the order returned
        by ``kneighbors`` (Euclidean distance). Empty if nothing is indexed.
    """
    n_indexed = len(features_list)
    if n_indexed == 0:
        # Nothing to compare against; avoid a pointless download and a fit() error.
        return []

    query_features = download_and_preprocess_image(query_image_path)
    knn = NearestNeighbors(
        n_neighbors=min(k, n_indexed),
        algorithm='auto',
        metric='euclidean',
    ).fit(features_list)
    # Only the neighbour indices are needed; distances are discarded.
    _, indices = knn.kneighbors([query_features])
    return [image_paths[i] for i in indices[0]]

In [38]:
# Load the preprocessed book catalogue and keep only rows that have a value
# in the 'image' column.
data = (
    pd.read_csv('datasets/books_preprocessed.csv')
    .dropna(subset=['image'])
)
data.head()

Unnamed: 0,title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount
0,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,Philip Nel,http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,Biography & Autobiography,
1,Wonderful Worship in Smaller Churches,This resource includes twelve principles in un...,David R. Ray,http://books.google.com/books/content?id=2tsDA...,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,,2000,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,Religion,
2,Whispers of the Wicked Saints,Julia Thomas finds her life spinning out of co...,Veronica Haddon,http://books.google.com/books/content?id=aRSIg...,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,iUniverse,2005-02,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,Fiction,
3,The Church of Christ: A Biblical Ecclesiology ...,In The Church of Christ: A Biblical Ecclesiolo...,Everett Ferguson,http://books.google.com/books/content?id=kVqRa...,http://books.google.nl/books?id=kVqRaiPlx88C&p...,Wm. B. Eerdmans Publishing,1996,http://books.google.nl/books?id=kVqRaiPlx88C&d...,Religion,5.0
4,Saint Hyacinth of Poland,The story for children 10 and up of St. Hyacin...,Mary Fabyan Windeatt,http://books.google.com/books/content?id=lmLqA...,http://books.google.nl/books?id=lmLqAAAACAAJ&d...,Tan Books & Pub,2009-01-01,http://books.google.nl/books?id=lmLqAAAACAAJ&d...,Biography & Autobiography,


In [39]:
# Count the missing values in each column
data.isna().sum()

title                0
description          0
authors              1
image                0
previewLink          0
publisher        14189
publishedDate      337
infoLink             0
categories        4227
ratingsCount     91309
dtype: int64

In [40]:
# Work on a small sample: the 'image' and 'title' columns of the first 20 rows
data_book = data[["image", "title"]].head(20)
data_book

Unnamed: 0,image,title
0,http://books.google.com/books/content?id=IjvHQ...,Dr. Seuss: American Icon
1,http://books.google.com/books/content?id=2tsDA...,Wonderful Worship in Smaller Churches
2,http://books.google.com/books/content?id=aRSIg...,Whispers of the Wicked Saints
3,http://books.google.com/books/content?id=kVqRa...,The Church of Christ: A Biblical Ecclesiology ...
4,http://books.google.com/books/content?id=lmLqA...,Saint Hyacinth of Poland
6,http://books.google.com/books/content?id=o7izA...,Muslim Womens Choices: Religious Belief and So...
7,http://books.google.com/books/content?id=iTueu...,Dramatica for Screenwriters
8,http://books.google.com/books/content?id=tX1Is...,Mensa Number Puzzles (Mensa Word Games for Kids)
9,http://books.google.com/books/content?id=GgnrB...,Vector Quantization and Signal Compression (Th...
10,http://books.google.com/books/content?id=EzxOD...,A husband for Kutani


In [41]:
# Build the search index: one feature vector per row of data_book.
# The two lists stay index-aligned; image_paths holds the 'title' values.
features_list = []
image_paths = []
for cover_url, book_title in zip(data_book['image'], data_book['title']):
    features_list.append(download_and_preprocess_image(cover_url))
    image_paths.append(book_title)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 844ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 509ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 635ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 536ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 585ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 541ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 513ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 491ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 479ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 509ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 435ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 573ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 449ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [42]:
# Persist the feature matrix as text, one row per image.
# The features are float32 values, so they need a floating-point format: an
# integer format such as '%d' would truncate every value (e.g. 0.143 -> 0).
# '%.8e' keeps 9 significant digits, enough to round-trip float32 exactly.
a = np.array(features_list)
np.savetxt("features_list.txt", a, fmt='%.8e')

In [46]:
# Show the stacked feature matrix (NumPy abbreviates large arrays on display)
np.asarray(features_list)

array([[0.0000000e+00, 2.3561492e+00, 0.0000000e+00, ..., 8.5208712e+00,
        1.3602154e+00, 0.0000000e+00],
       [1.4307231e-01, 0.0000000e+00, 7.1839581e+00, ..., 9.0615757e-02,
        8.7251562e-01, 1.3606985e+01],
       [6.8885040e-01, 3.1158584e-01, 6.0608830e+00, ..., 5.8218712e-01,
        7.1702042e+00, 2.8416789e+00],
       ...,
       [0.0000000e+00, 1.5174603e-01, 0.0000000e+00, ..., 0.0000000e+00,
        2.7549262e+00, 1.0081217e-02],
       [1.8900478e-01, 0.0000000e+00, 0.0000000e+00, ..., 2.1712349e-01,
        1.5772735e+00, 0.0000000e+00],
       [1.0783390e+00, 1.5851553e-01, 5.1928407e-01, ..., 3.2532740e-01,
        3.2276610e-01, 1.4069693e+00]], dtype=float32)

In [43]:
# Show the labels collected alongside the features (these are the 'title' values)
print(image_paths)

['Dr. Seuss: American Icon', 'Wonderful Worship in Smaller Churches', 'Whispers of the Wicked Saints', 'The Church of Christ: A Biblical Ecclesiology for Today', 'Saint Hyacinth of Poland', 'Muslim Womens Choices: Religious Belief and Social Reality (Cross Cultural Perspectives on Women)', 'Dramatica for Screenwriters', 'Mensa Number Puzzles (Mensa Word Games for Kids)', 'Vector Quantization and Signal Compression (The Springer International Series in Engineering and Computer Science)', 'A husband for Kutani', 'The Ultimate Guide to Law School Admission: Insider Secrets for Getting a "Big Envelope" with Your Acceptance to Law School!', 'The Repeal of Reticence: A History of Americas Cultural and Legal Struggles over Free Speech, Obscenity, Sexual Liberation, and Modern Art', 'Overcoming Hypertension (Dr. Kenneth H. Coopers Preventive Medicine Program)', 'Alaska Sourdough', 'The Oxford Handbook of Free Will (Oxford Handbooks)', 'Eyewitness Travel Guide to Europe', 'Hunting The Hard Way'

In [44]:
import marshal

# Save the labels so they can be reloaded together with the saved features.
# The context manager makes sure the file is flushed and closed; passing a bare
# open(...) to marshal.dump leaves the handle open.
# NOTE(review): the marshal format may change between Python versions; consider
# json or pickle if this file must outlive the current environment.
with open("image_paths", 'wb') as titles_file:
    marshal.dump(image_paths, titles_file)

1066

In [52]:

# Query the index with a cover URL and print the labels of the closest matches
query_image_path = 'https://books.google.com/books/content?id=IjvHQsCn_pgC&printsec=frontcover&img=1&zoom=1&edge=curl&source=gbs_api'
similar_images = search_image(query_image_path, features_list, image_paths)

# Header first, then one match per line.
print("Images similaires trouvées :", *similar_images, sep="\n")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 584ms/step
Images similaires trouvées :
Dr. Seuss: American Icon
Vector Quantization and Signal Compression (The Springer International Series in Engineering and Computer Science)
The Church of Christ: A Biblical Ecclesiology for Today
Whispers of the Wicked Saints
The Forbidden Stories of Marta Veneranda
