In [1]:
# Importa le librerie
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.cluster import KMeans
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283834 sha256=b90c0f80096642f3aedfcc26389601e8713ede0e559cdacf900e01c754825758
  Stored in directory: /root/.cache/pip/wheels/e7/5d/0e/4b4fff9a47468fed5633211fb3b76d1db43fe806a17fb7486a
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1


In [None]:
# Reuse previously saved features when the cache file is present.
# NOTE(review): a later cell rebuilds feature_matrix unconditionally and
# image_filenames is not restored here, so this load alone does not let the
# clustering cell run without re-extracting.
if not os.path.exists("feature_matrix.npy"):
    print("⚠️ Nessun file .npy trovato, verranno rielaborate le immagini.")
else:
    feature_matrix = np.load("feature_matrix.npy")
    print("✅ Feature caricate:", feature_matrix.shape)

In [None]:
# Colab-only step: upload files from the local machine into the runtime.
# The value returned by files.upload() is kept in `uploaded`.
# NOTE(review): presumably this is how the zip archive used by the extraction
# cell is provided — confirm where the uploaded file actually lands.
from google.colab import files
uploaded = files.upload()

In [3]:
import zipfile

# Archive to unpack and the folder that receives its contents.
# NOTE(review): zip_path points at the filesystem root — confirm this is where
# the archive is actually stored in the target environment.
zip_path = "/opere.zip"
extract_path = "./opere"

# Unpack every member of the archive into extract_path
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

print("Cartella opere estratta correttamente.")

Cartella opere estratta correttamente.


In [4]:
# Local path to the image folder (read by the listing, extraction and plotting cells)
IMAGE_DIR = "./opere"
TARGET_SIZE = (224, 224)  # Standard size for VGG16; passed to Image.resize in extract_features

In [5]:
import os

# Sanity check: show the first five entries found in the image folder
print("File trovati nella cartella opere:")
first_entries = os.listdir(IMAGE_DIR)[:5]
print(first_entries)

File trovati nella cartella opere:
['37344.jpg', '47446.jpg', '100474.jpg', '7768.jpg', '32561.jpg']


In [6]:
# Pre-trained VGG16 with ImageNet weights and no classification head
base_model = VGG16(include_top=False, weights='imagenet')

# Feature extractor: same input as VGG16, output taken from the 'block5_pool' layer
pool_layer = base_model.get_layer('block5_pool')
model = Model(inputs=base_model.input, outputs=pool_layer.output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
def extract_features(img_path, model):
    """Compute a feature vector for one image using the given Keras model.

    The image is opened, forced to 3-channel RGB, resized to TARGET_SIZE,
    run through ``preprocess_input`` and then through ``model``. The model
    output is averaged over axes 1 and 2 and flattened to a 1-D vector.

    Args:
        img_path: Path of the image file to load.
        model: Model exposing ``predict``; its output must have at least
            three axes so that ``mean(axis=(1, 2))`` is valid.

    Returns:
        A 1-D numpy array of features, or ``None`` if anything fails
        (the error is printed and the image is skipped by the caller).
    """
    try:
        # The context manager closes the underlying file handle once the
        # pixel data has been copied into the converted image.
        with Image.open(img_path) as img:
            # Without this conversion grayscale, palette and RGBA files yield
            # arrays without exactly 3 channels and are rejected by the network.
            rgb_img = img.convert("RGB").resize(TARGET_SIZE)
        # Float copy with a leading batch axis of size 1
        img_array = np.expand_dims(np.asarray(rgb_img, dtype=np.float32), axis=0)
        img_array = preprocess_input(img_array)
        features = model.predict(img_array, verbose=0)
        # Average over the two spatial axes, then drop the batch axis
        return features.mean(axis=(1, 2)).flatten()
    except Exception as e:
        # Best effort: report the failing file and let the caller skip it
        print(f"Errore con {img_path}: {e}")
        return None


In [8]:
all_features = []      # one feature vector per successfully processed image
image_filenames = []   # filenames aligned row-by-row with all_features

print("Inizio estrazione delle feature...")

counter = 0  # number of images processed successfully

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the feature matrix (and everything derived from it) reproducible.
for filename in sorted(os.listdir(IMAGE_DIR)):
    # Only image files are considered
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    file_path = os.path.join(IMAGE_DIR, filename)
    features = extract_features(file_path, model)
    if features is None:
        # extract_features already printed the error; skip this file
        continue

    all_features.append(features)
    image_filenames.append(filename)
    counter += 1

    # Progress message every 100 images
    if counter % 100 == 0:
        print(f"{counter} immagini elaborate...")

# Final conversion to a 2-D matrix (one row per image)
feature_matrix = np.array(all_features)

print(f"\nEstrazione completata: {len(image_filenames)} immagini.")
print(f"Dimensione matrice delle feature: {feature_matrix.shape}")

Inizio estrazione delle feature...
100 immagini elaborate...
200 immagini elaborate...
300 immagini elaborate...
400 immagini elaborate...
500 immagini elaborate...
600 immagini elaborate...
700 immagini elaborate...
800 immagini elaborate...
900 immagini elaborate...
1000 immagini elaborate...

Estrazione completata: 1000 immagini.
Dimensione matrice delle feature: (1000, 512)


In [None]:
# Persist the feature matrix to feature_matrix.npy in the working directory.
# NOTE(review): only the matrix is written here; image_filenames is not saved
# by this cell, so the file alone does not map rows back to images.
np.save("feature_matrix.npy", feature_matrix)
print("✅ Feature salvate in feature_matrix.npy")

In [None]:
from sklearn.cluster import KMeans
import pandas as pd

K = 5  # Number of clusters
print(f"Inizio clustering con K = {K}...")

# Fit K-Means on the feature matrix and read back one label per row
kmeans = KMeans(n_clusters=K, n_init=10, random_state=42)
kmeans.fit(feature_matrix)
cluster_labels = kmeans.labels_

print("Clustering completato.")

# Pair every filename with its cluster id and write the table to CSV
cluster_table = {
    'Filename': image_filenames,
    'Cluster': cluster_labels,
}
results_df = pd.DataFrame(cluster_table)
results_df.to_csv("cluster_results.csv", index=False)
print("✅ Risultati salvati in cluster_results.csv")


In [None]:
# Filename -> cluster table built from the extraction and clustering results
results_df = pd.DataFrame({
    'Filename': image_filenames,
    'Cluster': cluster_labels
})

print("Distribuzione dei cluster:")
print(results_df['Cluster'].value_counts())

# Bar chart of the number of images per cluster, ordered by cluster id
results_df['Cluster'].value_counts().sort_index().plot(kind='bar', color='skyblue')
plt.title("Distribuzione delle opere nei cluster")
plt.xlabel("Cluster")
plt.ylabel("Numero di immagini")
plt.grid(True)
plt.show()


# Show up to the first 5 images of one cluster
cluster_id_to_show = 0
cluster_images = results_df[results_df['Cluster'] == cluster_id_to_show]['Filename'].tolist()
preview_files = cluster_images[:5]

if not preview_files:
    # plt.subplots cannot create a grid with zero columns
    print(f"Nessuna immagine nel cluster {cluster_id_to_show}.")
else:
    # squeeze=False keeps `axes` a 2-D array even when there is a single image,
    # so indexing works for any number of previews.
    fig, axes = plt.subplots(1, len(preview_files), figsize=(15, 5), squeeze=False)
    fig.suptitle(f"Cluster {cluster_id_to_show}", fontsize=16)

    for ax, filename in zip(axes[0], preview_files):
        img_path = os.path.join(IMAGE_DIR, filename)
        # Close the file handle as soon as the image has been drawn
        with Image.open(img_path) as img:
            ax.imshow(img)
        # Add the ellipsis only when the name is actually truncated
        ax.set_title(filename if len(filename) <= 15 else filename[:15] + '...')
        ax.axis('off')

    plt.show()

In [None]:
# Cluster to inspect
cluster_id = 0

# First two images assigned to that cluster
selected_images = results_df[results_df['Cluster'] == cluster_id]['Filename'].tolist()[:2]

# Fail with a clear message instead of an IndexError further down
if len(selected_images) < 2:
    raise ValueError(
        f"Il cluster {cluster_id} contiene meno di 2 immagini: scegli un altro cluster_id."
    )

# Full paths of the two selected images
img1_path = os.path.join(IMAGE_DIR, selected_images[0])
img2_path = os.path.join(IMAGE_DIR, selected_images[1])


In [None]:
from lime import lime_image
from skimage.segmentation import mark_boundaries

# LIME explainer for images. LIME samples random perturbations, so a fixed
# random_state (same seed as the KMeans step) makes explanations repeatable.
explainer = lime_image.LimeImageExplainer(random_state=42)


In [None]:
def predict_fn(images):
    """Scoring function handed to LIME.

    The global ``model`` is a feature extractor, not a classifier, so a
    single fictitious "class" score is produced for each image by summing
    all of its extracted features.

    Args:
        images: Batch of images (array-like whose first axis is the batch).

    Returns:
        Array of shape (n_images, 1) with one score per image.
    """
    # Work on a float copy: preprocess_input may overwrite a float input
    # in place, which would alter the caller's perturbed images.
    batch = np.array(images, dtype=np.float32)
    batch = preprocess_input(batch)
    # verbose=0 avoids one progress bar per batch requested by LIME
    preds = model.predict(batch, verbose=0)
    # Sum over every non-batch axis, giving one scalar per image
    non_batch_axes = tuple(range(1, preds.ndim))
    return preds.sum(axis=non_batch_axes).reshape(-1, 1)


In [None]:
from skimage.io import imread  # kept: imread may still be used by later cells

# Load the image resized to TARGET_SIZE (the resolution used by
# extract_features) and forced to 3-channel RGB, so that LIME explains the
# same kind of input the features were computed from.
with Image.open(img1_path) as pil_img:
    img = np.array(pil_img.convert("RGB").resize(TARGET_SIZE))

# Run LIME on the image using the fictitious score from predict_fn
explanation = explainer.explain_instance(img, predict_fn, top_labels=1, hide_color=0, num_samples=1000)

# Keep the superpixels that contribute positively to the top label
temp, mask = explanation.get_image_and_mask(
    label=explanation.top_labels[0],
    positive_only=True,
    hide_rest=False,
    num_features=5,
    min_weight=0.1
)

# Draw the boundaries of the selected regions on top of the image
plt.imshow(mark_boundaries(temp, mask))
plt.title(f"Zone rilevanti in {selected_images[0]}")
plt.axis('off')
plt.show()


In [None]:
# Load the second image resized to TARGET_SIZE (the resolution used by
# extract_features) and forced to 3-channel RGB, so that LIME explains the
# same kind of input the features were computed from.
with Image.open(img2_path) as pil_img:
    img = np.array(pil_img.convert("RGB").resize(TARGET_SIZE))

# Run LIME on the image using the fictitious score from predict_fn
explanation = explainer.explain_instance(img, predict_fn, top_labels=1, hide_color=0, num_samples=1000)

# Keep the superpixels that contribute positively to the top label
temp, mask = explanation.get_image_and_mask(
    label=explanation.top_labels[0],
    positive_only=True,
    hide_rest=False,
    num_features=5,
    min_weight=0.1
)

# Draw the boundaries of the selected regions on top of the image
plt.imshow(mark_boundaries(temp, mask))
plt.title(f"Zone rilevanti in {selected_images[1]}")
plt.axis('off')
plt.show()
