In [None]:
# Importa le librerie
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.cluster import KMeans
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
import zipfile
from skimage.segmentation import quickshift
from skimage.segmentation import slic

In [None]:
# Colab-only step: open the browser upload dialog and keep whatever it returns
# in `uploaded`.
# NOTE(review): presumably used to upload "opere.zip", which the following cell
# extracts — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
# Archive to unpack and the folder that receives its content
zip_path = "opere.zip"
extract_path = "opere"

# Folder holding the images after extraction, and the size every image is
# resized to before being processed
IMAGE_DIR = "./opere/opere"
TARGET_SIZE = (224, 224)

# Unpack the whole archive into `extract_path`
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

print("✅ Cartella opere estratta correttamente.")

# Quick sanity check: show the first few entries found in the image folder
print("📸 File trovati nella cartella opere:")
sample_files = os.listdir(IMAGE_DIR)[:5]
print(sample_files)


In [None]:
# ImageNet-pretrained VGG16 without its fully connected classification head
base_model = VGG16(include_top=False, weights='imagenet')

# Feature extractor: maps the network input to the output of 'block5_pool'
block5_pool_output = base_model.get_layer('block5_pool').output
model = Model(inputs=base_model.input, outputs=block5_pool_output)

def extract_features(img_path, model):
    """Compute a 1-D feature vector for one image file.

    The image is loaded, forced to 3-channel RGB, resized to ``TARGET_SIZE``,
    run through ``preprocess_input`` and ``model``, and the resulting feature
    map is averaged over its spatial axes (1 and 2).

    Args:
        img_path: Path of the image file to read.
        model: Model whose ``predict`` output is averaged into the vector.

    Returns:
        The flattened feature vector, or ``None`` if anything fails for this
        file (the error is printed so the caller can skip the image).
    """
    try:
        # Force RGB: grayscale, palette or RGBA files would otherwise yield
        # arrays with 1 or 4 channels (or none), which the network rejects,
        # and the image would be silently dropped from the dataset.
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the converted image.
        with Image.open(img_path) as img:
            rgb_img = img.convert("RGB").resize(TARGET_SIZE)
        img_array = np.array(rgb_img)
        # Add the leading batch axis expected by model.predict
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)
        features = model.predict(img_array, verbose=0)
        # Average over the two spatial axes, then drop the batch axis
        features = features.mean(axis=(1, 2)).flatten()
        return features
    except Exception as e:
        # Best effort: report the problem and let the caller skip this file
        print(f"Errore con {img_path}: {e}")
        return None


In [None]:
# Colab-only step: open the browser upload dialog a second time; the result
# replaces the previous value of `uploaded`.
# NOTE(review): presumably meant for optionally uploading a previously saved
# "feature_matrix.npy", which the next cell loads when present — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
# Cache files: the feature matrix and, row-aligned with it, the filenames of
# the images that produced each row.
FEATURES_CACHE = "feature_matrix.npy"
FILENAMES_CACHE = "image_filenames.npy"
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

feature_matrix = None
image_filenames = None

if os.path.exists(FEATURES_CACHE):
    cached_features = np.load(FEATURES_CACHE)
    if os.path.exists(FILENAMES_CACHE):
        cached_names = np.load(FILENAMES_CACHE).tolist()
    else:
        # Legacy cache saved without a filename list: rebuild the list with
        # the same extension filter used during extraction. Images skipped
        # because of errors cannot be recovered, hence the length check below.
        cached_names = [
            name for name in os.listdir(IMAGE_DIR)
            if name.lower().endswith(IMAGE_EXTENSIONS)
        ]
        print("⚠️ Elenco dei nomi file non trovato: ricostruito dalla cartella, "
              "l'allineamento con le feature non è garantito.")

    # Only trust the cache when every feature row has a filename; otherwise the
    # results table built later would be misaligned or fail to build at all.
    if len(cached_names) == len(cached_features):
        feature_matrix = cached_features
        image_filenames = cached_names
        print("✅ Feature caricate:", feature_matrix.shape)
    else:
        print(f"⚠️ Cache incoerente ({len(cached_features)} feature, "
              f"{len(cached_names)} nomi file): verranno rielaborate le immagini...")
else:
    print("⚠️ Nessun file .npy trovato, verranno rielaborate le immagini...")

if feature_matrix is None:
    all_features = []
    image_filenames = []

    print("Inizio estrazione delle feature...")

    counter = 0
    # Sorted listing makes the row order reproducible across runs and machines
    for filename in sorted(os.listdir(IMAGE_DIR)):
        if filename.lower().endswith(IMAGE_EXTENSIONS):
            file_path = os.path.join(IMAGE_DIR, filename)
            features = extract_features(file_path, model)
            # extract_features returns None on failure: skip that image
            if features is not None:
                all_features.append(features)
                image_filenames.append(filename)
                counter += 1
                if counter % 100 == 0:
                    print(f"{counter} immagini elaborate...")

    if not all_features:
        # Nothing to cluster: stop here instead of caching an empty matrix
        raise RuntimeError(f"Nessuna immagine valida trovata in {IMAGE_DIR}")

    feature_matrix = np.array(all_features)
    # Save features and filenames together so a later cache hit stays aligned
    np.save(FEATURES_CACHE, feature_matrix)
    np.save(FILENAMES_CACHE, np.array(image_filenames))
    print(f"\n✅ Estrazione completata: {len(image_filenames)} immagini.")
    print(f"✅ Dimensione matrice delle feature: {feature_matrix.shape}")


In [None]:
from sklearn.cluster import KMeans

# Candidate numbers of clusters evaluated by the elbow method
K_range = range(2, 11)

print("Calcolo dell'inerzia per il Metodo del Gomito...")

# Fit one K-Means model per candidate K and keep its inertia
inertia = [
    KMeans(n_clusters=k, random_state=42, n_init=10).fit(feature_matrix).inertia_
    for k in K_range
]

# Inertia as a function of K
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(K_range, inertia, marker='o', linestyle='--')
ax.set_title('Metodo del Gomito per determinare K')
ax.set_xlabel('Numero di Cluster (K)')
ax.set_ylabel('Inerzia (WSS)')
ax.grid(True)
plt.show()

print("Analizza il grafico: il 'gomito' indica il K ottimale.")

In [None]:
K = 5  # number of clusters
print(f"Inizio clustering con K = {K}...")

# Fit K-Means on the feature matrix and get one cluster label per row
kmeans = KMeans(n_clusters=K, n_init=10, random_state=42)
cluster_labels = kmeans.fit_predict(feature_matrix)

print("Clustering completato.")

# Pair every filename with its cluster label and write the table to CSV
result_columns = {
    'Filename': image_filenames,
    'Cluster': cluster_labels,
}
results_df = pd.DataFrame(data=result_columns)
results_df.to_csv("cluster_results.csv", index=False)
print("✅ Risultati salvati in cluster_results.csv")


In [None]:
# Number of images assigned to each cluster
cluster_counts = results_df['Cluster'].value_counts()

print("Distribuzione dei cluster:")
print(cluster_counts)

# Bar chart of the same counts, ordered by cluster id
ax = cluster_counts.sort_index().plot(kind='bar', color='skyblue')
ax.set_title("Distribuzione delle opere nei cluster")
ax.set_xlabel("Cluster")
ax.set_ylabel("Numero di immagini")
ax.grid(True)
plt.show()


In [None]:
from sklearn.manifold import TSNE

print("Inizio la riduzione della dimensionalità con t-SNE...")

# t-SNE requires perplexity < n_samples: keep the original value of 30 for
# normal datasets and shrink it only when there are too few images.
n_samples = feature_matrix.shape[0]
tsne_params = dict(
    n_components=2,
    random_state=42,
    perplexity=max(1, min(30, n_samples - 1)),
)
try:
    # Recent scikit-learn names the iteration budget `max_iter`; the old
    # `n_iter` keyword is deprecated there and later removed.
    tsne = TSNE(max_iter=300, **tsne_params)
except TypeError:
    # Older scikit-learn releases only accept the legacy `n_iter` keyword
    tsne = TSNE(n_iter=300, **tsne_params)
features_2d = tsne.fit_transform(feature_matrix)

print("Riduzione completata.")

# 2-D embedding, one point per image, coloured by its K-Means cluster
plt.figure(figsize=(12, 10))
scatter = plt.scatter(
    features_2d[:, 0], features_2d[:, 1],
    c=cluster_labels,
    cmap='viridis',
    alpha=0.6
)

plt.title("Visualizzazione delle Opere d'Arte (t-SNE) colorate per Cluster K-Means")
plt.xlabel("Componente t-SNE 1")
plt.ylabel("Componente t-SNE 2")
plt.colorbar(scatter, label='Cluster ID')
plt.grid(False)
plt.show()


In [None]:
# Show up to the first 5 images of one cluster
cluster_id_to_show = 0
cluster_images = results_df[results_df['Cluster'] == cluster_id_to_show]['Filename'].tolist()
preview_images = cluster_images[:5]

if not preview_images:
    # plt.subplots cannot create a grid with zero columns
    print(f"Nessuna immagine trovata nel cluster {cluster_id_to_show}.")
else:
    # squeeze=False keeps `axes` two-dimensional even when the cluster holds a
    # single image (otherwise a bare Axes is returned and indexing it fails).
    fig, axes = plt.subplots(1, len(preview_images), figsize=(15, 5), squeeze=False)
    fig.suptitle(f"Cluster {cluster_id_to_show}", fontsize=16)

    for ax, filename in zip(axes[0], preview_images):
        img_path = os.path.join(IMAGE_DIR, filename)
        # imshow copies the pixel data, so the file can be closed right after
        with Image.open(img_path) as img:
            ax.imshow(img)
        # Add the ellipsis only when the name was actually truncated
        short_name = filename if len(filename) <= 15 else filename[:15] + '...'
        ax.set_title(short_name)
        ax.axis('off')

    plt.show()

In [None]:
# Cluster whose images are explained with LIME in the following cells
cluster_id = 0

# First two images assigned to that cluster
selected_images = results_df[results_df['Cluster'] == cluster_id]['Filename'].tolist()[:2]

# The comparison below needs two images: fail early with a clear message
# instead of an IndexError on selected_images[1].
if len(selected_images) < 2:
    raise ValueError(
        f"Il cluster {cluster_id} contiene {len(selected_images)} immagini: "
        "ne servono almeno 2 per il confronto."
    )

# Full paths of the two selected images
img1_path = os.path.join(IMAGE_DIR, selected_images[0])
img2_path = os.path.join(IMAGE_DIR, selected_images[1])


In [None]:
# Shell escape: install the `lime` package with pip so that the LIME imports
# that follow in this cell can be resolved.
!pip install lime

from lime import lime_image
from skimage.segmentation import mark_boundaries

# Initialise the LIME explainer for images. The fixed random_state makes the
# perturbation sampling, and therefore the explanations, reproducible across
# runs, matching the random_state=42 already used for K-Means and t-SNE.
explainer = lime_image.LimeImageExplainer(random_state=42)

from sklearn.preprocessing import normalize  # assicurati che sia importato

def predict_fn_kmeans(images):
    """Return pseudo-probabilities of cluster membership for a batch of images.

    Each image goes through ``preprocess_input`` and ``model``; the feature map
    is averaged over its spatial axes, the distance to every K-Means centre is
    computed with ``kmeans.transform`` and turned into a similarity
    ``1 / (1 + distance)``. Similarities are L1-normalised so that every row
    sums to 1.

    Args:
        images: Batch of images (array-like, one image per entry along the
            first axis), as passed by LIME to its classifier function.

    Returns:
        2-D array with one row per image and one column per cluster.
    """
    # preprocess_input overwrites floating-point inputs in place: work on a
    # private float32 copy so the caller's batch is never modified.
    batch = np.array(images, dtype=np.float32)
    batch = preprocess_input(batch)
    features = model.predict(batch, verbose=0)
    # Average over the spatial axes -> one feature vector per image
    features = features.mean(axis=(1, 2)).reshape(-1, features.shape[-1])
    distances = kmeans.transform(features)
    similarities = 1 / (1 + distances)  # distance -> similarity
    probs = normalize(similarities, norm='l1')  # rows sum to 1
    return probs

In [None]:
def genera_mappa_lime(img_path, filename, titolo, cluster=None):
    """Build, show and save a LIME explanation for one image.

    The image is explained against ``predict_fn_kmeans`` using SLIC
    superpixels; the figure shows the resized original next to the LIME image
    with the positive-evidence mask overlaid, and is saved to ``filename``.

    Args:
        img_path: Path of the image to explain.
        filename: Output path of the saved figure.
        titolo: Label used in the subplot and figure titles.
        cluster: Cluster id shown in the figure title. When ``None`` (default)
            the notebook-level ``cluster_id`` is used, as before.

    Returns:
        Tuple ``(temp, mask)`` as returned by
        ``explanation.get_image_and_mask``.
    """
    if cluster is None:
        # Backward-compatible fallback to the notebook-level selection
        cluster = cluster_id

    # Open the file once, force 3-channel RGB (palette/RGBA/grayscale files
    # would otherwise reach the network with the wrong number of channels)
    # and release the file handle immediately.
    with Image.open(img_path) as src:
        original = src.convert("RGB").resize(TARGET_SIZE)
    img = np.array(original)

    explanation = explainer.explain_instance(
        img,
        predict_fn_kmeans,
        top_labels=1,
        hide_color=0,
        num_samples=1000,
        segmentation_fn=lambda x: slic(x, n_segments=50, compactness=10, sigma=1)
    )
    # Keep only the superpixels that support the top-ranked label
    temp, mask = explanation.get_image_and_mask(
        label=explanation.top_labels[0],
        positive_only=True,
        hide_rest=False,
        num_features=10,
        min_weight=0.0
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    axes[0].imshow(original)
    axes[0].set_title(f"Originale ({titolo})")
    axes[0].axis('off')
    # temp holds 0-255 pixel values: rescale to [0, 1] for imshow
    axes[1].imshow(temp / 255)
    axes[1].imshow(mask, cmap='jet', alpha=0.5)
    axes[1].set_title("Spiegazione LIME")
    axes[1].axis('off')
    plt.suptitle(f"Analisi Dettagliata LIME: {titolo} (Cluster {cluster})", fontsize=16)
    plt.tight_layout(rect=[0, 0.03, 1, 0.90])
    # Save before showing: the figure is closed right after display
    fig.savefig(filename, bbox_inches='tight')
    plt.show()
    plt.close(fig)
    print(f"✅ Mappa LIME salvata come {filename}")
    return temp, mask

In [None]:
# LIME explanation for the first selected image, saved next to the notebook
lime_filename = "lime_explanation_{}.png".format(selected_images[0])
temp1, mask1 = genera_mappa_lime(
    img_path=img1_path,
    filename=lime_filename,
    titolo=selected_images[0],
)

In [None]:
# LIME explanation for the second selected image, saved next to the notebook
lime_filename_2 = "lime_explanation_{}.png".format(selected_images[1])
temp2, mask2 = genera_mappa_lime(
    img_path=img2_path,
    filename=lime_filename_2,
    titolo=selected_images[1],
)

In [None]:
# 2x2 grid: one row per image, original on the left and LIME overlay on the right
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

comparison_rows = [
    (img1_path, selected_images[0], temp1, mask1, "LIME 1"),
    (img2_path, selected_images[1], temp2, mask2, "LIME 2"),
]

for row, (path, name, lime_img, lime_mask, lime_title) in enumerate(comparison_rows):
    original_ax, lime_ax = axes[row, 0], axes[row, 1]

    # Left column: the resized original
    original_ax.imshow(Image.open(path).resize(TARGET_SIZE))
    original_ax.set_title(f"Originale {name}")
    original_ax.axis('off')

    # Right column: LIME image rescaled to [0, 1] with the mask drawn on top
    lime_ax.imshow(lime_img / 255)
    lime_ax.imshow(lime_mask, cmap='jet', alpha=0.5)
    lime_ax.set_title(lime_title)
    lime_ax.axis('off')

plt.suptitle("Confronto Completo Originali + Spiegazioni", fontsize=18)
plt.tight_layout()
plt.show()