In [1]:
pip install qdrant-client scikit-learn numpy matplotlib


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import matplotlib.pyplot as plt
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance
from sklearn.ensemble import IsolationForest

In [3]:
# Set random seed for reproducibility
np.random.seed(42)

# Generate normal embeddings (centered around 0.5)
normal_data = np.random.normal(loc=0.5, scale=0.1, size=(490, 128))

# Generate anomalies (farther from the normal cluster)
anomalies = np.random.normal(loc=1.5, scale=0.3, size=(10, 128))

# Combine normal and anomalous data
data = np.vstack([normal_data, anomalies])

# Print data shape
print(f"Generated {data.shape[0]} vectors of dimension {data.shape[1]}")

Generated 500 vectors of dimension 128


In [4]:
# Connect to Qdrant (Assuming running locally)
client = QdrantClient("http://localhost:6333")

# Create a collection (if it doesn't exist)
collection_name = "Stronghold"

client.recreate_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=128, distance=Distance.COSINE)
)

# Insert data into Qdrant
points = [
    PointStruct(id=i, vector=vector.tolist(), payload={"label": "unknown"})
    for i, vector in enumerate(data)
]
client.upsert(collection_name=collection_name, points=points)

print(f"Inserted {len(points)} vectors into Qdrant.")

Inserted 500 vectors into Qdrant.


  client.recreate_collection(


In [6]:
print(client.get_collections())

collections=[CollectionDescription(name='Stronghold'), CollectionDescription(name='hnm'), CollectionDescription(name='hnm_full'), CollectionDescription(name='mtg')]


In [7]:
collection_info = client.get_collection(collection_name="Stronghold")
print("Number of vectors in Qdrant:", collection_info.points_count)

Number of vectors in Qdrant: 500


In [8]:
print(client.count(collection_name="Stronghold"))

count=500


In [11]:
retrieved_points = client.scroll(
    collection_name="Stronghold", limit=500, with_payload=True
)[0]

print(f"Retrieved {len(retrieved_points)} vectors")

Retrieved 500 vectors


In [14]:
# Train Isolation Forest model
iso_forest = IsolationForest(contamination=0.05, random_state=42)
predictions = iso_forest.fit_predict(data)

# Convert predictions (-1 = anomaly, 1 = normal)
anomaly_labels = ["Wraith" if p == -1 else "Shadows" for p in predictions]

# Count anomalies
print(f"✅ Detected {anomaly_labels.count('Wraith')} anomalies out of {len(data)} vectors.")

✅ Detected 25 anomalies out of 500 vectors.


In [15]:
# Update payloads in Qdrant with anomaly labels
for i, point_id in enumerate(retrieved_ids):
    client.set_payload(
        collection_name="Stronghold",
        points=[point_id],
        payload={"anomaly": anomaly_labels[i]}
    )
print("✅ Updated Qdrant with anomaly labels.")

✅ Updated Qdrant with anomaly labels.


In [21]:
# Update payloads in Qdrant with anomaly labels and image URLs
for i, point_id in enumerate(retrieved_ids):
    # Define image URL based on label
    image_url = "https://i.ibb.co/Q7z72wq3/shadows.png" if anomaly_labels[i] == "Shadows" else "https://i.ibb.co/NnS6DV5z/wraith.png"
    
    client.set_payload(
        collection_name="Stronghold",
        points=[point_id],
        payload={
            "anomaly": anomaly_labels[i],
            "image_url": image_url
        }
    )
print("✅ Updated Qdrant with anomaly labels and image URLs.")

✅ Updated Qdrant with anomaly labels and image URLs.


In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Reduce dimensions to 2D using PCA
pca = PCA(n_components=2)
data_2d = pca.fit_transform(data)

# Assign colors based on anomaly labels
colors = ["red" if label == "Wraith" else "blue" for label in anomaly_labels]

# Scatter plot
plt.figure(figsize=(10, 8))
plt.scatter(data_2d[:, 0], data_2d[:, 1], c=colors, alpha=0.7)
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.title("Anomaly Detection Visualization")
plt.legend(handles=[
    plt.Line2D([0], [0], marker='o', color='w', markerfacecolor='blue', label='Shadow', markersize=10),
    plt.Line2D([0], [0], marker='o', color='w', markerfacecolor='red', label='Wraith', markersize=10)
])
plt.show()