In [None]:
pip install pinecone-client tensorflow

In [None]:
import os

import numpy as np
import tensorflow as tf
from pinecone import Pinecone, ServerlessSpec
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image



# Initialize the Pinecone client. The API key is read from the PINECONE_API_KEY
# environment variable so the secret never lives in the notebook. Any key that
# was ever saved in plain text here should be treated as leaked and rotated.
pinecone = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Create the index on first run, then connect to it.
index_name = 'image-similarity'
# list_indexes() returns index descriptions rather than plain strings, so the
# membership test must be made against .names(); a bare `in` test on the raw
# result does not match on names and create_index() would then be attempted
# for an index that already exists.
if index_name not in pinecone.list_indexes().names():
    pinecone.create_index(
        name=index_name,
        dimension=2048,  # must equal the length of the vectors upserted into this index
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )
index = pinecone.Index(index_name)


In [None]:
def load_and_preprocess_image(img_path):
    """Load an image file and return it as a preprocessed batch of one.

    The image is resized to 224x224 while loading, converted to an array,
    given a leading batch axis, and passed through the ResNet50
    `preprocess_input` function.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The result of `preprocess_input` applied to the single-image batch.
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    return preprocess_input(batch)

# Pre-trained ResNet50 without its classification head; with average pooling
# the network output is a single feature vector per input image.
model = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg',
)

def get_image_embedding(img_path):
    """Compute the embedding of one image as a flat 1-D array.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        The flattened output of `model.predict` for the preprocessed image.
    """
    batch = load_and_preprocess_image(img_path)
    features = model.predict(batch)
    return features.flatten()


In [None]:
pip install requests

In [None]:
pip install pillow

In [None]:
import json
import os
def add_image_to_pinecone_with_metadata(index, img_path, img_id, url, domain):
    """Embed one image and upsert it, with its metadata, into the index.

    Args:
        index: Index object whose `upsert` method receives the vector.
        img_path: Path of the image file to embed.
        img_id: Identifier stored as the vector id.
        url: Value stored under the "url" metadata key.
        domain: Value stored under the "domain" metadata key.
    """
    values = get_image_embedding(img_path).tolist()
    record = {
        "id": img_id,
        "values": values,
        "metadata": {"url": url, "domain": domain},
    }
    index.upsert(vectors=[record], namespace='legitimateImageDB')

def upsert_images_in_folder2(index, metadata_file, folder_path):
    """Upsert every image listed in a JSON metadata file into the index.

    The metadata file must contain a JSON list of objects, each with the keys
    "image" (file name inside `folder_path`, also used as the vector id),
    "url" and "domain".

    Args:
        index: Index object passed through to the per-image upsert helper.
        metadata_file: Path of the JSON metadata file.
        folder_path: Directory that contains the image files.

    Raises:
        KeyError: If a record lacks one of the required keys.
    """
    with open(metadata_file, 'r') as fh:
        records = json.load(fh)

    # One embedding + upsert per record, in file order.
    for record in records:
        file_name = record['image']
        add_image_to_pinecone_with_metadata(
            index,
            os.path.join(folder_path, file_name),
            file_name,
            record['url'],
            record['domain'],
        )



In [None]:
# Upsert every image listed in the metadata file into the index.
metadata_file = 'legitimateMetadata.json'
folder_path = 'legitimateImageDB'
upsert_images_in_folder2(index, metadata_file=metadata_file, folder_path=folder_path)


In [None]:
def query_similar_images2(query_img_path, top_k=5):
    """Query the index for the images most similar to a local image file.

    Args:
        query_img_path: Path of the image file to use as the query.
        top_k: Maximum number of matches to request.

    Returns:
        The response object returned by `index.query`; matches include both
        their stored values and their metadata.
    """
    query_embedding = get_image_embedding(query_img_path)
    # `vector` takes a flat list of floats; the embedding is already 1-D, so it
    # must not be wrapped in another list.
    return index.query(
        namespace="legitimateImageDB",
        vector=query_embedding.tolist(),
        top_k=top_k,
        include_values=True,
        include_metadata=True,
    )

# Example query: show the best of the top three matches for a local image.
query_results = query_similar_images2('netflixPhish.png', top_k=3)
score = query_results  # alias of the full response object, not a numeric score
top_match = score.matches[0]
print(top_match.score)
print(top_match.id)
print(score.namespace)
print(top_match.metadata)


In [None]:
import time
import requests

def download_image(url, save_path, timeout=30):
    """Download an image from a URL and write it to `save_path`.

    Args:
        url: Address of the image to fetch.
        save_path: Local file path the response body is written to.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        True if the server answered with status 200 and the file was written,
        False on any other status code or on a network-level failure.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report connection errors and timeouts as an ordinary failure so the
        # caller's retry logic gets a chance to run.
        print(f"Failed to download image: {exc}")
        return False

    if response.status_code != 200:
        print(f"Failed to download image. Status code: {response.status_code}")
        return False

    with open(save_path, 'wb') as f:
        f.write(response.content)
    print(f"Image successfully downloaded: {save_path}")
    return True

def get_closest_image(query_image_url):
    """Download an image by URL and return its closest match in the index.

    The download is attempted up to three times: once immediately, then again
    after an 8 second pause and, if that also fails, after a further 2 seconds.

    Args:
        query_image_url: Address of the image to use as the query.

    Returns:
        A tuple `(score, url, domain)` for the best match, where `url` and
        `domain` come from the match metadata (None when a key is absent).
        Returns None if the image could not be downloaded or the query
        produced no matches.
    """
    save_path = 'query_image.jpg'

    downloaded = download_image(query_image_url, save_path)
    for delay in (8, 2):
        if downloaded:
            break
        time.sleep(delay)
        downloaded = download_image(query_image_url, save_path)
    if not downloaded:
        return None

    query_results = query_similar_images2(save_path, top_k=1)
    # An empty namespace (or index) yields no matches; avoid indexing into it.
    if not query_results.matches:
        return None

    best = query_results.matches[0]
    metadata = best.metadata or {}
    return (best.score, metadata.get('url'), metadata.get('domain'))

# Example run: download an image by URL and look up its nearest indexed image.
# Alternative test image: 'https://picsum.photos/536/354'
query_image_url = 'http://phish-collector-lb-1814707889.us-east-1.elb.amazonaws.com/screenshots/551f623d-05a9-4356-ba8a-0807ba117232-netflix.png'
closest_image = get_closest_image(query_image_url)
# Prints the (score, url, domain) tuple, or None if the download failed.
print(closest_image)
