In [13]:
from qdrant_client import QdrantClient
from PIL import Image


In [14]:
import pandas as pd

class InstagramPhotoMatcher:
    def __init__(self, data_path, origin_df):
        self.data_path = data_path
        self.origin_df = origin_df
        self.train_files = os.listdir(data_path)
        self.df_matching = self.create_matching_dataframe()
        
        self.processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
        self.model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
        
        self.collection_name = 'instagram_data_new'
        self.collection = qdrant_client.create_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=1000,
                distance=Distance.COSINE
            ),
        )

    def create_matching_dataframe(self):
        matching = []
        for i in range(len(self.origin_df)):
            post_date = self.origin_df['post_date'][i]
            for file in self.train_files:
                if file.endswith('.jpg') and post_date == file[:19]:
                    image_path = os.path.join(self.data_path, file)
                    instagram_user = self.origin_df['username'][i]
                    full_name = self.origin_df['full_name'][i]
                    bio = self.origin_df['biography'][i]
                    caption = self.origin_df['post_caption'][i]
                    link = self.origin_df['post_url'][i]
                    matching.append({
                        'instagram_user': instagram_user,
                        'image_path': image_path.replace("\\", '/'),
                        'link': link,
                        'full_name': full_name,
                        'class': 0,
                        'bio': bio,
                        'caption': caption,
                    })
        return pd.DataFrame(matching)
    
    def resize_image(self, image, target_width=256):
        image_aspect_ratio = image.size[0] / image.size[1]
        resized_img = image.resize(
            [target_width, math.floor(target_width * image_aspect_ratio)]
        )
        return resized_img

    def convert_image_to_base64(self, image):
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    def upload_item(self, idx):
        if idx >= len(self.df_matching):
            raise IndexError("Index out of range")
        
        item_data = self.__getitem__(idx)
        row = item_data['row']
        encoded_image = item_data['encoded_image']
        logits = item_data['logits']
    
        bio = row['bio'][:512] if isinstance(row['bio'], str) else ''
        
        points = [
            PointStruct(
                id=idx,
                payload={
                    'instagram_user': row['instagram_user'],
                    'image_path': row['image_path'],
                    'link': row['link'],
                    'full_name': row['full_name'],
                    'bio': bio,
                    'class': row['class'],
                    'encoded_image': encoded_image,
                },
                vector=logits.flatten().tolist()
            )
        ]
        
        print(qdrant_client.upsert(
            collection_name=self.collection_name,
            points=points
        ))


    def __getitem__(self, idx):
        if idx >= len(self.df_matching):
            raise IndexError("Index out of range")
        
        row = self.df_matching.iloc[idx]
        image = Image.open(row['image_path'])
        image = image.convert('RGB')
        image = self.resize_image(image)
        encoded_image = self.convert_image_to_base64(image)
        
        inputs = self.processor(images=image, return_tensors="pt")
        outputs = self.model(**inputs)
        
        return {
            'row': row.to_dict(),
            'image': image,
            'encoded_image': encoded_image,
            'logits': outputs.logits
        }

    
    def __len__(self):
        return len(self.df_matching)
        
data_path = '../Data/instagram_photo'
df_origin = pd.read_excel('../Data/instagram_posts.xlsx')
df_origin['post_date'] = df_origin['post_date'].dt.strftime('%Y-%m-%d_%H-%M-%S')
matcher = InstagramPhotoMatcher(data_path, df_origin)

UnexpectedResponse: Unexpected Response: 409 (Conflict)
Raw response content:
b'{"status":{"error":"Wrong input: Collection `instagram_data_new` already exists!"},"time":0.024640666}'

In [6]:
landmarks = [
    {"name": "Al Faisaliah Tower", "location": "Riyadh, Saudi Arabia"},
    {"name": "Al Masmak Palace", "location": "Riyadh, Saudi Arabia"},
    {"name": "Al Rahmah Mosque", "location": "Jeddah, Saudi Arabia"},
    {"name": "Diriyah", "location": "Riyadh, Saudi Arabia"},
    {"name": "Hegra (also known as Al-Hijr or Madain Salih)", "location": "AlUla, Saudi Arabia"},
    {"name": "Ibrahim Palace", "location": "Al-Hofuf, Al-Ahsa, Saudi Arabia"},
    {"name": "Ithra (King Abdulaziz Center for World Culture)", "location": "Dhahran, Saudi Arabia"},
    {"name": "Jabal AlFil (Elephant Rock)", "location": "AlUla, Saudi Arabia"},
    {"name": "King Abdullah Financial District (KAFD)", "location": "Riyadh, Saudi Arabia"},
    {"name": "Kingdom Tower (also known as Burj Al Mamlaka)", "location": "Riyadh, Saudi Arabia"},
    {"name": "Maraya", "location": "AlUla, Saudi Arabia"},
    {"name": "Nassif House Museum", "location": "Jeddah, Saudi Arabia"},
    {"name": "Quba Mosque", "location": "Medina, Saudi Arabia"},
    {"name": "Riyadh Water Tower", "location": "Riyadh, Saudi Arabia"},
    {"name": "The Clock Towers (also known as Abraj Al Bait)", "location": "Mecca, Saudi Arabia"},
    {"name": "The Kaaba", "location": "Mecca, Saudi Arabia"}
]

In [11]:
import os
import pandas as pd
from io import BytesIO
import math
import base64
from transformers import AutoImageProcessor, ResNetForImageClassification
from qdrant_client.models import VectorParams, Distance, PointStruct
import random

class LandmarksMatcher:
    def __init__(self, data_path):
        self.data_path = data_path
        self.train_files = os.listdir(data_path)
        self.df_matching = self.create_matching_dataframe()
        
        self.processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
        self.model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
        
        self.collection_name = 'instagram_data_new'
        # self.collection = qdrant_client.create_collection(
        #     collection_name=self.collection_name,
        #     vectors_config=VectorParams(
        #         size=1000,
        #         distance=Distance.COSINE
        #     ),
        # )

    def create_matching_dataframe(self):
        matching = []
        for i, class_name in enumerate(os.listdir(self.data_path)):
            class_path = os.path.join(self.data_path, class_name)
            for image_name in os.listdir(class_path):
                image_path = os.path.join(class_path, image_name)
                matching.append({
                    'landmark': landmarks[i]['name'],
                    'image_path': image_path.replace("\\", '/'),
                    'Location': landmarks[i]['location'],
                    'class': 1,
                })
        return pd.DataFrame(matching)
    
    def resize_image(self, image, target_width=256):
        image_aspect_ratio = image.size[0] / image.size[1]
        resized_img = image.resize(
            [target_width, math.floor(target_width * image_aspect_ratio)]
        )
        return resized_img

    def convert_image_to_base64(self, image):
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    def upload_item(self, idx):
        if idx >= len(self.df_matching):
            raise IndexError("Index out of range")

        item_data = self.__getitem__(idx)
        row = item_data['row']
        logits = item_data['logits']
        points = [
            PointStruct(
                id=idx,
                payload={
                    'landmark': row['landmark'],
                    'class': row['class'],
                    'image_path': row['image_path'],
                    'Location': row['Location'],
                    'encoded_image': item_data['encoded_image'],
                },
                vector=logits.flatten().tolist()
            )
        ]

        print(qdrant_client.upsert(
            collection_name=self.collection_name,
            points=points
        ))


    def __getitem__(self, idx):
        if idx >= len(self.df_matching):
            raise IndexError("Index out of range")
        
        row = self.df_matching.iloc[idx]
        image = Image.open(row['image_path'])
        image = image.convert('RGB')
        image = self.resize_image(image)
        encoded_image = self.convert_image_to_base64(image)
        
        inputs = self.processor(images=image, return_tensors="pt")
        outputs = self.model(**inputs)
        
        return {
            'row': row.to_dict(),
            'image': image,
            'encoded_image': encoded_image,
            'logits': outputs.logits
        }

    
    def __len__(self):
        return len(self.df_matching)
        
data2_path = '../Data/Photos'
matcher2 = LandmarksMatcher(data2_path)