<a href="https://colab.research.google.com/github/fecsaba/automatic_photo/blob/main/auto_get_meta.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Drive**

In [1]:
import os

# Mount Google Drive so files under /content/drive are reachable from this runtime
from google.colab import drive
drive.mount('/content/drive')
# Notebook shell magics: install pypng and Pillow into the runtime
!pip install pypng
!pip install Pillow

Mounted at /content/drive
Collecting pypng
  Downloading pypng-0.20220715.0-py3-none-any.whl.metadata (13 kB)
Downloading pypng-0.20220715.0-py3-none-any.whl (58 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 58.1/58.1 kB 2.0 MB/s eta 0:00:00
Installing collected packages: pypng
Successfully installed pypng-0.20220715.0


# ***Action***

```
'/content/drive/MyDrive/Image'
```



## Meta kiszedése

In [None]:
from PIL import PngImagePlugin, Image
import os
import json

class PngValidator:
    """Validate that files are genuine PNG images.

    The decision is based on how Pillow identifies the file's content,
    independent of the file name extension.

    Methods:
        is_valid_png(file_path): Report whether a file is a real PNG image.
    """

    @staticmethod
    def is_valid_png(file_path):
        """Report whether the file at ``file_path`` is a real PNG image.

        The file is opened with Pillow and the resulting image object is
        inspected, so a misleading extension does not influence the result.

        Args:
            file_path (str): Full path of the file to inspect.

        Returns:
            bool: True if Pillow opens the file as a PNG image; False
                otherwise, including when opening the file fails for any
                reason.

        Example:
            >>> validator = PngValidator()
            >>> is_valid = validator.is_valid_png("kep.png")
            >>> print(is_valid)
            True
        """
        try:
            with Image.open(file_path) as candidate:
                # Both the plugin class and the reported format must agree.
                if not isinstance(candidate, PngImagePlugin.PngImageFile):
                    return False
                return candidate.format == 'PNG'
        except Exception:
            # Best effort: anything that cannot be opened is "not a PNG".
            return False

class PngFileHandler:
    """Locate genuine PNG files inside a directory.

    Attributes:
        directory_path (str | None): Directory to scan, or None when unset.

    Methods:
        set_directory_path(directory_path): Set the directory to scan.
        get_png_files(): Collect the valid PNG files found in the directory.
    """

    def __init__(self, directory_path=None):
        """Create a handler for ``directory_path``.

        Args:
            directory_path (str, optional): Directory to scan.
                Defaults to None (no directory selected yet).
        """
        self.directory_path = directory_path

    def set_directory_path(self, directory_path):
        """Set the directory to scan.

        Args:
            directory_path (str): Directory to scan.
        """
        self.directory_path = directory_path

    def get_png_files(self):
        """Collect the valid PNG files found directly in the directory.

        Every directory entry is checked with ``PngValidator.is_valid_png``;
        only entries that pass are returned.

        Returns:
            list: Full paths of the valid PNG files, sorted by file name.

        Note:
            - Returns an empty list when no directory path is set.
            - Returns an empty list when the path is not an existing
              directory (this includes a path that points at a regular file).
            - Prints a warning for entries whose name ends in ``.png`` but
              that are not real PNG images.
        """
        if not self.directory_path:
            print("Nincs megadva mappa √∫tvonal!")
            return []

        # isdir (rather than exists) also rejects a path that points at a
        # regular file, which os.listdir() could not enumerate.
        if not os.path.isdir(self.directory_path):
            print(f"A megadott mappa nem l√©tezik: {self.directory_path}")
            return []

        png_files = []
        # Sorted so the result order does not depend on the filesystem.
        for filename in sorted(os.listdir(self.directory_path)):
            file_path = os.path.join(self.directory_path, filename)
            if PngValidator.is_valid_png(file_path):
                png_files.append(file_path)
            elif filename.lower().endswith('.png'):
                print(f"\nFigyelmeztet√©s: A f√°jl '.png' kiterjeszt√©s≈±, de nem val√≥di PNG: {filename}")

        return png_files

class MetadataReader:
    """Read and display the text metadata stored in PNG files.

    Methods:
        read_metadata(file_path): Read and print the metadata of one file.
    """

    @staticmethod
    def read_metadata(file_path):
        """Read the text metadata of the image at ``file_path``.

        The metadata is taken from the opened image's ``text`` attribute
        and echoed to standard output.

        Args:
            file_path (str): Full path of the PNG file.

        Returns:
            dict: Metadata field names mapped to their values.
                An empty dict when the file carries no metadata.
                None when the file is missing or any other error occurs.

        Note:
            Every outcome is reported with a printed message: the metadata
            itself, the absence of metadata, a missing file, or another
            error.
        """
        try:
            with Image.open(file_path) as image:
                text_chunks = image.text

                # Nothing stored in the file: report it and hand back {}.
                if not text_chunks:
                    print(f"\nNem tal√°ltunk metaadatokat a k√∂vetkez≈ë f√°jlban: {os.path.basename(file_path)}")
                    return {}

                print(f"\nMetaadatok a k√∂vetkez≈ë f√°jlhoz: {os.path.basename(file_path)}")
                print("-" * 50)
                for field, value in text_chunks.items():
                    print(f"**{field}**:\n{value}\n")
                return text_chunks

        except FileNotFoundError:
            print(f"A f√°jl nem tal√°lhat√≥: {file_path}")
            return None
        except Exception as err:
            print(f"Hiba t√∂rt√©nt a k√∂vetkez≈ë f√°jln√°l {os.path.basename(file_path)}: {str(err)}")
            return None

class PngMetadataProcessor:
    """Coordinate metadata extraction for all PNG files in a directory.

    Combines file discovery (``PngFileHandler``) with metadata reading
    (``MetadataReader``) and can persist the collected result as JSON.

    Attributes:
        file_handler (PngFileHandler): Object used to find the PNG files.

    Methods:
        process_directory(): Collect the metadata of every PNG in the directory.
        save_results_to_json(results): Write the collected metadata to JSON.
    """

    def __init__(self, directory_path=None):
        """Create a processor for ``directory_path``.

        Args:
            directory_path (str, optional): Directory to process.
                Defaults to None.
        """
        self.file_handler = PngFileHandler(directory_path)

    def process_directory(self):
        """Collect the metadata of every PNG file in the directory.

        Steps:
        1. Ask the file handler for the valid PNG files.
        2. Read the metadata of each file.
        3. Keep the files whose metadata is non-empty.

        Returns:
            dict: File names (without directory) mapped to their metadata
                dicts. Files with no metadata, or that failed to read,
                are left out.
        """
        print(f"\nPNG f√°jlok keres√©se a k√∂vetkez≈ë mapp√°ban: {self.file_handler.directory_path}")
        print("=" * 70)

        collected = {}
        for png_path in self.file_handler.get_png_files():
            file_metadata = MetadataReader.read_metadata(png_path)
            # None (error) and {} (no metadata) are both skipped.
            if not file_metadata:
                continue
            collected[os.path.basename(png_path)] = file_metadata

        if not collected:
            print("\nNem tal√°ltunk √©rv√©nyes PNG f√°jlokat a megadott mapp√°ban.")

        return collected

    def save_results_to_json(self, results):
        """Write ``results`` to ``png_metadata_results.json``.

        Args:
            results (dict): Metadata to save.

        Note:
            The file is created inside the processed directory and
            overwrites any existing file of the same name. Nothing is
            written when ``results`` is empty. Write errors are reported
            with a printed message instead of being raised.
        """
        if not results:
            print("\nNincs menthet≈ë adat.")
            return

        target_dir = self.file_handler.directory_path
        output_path = os.path.join(target_dir, "png_metadata_results.json")
        try:
            with open(output_path, 'w', encoding='utf-8') as handle:
                json.dump(results, handle, indent=2, ensure_ascii=False)
            print(f"\nAz eredm√©nyek sikeresen mentve: {output_path}")
        except Exception as err:
            print(f"\nHiba t√∂rt√©nt a f√°jl ment√©se k√∂zben: {str(err)}")

# Example usage and manual test run
if __name__ == "__main__":
    # Run the processor on the Drive image folder
    processor = PngMetadataProcessor("/content/drive/MyDrive/Image/")
    all_metadata = processor.process_directory()

    # Save the results to a JSON file (written into the same folder)
    processor.save_results_to_json(all_metadata)

    # Summary: only printed when at least one file yielded metadata
    if all_metadata:
        print("\n√ñsszes√≠t√©s:")
        print("=" * 70)
        print(f"√ñsszesen {len(all_metadata)} √©rv√©nyes PNG f√°jl metaadatait olvastuk be.")

## Pozitív prompt kiszedése

In [None]:
import json
import re
import sys

def extract_pos_from_parameters(input_json_path, output_json_path):
    """Extract the 'Pos' value of every image and save the result as JSON.

    The input file must contain a JSON object mapping image file names to
    metadata objects. For each image, the text following ``Pos:`` inside the
    ``parameters`` string (up to the first ``",\\n"`` or the end of the
    string) is extracted.

    Args:
        input_json_path (str): Path of the JSON file to read.
        output_json_path (str): Path of the JSON file to write. It maps
            each image file name to its extracted 'Pos' text, or to an
            empty string when no 'Pos' value could be found.

    Returns:
        bool: True when the output file was written; False when the input
            could not be read or parsed, is not a JSON object, or the
            output could not be written. Every failure is reported with a
            printed message.
    """
    try:
        with open(input_json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: Input file not found at {input_json_path}")
        return False
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON in input file {input_json_path}")
        return False
    except Exception as e:
        print(f"Error: An unexpected error occurred: {type(e).__name__} - {e}")
        return False

    # A list/scalar at the top level has no .items(); report it like any
    # other unusable input instead of crashing.
    if not isinstance(data, dict):
        print(f"Error: Expected a JSON object at the top level of {input_json_path}")
        return False

    pos_data = {}
    # Lazily capture everything after "Pos:" up to ",\n" or end of string.
    pos_pattern = re.compile(r'Pos:\s*([\s\S]*?)(?:,\n|$)')

    for image_filename, image_data in data.items():
        # Entries that are not objects (None, strings, lists) carry no
        # usable 'parameters' field and are treated as missing.
        parameters = image_data.get('parameters') if isinstance(image_data, dict) else None
        if not isinstance(parameters, str):
            print(f"Warning: No 'parameters' string found for {image_filename}")
            pos_data[image_filename] = ""  # Empty string when there is no 'parameters'
            continue

        match = pos_pattern.search(parameters)
        if match:
            pos_data[image_filename] = match.group(1).strip()
        else:
            print(f"Warning: No 'Pos' key found in parameters for {image_filename}")
            pos_data[image_filename] = ""  # Empty string when there is no 'Pos'

    try:
        with open(output_json_path, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps non-ASCII prompt text readable in the
            # UTF-8 file, matching how the other cells write their JSON.
            json.dump(pos_data, f, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f"Error: Could not write to output file {output_json_path}: {e}")
        return False

    return True

if __name__ == '__main__':
    # Input and output locations are fixed for this notebook's Drive layout.
    # There is deliberately no sys.argv length check here: the paths below
    # never came from the command line, and inside a Jupyter/Colab kernel
    # sys.argv carries the kernel launcher's own arguments, so gating on its
    # length only let the cell run (or aborted it) by coincidence.
    input_file = "/content/drive/MyDrive/Image/png_metadata_results.json"
    output_file = "/content/drive/MyDrive/Image/desc.json"

    if extract_pos_from_parameters(input_file, output_file):
        print(f"Successfully extracted 'Pos' values to {output_file}")
    else:
        print("Extraction failed.")

## JSON előkészítése

In [None]:
import json
from datetime import datetime
from transformers import pipeline
from google.colab import drive  # Import for Google Drive integration

import os

from huggingface_hub import login

# SECURITY: never commit an access token in the notebook. The token is read
# from the HF_TOKEN environment variable (set it from a secret store before
# running this cell). Any token that was ever committed in this file must be
# treated as leaked and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    # NOTE(review): the models loaded in this cell are presumably publicly
    # downloadable, in which case no login is needed — confirm.
    print("Warning: HF_TOKEN is not set; skipping Hugging Face login.")

# Mount Google Drive
drive.mount('/content/drive')

# Where the prompt descriptions are read from and where results are written
input_file_path = "/content/drive/MyDrive/Image/desc.json"
output_directory = "/content/drive/MyDrive/Image"

# Read desc.json; a missing or malformed file is reported and leaves an
# empty mapping so the rest of the cell can detect "nothing to do".
try:
    with open(input_file_path, "r", encoding="utf-8") as input_file:
        data = json.load(input_file)
except (FileNotFoundError, json.JSONDecodeError) as load_error:
    if isinstance(load_error, FileNotFoundError):
        print(f"Error: The file {input_file_path} was not found.")
    else:
        print(f"Error: The file {input_file_path} is not valid JSON.")
    data = {}

# Maximum length (in characters) of the final description text.
_DESCRIPTION_CHAR_LIMIT = 500
# Separator used when sentences are joined into one description.
_SENTENCE_SEPARATOR = ". "


def _clean_description(description):
    """Turn a '/'-separated prompt into capitalized, '. '-joined sentences."""
    fragments = (part.strip() for part in description.split("/"))
    return _SENTENCE_SEPARATOR.join(part.capitalize() for part in fragments if part)


def _unique_sentences(text, seen, min_length=0):
    """Return the capitalized sentences of ``text`` that are not in ``seen``.

    Sentences are compared case-insensitively, because the output is
    capitalized and case-only variants would otherwise appear twice.
    Empty fragments and sentences of ``min_length`` characters or fewer are
    dropped. ``seen`` is updated in place with every sentence returned.
    """
    unique = []
    for sentence in text.split("."):
        sentence = sentence.strip()
        key = sentence.casefold()
        if not sentence or len(sentence) <= min_length or key in seen:
            continue
        seen.add(key)
        unique.append(sentence.capitalize())
    return unique


def _fit_to_limit(sentences, limit=_DESCRIPTION_CHAR_LIMIT):
    """Join leading ``sentences`` without exceeding ``limit`` characters.

    The cost of each sentence includes the separator actually inserted by
    the join, so the returned text is never longer than ``limit``.
    """
    kept = []
    used = 0
    for sentence in sentences:
        cost = len(sentence) + (len(_SENTENCE_SEPARATOR) if kept else 0)
        if used + cost > limit:
            break
        kept.append(sentence)
        used += cost
    return _SENTENCE_SEPARATOR.join(kept).strip()


if not data:
    print("No data to process. Exiting.")
else:
    # Language model for rewriting descriptions
    rewriter = pipeline("text2text-generation", model="t5-base")

    # Keyword extraction model (KeyBART)
    keyword_extractor = pipeline("text2text-generation", model="bloomberg/KeyBART")

    # Process data
    processed_data = {}
    for filename, description in data.items():
        # Entries without prompt text (empty string or non-string value)
        # would only make the models generate text from nothing.
        cleaned_description = _clean_description(description) if isinstance(description, str) else ""
        if not cleaned_description:
            print(f"Warning: No usable description for {filename}; skipping.")
            continue

        # Generate SEO-friendly title.
        # NOTE(review): title and CTA are fixed texts chosen only by whether
        # the prompt mentions "woman"; every other subject gets the cat text.
        # NOTE(review): the leading characters of both CTA strings look like
        # mis-encoded emoji — restore the intended characters if so.
        if "woman" in cleaned_description.lower():
            seo_title = "Photorealistic Woman in Urban Autumn Setting | High-Resolution Artwork"
            cta_text = "‚ù§Ô∏è Discover this stunning artwork and bring elegance to your space today!"
        else:
            seo_title = "Lifelike White Cat with Bright Green Eyes | Detailed Digital Illustration"
            cta_text = "üíö Admire the charm of this adorable feline masterpiece and make it yours now!"

        # Rewrite description using language model
        rewritten_description = rewriter(cleaned_description, max_length=800, truncation=True)[0]['generated_text']

        # Keep each rewritten sentence once, dropping very short ones (<= 10 chars)
        seen_sentences = set()
        final_description_parts = _unique_sentences(rewritten_description, seen_sentences, min_length=10)

        # Add up to 2 sentences of the original prompt the rewrite did not keep
        original_details = _unique_sentences(cleaned_description, seen_sentences)
        final_description_parts.extend(original_details[:2])

        # Combine the sentences while honoring the character limit
        final_description = _fit_to_limit(final_description_parts)

        # Extract keywords/tags using the KeyBART model (at most 5).
        # NOTE(review): assumes the model separates keyphrases with ", " —
        # verify against the model's actual output format.
        tags = []
        if final_description:
            keywords = keyword_extractor(final_description, max_length=50, truncation=True)[0]['generated_text'].split(", ")
            tags = [tag.strip() for tag in keywords if tag.strip()][:5]

        processed_data[filename] = {
            "seo_title": seo_title,
            "description": final_description,
            "cta": cta_text,  # CTA kept as a separate field
            "tags": tags,  # Automatically generated tags
            "processing_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

    # Save to JSON file in the specified directory with UTF-8 encoding
    output_file_path = f"{output_directory}/processed_art_data.json"
    with open(output_file_path, "w", encoding="utf-8") as json_file:
        json.dump(processed_data, json_file, indent=4, ensure_ascii=False)

    print(f"JSON file has been created successfully at: {output_file_path}")