# JSON

## Bu dastur yangi JSON fayllar kelganida ularni asosiy ma'lumotlar bazasiga qo'shib turadi va ID konflikti bo'lmasligini ta'minlaydi.

In [3]:
import json
import os
import hashlib
from datetime import datetime
from typing import Dict, List, Any

class AudioDataManager:
    """Merge per-utterance JSON files into a single JSON database file.

    The in-memory database is a dict with two top-level keys:
    ``"metadata"`` (``total_records``, ``last_updated``, ``version``) and
    ``"records"`` (mapping of unique record ID -> record dict). Changes
    live in memory until :meth:`save_main_database` is called.
    """

    def __init__(self, main_db_path: str = "main_audio_database.json"):
        """Create a manager bound to a database file.

        main_db_path: path of the main database file; it is loaded
            immediately if it exists.
        """
        self.main_db_path = main_db_path
        self.main_database = self.load_main_database()

    @staticmethod
    def _new_database() -> Dict[str, Any]:
        """Return an empty database skeleton."""
        return {
            "metadata": {
                "total_records": 0,
                "last_updated": datetime.now().isoformat(),
                "version": "1.0"
            },
            "records": {}
        }

    def _is_database_file(self, path: str) -> bool:
        """Tell whether *path* points at this manager's own database file."""
        return os.path.abspath(path) == os.path.abspath(self.main_db_path)

    def load_main_database(self) -> Dict[str, Any]:
        """Load the database from disk, or build an empty one.

        A legacy file that holds a plain list of records is converted to
        the dict format (and written back). During conversion every record
        gets a distinct key, so records with a duplicated or empty
        ``utt_id`` are no longer overwritten by each other.

        An unreadable file is reported on stdout and an empty database is
        returned (best effort, as before, but no longer silent).
        """
        if not os.path.exists(self.main_db_path):
            return self._new_database()

        try:
            with open(self.main_db_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except (json.JSONDecodeError, FileNotFoundError) as error:
            print(f"⚠ Ogohlantirish: {self.main_db_path} faylini o'qib bo'lmadi "
                  f"({error}); bo'sh baza ishlatiladi")
            return self._new_database()

        if isinstance(data, list):
            converted = self._new_database()
            records = converted["records"]
            for index, item in enumerate(data):
                if not isinstance(item, dict):
                    print(f"⚠ Ogohlantirish: {index}-yozuv obyekt emas, o'tkazib yuborildi")
                    continue
                # A missing *or empty* utt_id falls back to a positional key.
                base_id = item.get("utt_id") or f"record_{index}"
                record_id = base_id
                suffix = 1
                while record_id in records:
                    record_id = f"{base_id}_{suffix}"
                    suffix += 1
                # Keep the invariant used by add_single_record: key == utt_id.
                item["utt_id"] = record_id
                records[record_id] = item
            converted["metadata"]["total_records"] = len(records)
            self.save_main_database(converted)
            return converted

        if not isinstance(data, dict):
            print(f"⚠ Ogohlantirish: {self.main_db_path} formati noto'g'ri; "
                  f"bo'sh baza ishlatiladi")
            return self._new_database()

        # Fill in any missing sections so later lookups cannot raise KeyError.
        skeleton = self._new_database()
        records = data.setdefault("records", skeleton["records"])
        metadata = data.setdefault("metadata", skeleton["metadata"])
        metadata.setdefault("total_records", len(records))
        metadata.setdefault("last_updated", skeleton["metadata"]["last_updated"])
        return data

    def save_main_database(self, data: Dict[str, Any] = None):
        """Write the database to ``main_db_path`` as pretty-printed UTF-8 JSON.

        data: database dict to write; defaults to the in-memory database.

        The JSON is written to a sibling ``.tmp`` file first and then moved
        over the target, so a failed dump does not truncate the existing
        database.
        """
        if data is None:
            data = self.main_database

        tmp_path = f"{self.main_db_path}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, ensure_ascii=False, indent=2)
        os.replace(tmp_path, self.main_db_path)

    def generate_unique_id(self, record: Dict[str, Any], filename: str) -> str:
        """Return an ID that is not yet used in the database.

        The record's non-empty ``utt_id`` is preferred; otherwise the file
        name without extension is used. On a clash ``_1``, ``_2``, ... is
        appended until the ID is free.
        """
        base_id = record.get("utt_id") or os.path.splitext(filename)[0]

        candidate = base_id
        counter = 1
        while candidate in self.main_database["records"]:
            candidate = f"{base_id}_{counter}"
            counter += 1

        return candidate

    def add_single_record(self, json_file_path: str) -> bool:
        """Add the record stored in one JSON file.

        The record is stamped with its final ``utt_id``, ``source_file``
        and ``added_at``. Returns True on success; on any failure the
        error is printed and False is returned so batch runs can continue.
        """
        try:
            with open(json_file_path, 'r', encoding='utf-8') as file:
                record = json.load(file)

            if not isinstance(record, dict):
                raise ValueError("JSON obyekt (dict) kutilgan edi")

            filename = os.path.basename(json_file_path)
            unique_id = self.generate_unique_id(record, filename)
            timestamp = datetime.now().isoformat()

            record["utt_id"] = unique_id
            record["source_file"] = filename
            record["added_at"] = timestamp

            records = self.main_database["records"]
            records[unique_id] = record
            # Derive the counter from the data so it can never drift.
            self.main_database["metadata"]["total_records"] = len(records)
            self.main_database["metadata"]["last_updated"] = timestamp

            print(f"✓ Qo'shildi: {filename} -> ID: {unique_id}")
            return True

        except Exception as e:  # per-file boundary: report and keep going
            print(f"✗ Xatolik {json_file_path}: {str(e)}")
            return False

    def add_multiple_records(self, json_files: List[str]) -> Dict[str, int]:
        """Add several JSON files; return ``{"success": n, "failed": m}``."""
        stats = {"success": 0, "failed": 0}

        for json_file in json_files:
            outcome = "success" if self.add_single_record(json_file) else "failed"
            stats[outcome] += 1

        return stats

    def add_from_directory(self, directory_path: str) -> Dict[str, int]:
        """Add every ``*.json`` file found directly inside a directory.

        Files are processed in sorted name order so that ID suffixes are
        assigned reproducibly, and the database file itself is skipped if
        it lives in the same directory.
        """
        json_files = []
        for filename in sorted(os.listdir(directory_path)):
            if not filename.endswith('.json'):
                continue
            path = os.path.join(directory_path, filename)
            if self._is_database_file(path):
                continue
            json_files.append(path)

        if not json_files:
            print("JSON fayllar topilmadi!")
            return {"success": 0, "failed": 0}

        print(f"Topildi: {len(json_files)} ta JSON fayl")
        return self.add_multiple_records(json_files)

    def search_records(self, **criteria) -> List[Dict[str, Any]]:
        """Return the records whose fields equal all given keyword criteria."""
        return [
            record
            for record in self.main_database["records"].values()
            if all(key in record and record[key] == value
                   for key, value in criteria.items())
        ]

    def get_statistics(self) -> Dict[str, Any]:
        """Count records per category, sentiment and speaker.

        Records lacking a field are counted under ``"unknown"``.
        """
        records = self.main_database["records"]

        # result key -> record field that is tallied under it
        tallied_fields = {
            "categories": "category",
            "sentiments": "sentiment",
            "speakers": "speaker_id",
        }
        counts = {name: {} for name in tallied_fields}

        for record in records.values():
            for name, field in tallied_fields.items():
                value = record.get(field, "unknown")
                counts[name][value] = counts[name].get(value, 0) + 1

        return {
            "total_records": len(records),
            "categories": counts["categories"],
            "sentiments": counts["sentiments"],
            "speakers": counts["speakers"],
            "last_updated": self.main_database["metadata"]["last_updated"]
        }

    def export_to_format(self, format_type: str = "list") -> Any:
        """Return the data as a list of records, the ID->record dict, or
        (for any other ``format_type``) the whole database dict."""
        if format_type == "list":
            return list(self.main_database["records"].values())
        elif format_type == "dict":
            return self.main_database["records"]
        else:
            return self.main_database

def main():
    """Run the interactive console menu for AudioDataManager.

    Options: add one file, add every JSON file of a directory, show
    statistics, or save and quit. The database is saved after each
    successful import.
    """
    manager = AudioDataManager()

    print("=== Audio Ma'lumotlar Boshqaruvchi ===")
    print("1. Bitta fayl qo'shish")
    print("2. Papkadan barcha fayllarni qo'shish")
    print("3. Statistikalarni ko'rish")
    print("4. Ma'lumotlarni saqlash va chiqish")

    while True:
        choice = input("\nTanlang (1-4): ").strip()

        if choice == "1":
            file_path = input("JSON fayl yo'lini kiriting: ").strip()
            # isfile (not exists): a directory path is not a valid record file.
            if os.path.isfile(file_path):
                if manager.add_single_record(file_path):
                    manager.save_main_database()
            else:
                print("Fayl topilmadi!")

        elif choice == "2":
            dir_path = input("Papka yo'lini kiriting: ").strip()
            # isdir (not exists): listing a regular file would raise
            # NotADirectoryError and abort the whole session.
            if os.path.isdir(dir_path):
                stats = manager.add_from_directory(dir_path)
                print(f"\nNatija: {stats['success']} muvaffaqiyatli, {stats['failed']} xato")
                manager.save_main_database()
            else:
                print("Papka topilmadi!")

        elif choice == "3":
            stats = manager.get_statistics()
            print("\n=== Statistikalar ===")
            print(f"Jami yozuvlar: {stats['total_records']}")
            print(f"Kategoriyalar: {stats['categories']}")
            print(f"Kayfiyat: {stats['sentiments']}")
            print(f"So'nggi yangilanish: {stats['last_updated']}")

        elif choice == "4":
            manager.save_main_database()
            print("Ma'lumotlar saqlandi. Dastur tugadi.")
            break

        else:
            print("Noto'g'ri tanlov!")

# Qo'shimcha utility funksiyalar
def batch_process_directory(directory_path: str, output_path: str = "merged_audio_data.json"):
    """Merge all JSON files of a directory into one database file.

    directory_path: directory that is scanned for JSON files.
    output_path: database file the merged records are saved to.

    Prints a short summary and returns the AudioDataManager that was used,
    so the caller can keep working with the merged data.
    """
    db_manager = AudioDataManager(output_path)
    merge_stats = db_manager.add_from_directory(directory_path)
    db_manager.save_main_database()

    summary_lines = (
        f"Natija: {merge_stats['success']} fayl birlashtirildi",
        f"Saqlandi: {output_path}",
    )
    for line in summary_lines:
        print(line)

    return db_manager

# Start the interactive menu only when this code is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

=== Audio Ma'lumotlar Boshqaruvchi ===
1. Bitta fayl qo'shish
2. Papkadan barcha fayllarni qo'shish
3. Statistikalarni ko'rish
4. Ma'lumotlarni saqlash va chiqish

Tanlang (1-4): 2
Papka yo'lini kiriting: drive-download-20250915T152939Z-1-001
Topildi: 6 ta JSON fayl
✓ Qo'shildi: 20250912T124701_7fe961d1.json -> ID: 20250912T124701_7fe961d1_1
✓ Qo'shildi: 20250912T124632_fa67e4df.json -> ID: 20250912T124632_fa67e4df_1
✓ Qo'shildi: 20250912T134506_b09de6d4.json -> ID: 20250912T134506_b09de6d4_1
✓ Qo'shildi: 20250912T124607_d9fafe5d.json -> ID: 20250912T124607_d9fafe5d_1
✓ Qo'shildi: 20250912T134443_c3bdac14.json -> ID: 20250912T134443_c3bdac14_1
✓ Qo'shildi: 20250912T134342_343b1c6d.json -> ID: 20250912T134342_343b1c6d_1

Natija: 6 muvaffaqiyatli, 0 xato

Tanlang (1-4): 3

=== Statistikalar ===
Jami yozuvlar: 12
Kategoriyalar: {'work': 2, 'travel': 2, 'sport': 2, 'daily': 2, 'culture': 2, 'education': 2}
Kayfiyat: {'positive': 8, 'negative': 4}
So'nggi yangilanish: 2025-09-16T22:18:05.74

## simple_merger.py - Sodda birlashtiruvchi

In [4]:
# simple_merger.py - Sodda birlashtiruvchi
import json
import os
from datetime import datetime

def merge_json_files(input_directory, output_file="merged_data.json"):
    """Merge every ``*.json`` record file of a directory into one JSON list.

    input_directory: directory that is scanned (non-recursively).
    output_file: path of the JSON file that receives the merged list.

    Each record gets a ``source_file`` field, and a ``utt_id`` derived
    from the file name when it has none (or an empty one). Files are read
    in sorted name order so the output is reproducible. The output file
    itself is skipped, so re-running with the output inside the input
    directory does not re-ingest the previous result. Files that cannot
    be read, or that do not contain a JSON object, are reported and
    skipped.

    Returns the merged list (previously nothing was returned).
    """
    merged_data = []
    output_abspath = os.path.abspath(output_file)

    for filename in sorted(os.listdir(input_directory)):
        if not filename.endswith('.json'):
            continue

        file_path = os.path.join(input_directory, filename)
        if os.path.abspath(file_path) == output_abspath:
            continue

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            if not isinstance(data, dict):
                raise ValueError("JSON obyekt (dict) kutilgan edi")

            # A missing or empty utt_id is replaced by the file name stem.
            if not data.get('utt_id'):
                data['utt_id'] = os.path.splitext(filename)[0]

            data['source_file'] = filename
            merged_data.append(data)

        except Exception as e:  # per-file boundary: report and keep going
            print(f"Xatolik {filename}: {e}")

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(merged_data, f, ensure_ascii=False, indent=2)

    print(f"{len(merged_data)} ta fayl birlashtirildi -> {output_file}")
    return merged_data

# Usage: ask for the directory that holds the JSON files and merge them
# into the default output file of merge_json_files.
if __name__ == "__main__":
    input_dir = input("JSON fayllar papkasi: ")
    merge_json_files(input_dir)

JSON fayllar papkasi: drive-download-20250915T152939Z-1-001
6 ta fayl birlashtirildi -> merged_data.json




Oddiy ishlatish:

```bash
python simple_merger.py
```

## Kengaytirilgan ishlatish 

In [None]:
# NOTE(review): assumes the AudioDataManager class was saved as
# audio_data_manager.py next to this script; confirm the module name.
from audio_data_manager import AudioDataManager

# Create a manager bound to a custom database file
manager = AudioDataManager("my_database.json")

# Add every JSON file found in the directory
stats = manager.add_from_directory("/path/to/json/files")

# Persist the in-memory database to disk
manager.save_main_database()

# Telegram botda foydalanuvchilar bir xil matnni qayta-qayta yozib olishi mumkin va bu holatda takroriy matnlarni aniqlash kerak. 
## Bu holat uchun text duplicate detection funksiyasini qo'shamiz

In [3]:
import json
import os
import hashlib
from datetime import datetime
from typing import Dict, List, Any, Tuple
from difflib import SequenceMatcher
import re

class SmartAudioDataManager:
    """JSON record database with near-duplicate text detection.

    The database dict has three top-level keys: ``"metadata"``,
    ``"records"`` (unique ID -> record dict) and ``"text_hashes"``
    (short hash of the cleaned text -> list of record IDs). A new
    record's ``"text"`` is compared with every stored text; matches at or
    above ``similarity_threshold`` are treated as potential duplicates.
    """

    # Punctuation ignored when comparing texts: . , ! ? ; : " and the
    # typographic double quotes (curly, low-9, guillemets). Apostrophes
    # are deliberately kept: in the sample data they are part of words
    # (e.g. "o'xshash").
    _PUNCTUATION_RE = re.compile(r'[.,!?;:"\u201c\u201d\u201e\u00ab\u00bb]')
    _WHITESPACE_RE = re.compile(r'\s+')

    def __init__(self, main_db_path: str = "main_audio_database.json",
                 similarity_threshold: float = 0.85):
        """Create a manager bound to a database file.

        main_db_path: path of the main database file (loaded if present).
        similarity_threshold: minimum similarity ratio (0.0-1.0) at which
            two texts count as duplicates.
        """
        self.main_db_path = main_db_path
        self.similarity_threshold = similarity_threshold
        self.main_database = self.load_main_database()

    @staticmethod
    def _new_database(total_records: int = 0) -> Dict[str, Any]:
        """Return an empty version-2.0 database skeleton."""
        return {
            "metadata": {
                "total_records": total_records,
                "last_updated": datetime.now().isoformat(),
                "version": "2.0",
                "duplicate_policy": "detect_and_mark"
            },
            "records": {},
            "text_hashes": {}
        }

    def _is_database_file(self, path: str) -> bool:
        """Tell whether *path* points at this manager's own database file."""
        return os.path.abspath(path) == os.path.abspath(self.main_db_path)

    def clean_text(self, text: str) -> str:
        """Normalize text for comparison.

        Lower-cases, removes the punctuation listed in ``_PUNCTUATION_RE``,
        then collapses whitespace runs and trims. Punctuation is removed
        *before* whitespace is collapsed so that "a , b" and "a, b"
        normalize to the same string.
        """
        if not text:
            return ""

        text = self._PUNCTUATION_RE.sub('', text.lower())
        return self._WHITESPACE_RE.sub(' ', text).strip()

    def calculate_text_similarity(self, text1: str, text2: str) -> float:
        """Return the SequenceMatcher ratio (0.0-1.0) of two cleaned texts.

        Returns 0.0 if either text is empty after cleaning.
        """
        clean_text1 = self.clean_text(text1)
        clean_text2 = self.clean_text(text2)

        if not clean_text1 or not clean_text2:
            return 0.0

        return SequenceMatcher(None, clean_text1, clean_text2).ratio()

    def create_text_hash(self, text: str) -> str:
        """Return the first 8 hex digits of the MD5 of the cleaned text."""
        clean_text = self.clean_text(text)
        return hashlib.md5(clean_text.encode('utf-8')).hexdigest()[:8]

    def find_similar_records(self, new_text: str) -> List[Tuple[str, Dict, float]]:
        """Find stored records whose text is similar to *new_text*.

        Returns ``(record_id, record, similarity)`` tuples for every record
        at or above the threshold, most similar first.
        """
        threshold = self.similarity_threshold
        cleaned_new = self.clean_text(new_text)  # cleaned once, not per record
        similar_records = []

        for record_id, record in self.main_database["records"].items():
            cleaned_existing = self.clean_text(record.get("text", ""))

            if cleaned_new and cleaned_existing:
                matcher = SequenceMatcher(None, cleaned_new, cleaned_existing)
                # Both quick ratios are cheap upper bounds on ratio(), so a
                # record failing either one cannot reach the threshold.
                if (matcher.real_quick_ratio() < threshold
                        or matcher.quick_ratio() < threshold):
                    continue
                similarity = matcher.ratio()
            else:
                similarity = 0.0

            if similarity >= threshold:
                similar_records.append((record_id, record, similarity))

        similar_records.sort(key=lambda item: item[2], reverse=True)
        return similar_records

    def load_main_database(self) -> Dict[str, Any]:
        """Load the database from disk, or build an empty one.

        A legacy file holding a plain list of records is converted to the
        dict format and written back. A dict file that lacks sections (for
        example one without ``text_hashes``) gets the missing sections
        added so later updates cannot fail half-way. An unreadable file is
        reported on stdout and an empty database is returned.
        """
        if not os.path.exists(self.main_db_path):
            return self._new_database()

        try:
            with open(self.main_db_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except (json.JSONDecodeError, FileNotFoundError) as error:
            print(f"⚠ Ogohlantirish: {self.main_db_path} faylini o'qib bo'lmadi "
                  f"({error}); bo'sh baza ishlatiladi")
            return self._new_database()

        if isinstance(data, list):
            new_format = self._new_database(total_records=len(data))
            new_format["records"] = {item.get("utt_id", f"record_{i}"): item
                                     for i, item in enumerate(data)}
            self.save_main_database(new_format)
            return new_format

        if not isinstance(data, dict):
            print(f"⚠ Ogohlantirish: {self.main_db_path} formati noto'g'ri; "
                  f"bo'sh baza ishlatiladi")
            return self._new_database()

        skeleton = self._new_database()
        records = data.setdefault("records", skeleton["records"])
        data.setdefault("text_hashes", skeleton["text_hashes"])
        metadata = data.setdefault("metadata", skeleton["metadata"])
        metadata.setdefault("total_records", len(records))
        metadata.setdefault("last_updated", skeleton["metadata"]["last_updated"])
        return data

    def save_main_database(self, data: Dict[str, Any] = None):
        """Write the database to ``main_db_path`` as pretty-printed UTF-8 JSON.

        data: database dict to write; defaults to the in-memory database.

        The JSON goes to a sibling ``.tmp`` file first and is then moved
        over the target, so a failed dump does not truncate the database.
        """
        if data is None:
            data = self.main_database

        tmp_path = f"{self.main_db_path}.tmp"
        with open(tmp_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, ensure_ascii=False, indent=2)
        os.replace(tmp_path, self.main_db_path)

    @staticmethod
    def _prompt_duplicate_action(new_text: str, best_match: Tuple[str, Dict, float],
                                 similarity_percent: int) -> str:
        """Show the best match and ask the user what to do with the new record.

        Returns 'add_anyway', 'skip' or 'update_existing'; any unrecognised
        answer means 'skip'.
        """
        print(f"\n🔍 O'XSHASH MATN TOPILDI!")
        print(f"Yangi matn: '{new_text}'")
        print(f"Mavjud matn: '{best_match[1].get('text', '')}'")
        print(f"O'xshashlik: {similarity_percent}%")
        print(f"Mavjud ID: {best_match[0]}")

        choice = input("\nHarakat tanlang:\n1. Qo'shish (har xil audio)\n2. O'tkazib yuborish\n3. Mavjudini yangilash\nTanlang (1-3): ").strip()

        actions = {"1": "add_anyway", "2": "skip", "3": "update_existing"}
        return actions.get(choice, "skip")

    def add_record_with_duplicate_check(self, json_file_path: str,
                                        action_on_duplicate: str = "ask") -> Dict[str, Any]:
        """Add the record of one JSON file, checking for duplicate text first.

        action_on_duplicate: what to do when a similar text already exists:
            'ask' (prompt on the console), 'skip', 'add_anyway' or
            'update_existing'.

        Returns a result dict whose ``"status"`` is 'added', 'skipped',
        'updated' or 'error' and which always carries ``"message"`` and
        ``"filename"``. Errors are returned, never raised.
        """
        filename = os.path.basename(json_file_path)
        try:
            with open(json_file_path, 'r', encoding='utf-8') as file:
                new_record = json.load(file)

            if not isinstance(new_record, dict):
                return {"status": "error",
                        "message": "Xatolik: JSON obyekt (dict) kutilgan edi",
                        "filename": filename}

            new_text = new_record.get("text", "")
            if not new_text:
                return {"status": "error", "message": "Matn topilmadi",
                        "filename": filename}

            similar_records = self.find_similar_records(new_text)

            result = {
                "status": "unknown",
                "filename": filename,
                "new_text": new_text,
                "similar_count": len(similar_records),
                "similar_records": similar_records[:3]  # three closest matches
            }

            if similar_records:
                best_match = similar_records[0]
                similarity_percent = int(best_match[2] * 100)

                result["best_match"] = {
                    "id": best_match[0],
                    "text": best_match[1].get("text", ""),
                    "similarity": similarity_percent,
                    "speaker_id": best_match[1].get("speaker_id"),
                    "created_at": best_match[1].get("created_at")
                }

                if action_on_duplicate == "ask":
                    action_on_duplicate = self._prompt_duplicate_action(
                        new_text, best_match, similarity_percent)

                if action_on_duplicate == "skip":
                    result["status"] = "skipped"
                    result["message"] = "Takroriy matn, o'tkazib yuborildi"
                    return result

                elif action_on_duplicate == "update_existing":
                    existing_id = best_match[0]
                    existing_record = self.main_database["records"][existing_id]
                    timestamp = datetime.now().isoformat()

                    # Only the recording details are refreshed; the stored
                    # text stays as it is.
                    if "duration_ms" in new_record:
                        existing_record["duration_ms"] = new_record["duration_ms"]
                    if "created_at" in new_record:
                        existing_record["last_recorded_at"] = new_record["created_at"]

                    existing_record["updated_at"] = timestamp
                    existing_record["source_files"] = existing_record.get("source_files", []) + [filename]
                    self.main_database.setdefault("metadata", {})["last_updated"] = timestamp

                    result["status"] = "updated"
                    result["message"] = f"Mavjud record yangilandi: {existing_id}"
                    result["updated_id"] = existing_id
                    return result

            # No similar text was found, or the caller chose to add anyway.
            # Everything that can fail is computed before the database is
            # touched, so an error never leaves a half-registered record.
            unique_id = self.generate_unique_id(new_record, filename)
            text_hash = self.create_text_hash(new_text)
            timestamp = datetime.now().isoformat()

            if similar_records:
                new_record["is_potential_duplicate"] = True
                new_record["similar_to"] = [r[0] for r in similar_records[:3]]
                new_record["max_similarity"] = similar_records[0][2]
            else:
                new_record["is_potential_duplicate"] = False

            new_record["utt_id"] = unique_id
            new_record["source_file"] = filename
            new_record["added_at"] = timestamp
            new_record["text_hash"] = text_hash

            records = self.main_database["records"]
            records[unique_id] = new_record
            metadata = self.main_database.setdefault("metadata", {})
            metadata["total_records"] = len(records)  # derived, cannot drift
            metadata["last_updated"] = timestamp

            text_hashes = self.main_database.setdefault("text_hashes", {})
            text_hashes.setdefault(text_hash, []).append(unique_id)

            result["status"] = "added"
            result["message"] = f"Yangi record qo'shildi: {unique_id}"
            result["new_id"] = unique_id

            return result

        except Exception as e:  # per-file boundary: report, never raise
            return {
                "status": "error",
                "message": f"Xatolik: {str(e)}",
                "filename": filename
            }

    def generate_unique_id(self, record: Dict[str, Any], filename: str) -> str:
        """Return an ID that is not yet used in the database.

        The record's non-empty ``utt_id`` is preferred; otherwise the file
        name without extension is used. On a clash ``_1``, ``_2``, ... is
        appended until the ID is free.
        """
        base_id = record.get("utt_id") or os.path.splitext(filename)[0]

        candidate = base_id
        counter = 1
        while candidate in self.main_database["records"]:
            candidate = f"{base_id}_{counter}"
            counter += 1

        return candidate

    def batch_process_with_duplicate_check(self, directory_path: str,
                                           action_on_duplicate: str = "ask") -> Dict[str, Any]:
        """Process every ``*.json`` file of a directory with duplicate checks.

        Returns a dict with the counters ``added``, ``skipped``, ``updated``
        and ``errors`` plus ``details`` (one result dict per file). If the
        directory has no JSON files the counters are all zero and an
        ``"error"`` message is included as well. Files are handled in
        sorted name order and the database file itself is skipped.
        """
        results = {
            "added": 0,
            "skipped": 0,
            "updated": 0,
            "errors": 0,
            "details": []
        }

        json_files = sorted(
            name for name in os.listdir(directory_path)
            if name.endswith('.json')
            and not self._is_database_file(os.path.join(directory_path, name))
        )

        if not json_files:
            # Counters stay present so callers can always read them.
            results["error"] = "JSON fayllar topilmadi"
            return results

        print(f"Topildi: {len(json_files)} ta JSON fayl")

        for i, filename in enumerate(json_files, 1):
            file_path = os.path.join(directory_path, filename)
            print(f"\n[{i}/{len(json_files)}] Qayta ishlanmoqda: {filename}")

            result = self.add_record_with_duplicate_check(file_path, action_on_duplicate)
            results["details"].append(result)

            if result["status"] == "added":
                results["added"] += 1
                print(f"✓ Qo'shildi: {result.get('new_id', 'N/A')}")
            elif result["status"] == "skipped":
                results["skipped"] += 1
                print(f"⏭ O'tkazib yuborildi: {result['message']}")
            elif result["status"] == "updated":
                results["updated"] += 1
                print(f"🔄 Yangilandi: {result.get('updated_id', 'N/A')}")
            else:
                results["errors"] += 1
                print(f"✗ Xatolik: {result['message']}")

        return results

    def find_all_duplicates(self) -> Dict[str, List[str]]:
        """Group record IDs by identical cleaned text.

        Returns only the groups with more than one record, keyed by the
        cleaned text. Records without text are ignored.
        """
        text_groups = {}

        for record_id, record in self.main_database["records"].items():
            clean_text = self.clean_text(record.get("text", ""))
            if clean_text:
                text_groups.setdefault(clean_text, []).append(record_id)

        return {text: ids for text, ids in text_groups.items() if len(ids) > 1}

    def get_duplicate_statistics(self) -> Dict[str, Any]:
        """Summarize exact (cleaned-text) duplicates in the database.

        ``unique_records`` counts each duplicate group once.
        """
        duplicates = self.find_all_duplicates()
        total_records = len(self.main_database["records"])

        total_duplicate_groups = len(duplicates)
        total_duplicate_records = sum(len(ids) for ids in duplicates.values())

        return {
            "total_records": total_records,
            "duplicate_groups": total_duplicate_groups,
            "duplicate_records": total_duplicate_records,
            "unique_records": total_records - total_duplicate_records + total_duplicate_groups,
            "duplicate_details": duplicates
        }

def main():
    """Run the interactive console menu for SmartAudioDataManager.

    Options: add one file or a whole directory with duplicate checking,
    show duplicate statistics, list all duplicate groups, or save and
    quit. The database is saved after every import.
    """
    print("Ma'lumotlar Boshqaruvchi")

    similarity = 0.85  # recommended threshold; sensible range is 0.7-0.95
    manager = SmartAudioDataManager(similarity_threshold=similarity)

    print("\n1. Bitta fayl qo'shish (takroriy tekshiruv bilan)")
    print("2. Papkadan fayllarni qo'shish (takroriy tekshiruv bilan)")
    print("3. Takroriy matnlar statistikasi")
    print("4. Barcha takroriylarni ko'rsatish")
    print("5. Ma'lumotlarni saqlash")

    while True:
        choice = input("\nTanlang (1-5): ").strip()

        if choice == "1":
            file_path = input("JSON fayl yo'li: ").strip()
            # isfile (not exists): a directory path is not a valid record file.
            if os.path.isfile(file_path):
                result = manager.add_record_with_duplicate_check(file_path)
                print(f"Natija: {result['message']}")
                manager.save_main_database()
            else:
                print("Fayl topilmadi!")

        elif choice == "2":
            dir_path = input("Papka yo'li: ").strip()
            # isdir (not exists): listing a regular file would raise
            # NotADirectoryError and abort the whole session.
            if os.path.isdir(dir_path):
                print("\nTakroriy fayllar uchun harakat:")
                print("1. Har birini so'rash")
                print("2. Barchasini qo'shish")
                print("3. Barchasini o'tkazib yuborish")

                action_choice = input("Tanlang (1-3): ").strip()
                action_map = {"1": "ask", "2": "add_anyway", "3": "skip"}
                action = action_map.get(action_choice, "ask")

                results = manager.batch_process_with_duplicate_check(dir_path, action)
                if "error" in results:
                    # e.g. the directory holds no JSON files; the counters
                    # may be absent from the result in that case.
                    print(results["error"])
                else:
                    print(f"\n📊 Natijalar:")
                    print(f"✓ Qo'shildi: {results['added']}")
                    print(f"🔄 Yangilandi: {results['updated']}")
                    print(f"⏭ O'tkazib yuborildi: {results['skipped']}")
                    print(f"✗ Xatolar: {results['errors']}")

                    manager.save_main_database()
            else:
                print("Papka topilmadi!")

        elif choice == "3":
            stats = manager.get_duplicate_statistics()
            print(f"\n📈 Takroriy matnlar statistikasi:")
            print(f"Jami yozuvlar: {stats['total_records']}")
            print(f"Takroriy guruhlar: {stats['duplicate_groups']}")
            print(f"Takroriy yozuvlar: {stats['duplicate_records']}")
            print(f"Noyob yozuvlar: {stats['unique_records']}")

        elif choice == "4":
            duplicates = manager.find_all_duplicates()
            print(f"\n📋 Barcha takroriy matnlar ({len(duplicates)} ta guruh):")
            for i, (text, ids) in enumerate(duplicates.items(), 1):
                print(f"\n{i}. '{text}' ({len(ids)} marta):")
                for record_id in ids:
                    record = manager.main_database["records"][record_id]
                    print(f"   - {record_id} | {record.get('created_at', 'N/A')}")

        elif choice == "5":
            manager.save_main_database()
            print("Ma'lumotlar saqlandi!")
            break

        else:
            print("Noto'g'ri tanlov!")

# Start the interactive duplicate-aware menu only when this code is
# executed directly (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

Ma'lumotlar Boshqaruvchi

1. Bitta fayl qo'shish (takroriy tekshiruv bilan)
2. Papkadan fayllarni qo'shish (takroriy tekshiruv bilan)
3. Takroriy matnlar statistikasi
4. Barcha takroriylarni ko'rsatish
5. Ma'lumotlarni saqlash

Takroriy fayllar uchun harakat:
1. Har birini so'rash
2. Barchasini qo'shish
3. Barchasini o'tkazib yuborish
Topildi: 2 ta JSON fayl

[1/2] Qayta ishlanmoqda: 20250912T124611_d9fafe5d.json

🔍 O'XSHASH MATN TOPILDI!
Yangi matn: 'Bugun havo juda yaxshi, chiqaylikmi?'
Mavjud matn: 'Bugun havo juda yaxshi, sayrga chiqaylikmi?'
O'xshashlik: 90%
Mavjud ID: 20250912T124607_d9fafe5d
⏭ O'tkazib yuborildi: Takroriy matn, o'tkazib yuborildi

[2/2] Qayta ishlanmoqda: 20250912T124612_fa67e4df.json

🔍 O'XSHASH MATN TOPILDI!
Yangi matn: 'Samolyot chiptasi narxi oshib ketdi.'
Mavjud matn: 'Samolyot chiptasi narxi oshib ketdi, afsus.'
O'xshashlik: 92%
Mavjud ID: 20250912T124632_fa67e4df
⏭ O'tkazib yuborildi: Takroriy matn, o'tkazib yuborildi

📊 Natijalar:
✓ Qo'shildi: 0
🔄 Yangil