In [None]:
def decode_arabic_mojibake(text):
    """Repair Arabic text whose UTF-8 bytes were decoded as Windows-1252.

    Every character in U+0600-U+067F is two bytes in UTF-8: a lead byte
    0xD8 or 0xD9 followed by a trail byte in 0x80-0xBF.  Read as
    Windows-1252, the lead byte shows up as U+00D8 / U+00D9 and the trail
    byte as some other symbol.  Each such pair is turned back into its two
    bytes and decoded as UTF-8, so hamza, Arabic punctuation and digits are
    recovered along with the plain letters, in a single pass over the text.

    Args:
        text: String that may mix mojibake pairs with ordinary text.

    Returns:
        The text with every recognised pair replaced by the Arabic character
        it encodes.  Decoded harakat (U+064B-U+0652) are dropped, and tatweel
        (U+0640) is removed wherever it occurs.  Everything else, including
        ASCII and already-correct Arabic letters, passes through unchanged.
    """
    import re

    lead_chars = '\u00d8\u00d9'  # how lead bytes 0xD8 / 0xD9 render in cp1252
    harakat = {chr(cp) for cp in range(0x064B, 0x0653)}
    tatweel = '\u0640'

    # Windows-1252 has no character for byte 0x81, so the trail byte of FEH
    # (D9 81) is often lost and only the lead survives.  FEH is by far the
    # most common character affected, so a lead U+00D9 with no trail is read
    # as FEH.  A stray U+00D8 has no comparably safe guess and is left alone.
    lone_lead_fallback = {'\u00d9': '\u0641'}

    # Map every character a trail byte can appear as back to that byte: the
    # cp1252 rendering when one exists, and the raw Latin-1 / C1 code point,
    # which is what tools emit for the bytes cp1252 leaves undefined.
    trail_byte = {}
    for byte in range(0x80, 0xC0):
        trail_byte[chr(byte)] = byte
        try:
            trail_byte[bytes([byte]).decode('cp1252')] = byte
        except UnicodeDecodeError:
            pass  # 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in cp1252

    pair = re.compile(
        '([{}])([{}])?'.format(lead_chars, ''.join(map(re.escape, trail_byte)))
    )

    def repair(match):
        lead, trail = match.groups()
        if trail is None:
            return lone_lead_fallback.get(lead, lead)
        # ord(lead) equals the original lead byte (0xD8 or 0xD9).
        decoded = bytes([ord(lead), trail_byte[trail]]).decode('utf-8')
        return '' if decoded in harakat else decoded

    return pair.sub(repair, text).replace(tatweel, '')

# Read the mojibake dump, repair it, and write the result out as a SQL file.
# NOTE: errors='ignore' silently drops any bytes that are not valid UTF-8.
with open('code.txt', 'r', encoding='utf-8', errors='ignore') as f:
    content = f.read()

decoded_content = decode_arabic_mojibake(content)

with open('decoded_sql_file.sql', 'w', encoding='utf-8') as f:
    f.write(decoded_content)
# Report success only once the output file has actually been written.
print("done")