In [43]:
import os
from collections import Counter
import string

def load_text_files(folder_path):
    """Load all text files from the folder."""
    text_data = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                text_data[filename] = file.read()
    return text_data

def analyze_text(text):
    """Analyze a single text for word counts and top words."""
    # Remove punctuation and convert to lowercase
    translator = str.maketrans('', '', string.punctuation)
    text = text.translate(translator).lower()
    words = text.split()
    
    word_count = len(words)
    word_frequencies = Counter(words)
    return word_count, word_frequencies

def main(folder_path, keywords):
    """Main function to analyze all text files."""
    text_data = load_text_files(folder_path)
    
    analysis_results = {}
    total_word_counts = []
    total_word_frequencies = Counter()
    total_keyword_counts = Counter()
    
    for filename, text in text_data.items():
        word_count, word_frequencies = analyze_text(text)
        total_word_counts.append(word_count)
        total_word_frequencies.update(word_frequencies)
        
        keyword_counts = {keyword: word_frequencies.get(keyword, 0) for keyword in keywords}
        total_keyword_counts.update(keyword_counts)
        top_words = word_frequencies.most_common(10)
        
        analysis_results[filename] = {
            "word_count": word_count,
            "keyword_counts": keyword_counts,
            "top_words": top_words
        }
    
    # Overall statistics
    overall_top_words = total_word_frequencies.most_common(10)
    
    return analysis_results, total_word_counts, overall_top_words, total_keyword_counts

if __name__ == "__main__":
    # Specify the folder path
    folder_path = "/home/alikhan/Desktop/Data/Parsing/Донор"
    
    # Input keywords from the user
    keywords = input("Enter your keywords, separated by commas: ").strip().split(",")
    keywords = [keyword.strip().lower() for keyword in keywords]  # Clean up the input
    
    # Execute the analysis
    results, word_counts, overall_top_words, total_keyword_counts = main(folder_path, keywords)

    # Print results
    for file, analysis in results.items():
        print(f"File: {file}")
        print(f"Total Words: {analysis['word_count']}")
        print(f"Keyword Counts: {analysis['keyword_counts']}")
        print(f"Top 10 Words: {analysis['top_words']}")
        print("-" * 50)

    print("Overall Top 10 Words Across All Files:")
    print(overall_top_words)
    print("\nTotal Keyword Counts Across All Files:")
    print(total_keyword_counts)


Enter your keywords, separated by commas: қайтыс
File: Донор болу – қайырымды іс.txt
Total Words: 99
Keyword Counts: {'қайтыс': 0}
Top 10 Words: [('қан', 5), ('–', 4), ('донор', 3), ('қайырымды', 3), ('аудандық', 3), ('пробация', 3), ('іс', 2), ('тапсыру', 2), ('азаматтар', 2), ('болу', 1)]
--------------------------------------------------
File: Ұлттық кардиохирургияда жүрек донорының тапшылығы қалай шешімін тауып жатыр.txt
Total Words: 1028
Keyword Counts: {'қайтыс': 4}
Top 10 Words: [('жүрек', 25), ('бұл', 13), ('жасанды', 10), ('бар', 9), ('жыл', 8), ('бойынша', 8), ('осы', 7), ('ғана', 7), ('дейді', 7), ('жоқ', 7)]
--------------------------------------------------
File: Ғалымдар  донорлық қан ойлап тапты.txt
Total Words: 87
Keyword Counts: {'қайтыс': 0}
Top 10 Words: [('қан', 7), ('екінші', 3), ('ғалымдар', 2), ('ойлап', 2), ('тапты', 2), ('бұл', 2), ('қант', 2), ('тобын', 2), ('әмбебап', 2), ('алып', 2)]
--------------------------------------------------
File: Ел экономикасының 