In [15]:
import os
import string
import unicodedata
from collections import Counter

import pandas as pd

def load_text_files(folder_path):
    """Load every ``.txt`` file located directly inside *folder_path*.

    The scan is not recursive. Entries are visited in sorted name order so
    that repeated runs produce results in the same order.

    Args:
        folder_path: Path of the directory to scan.

    Returns:
        A dict mapping each file name (not the full path) to the file's
        entire contents decoded as UTF-8.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    text_data = {}
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(folder_path, filename)
        # A sub-directory can also be named "*.txt"; open() would raise on it.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, 'r', encoding='utf-8') as file:
            text_data[filename] = file.read()
    return text_data

def analyze_text(text):
    """Count the words of *text* after removing punctuation and lowercasing.

    Punctuation characters are deleted (not replaced by a space), the result
    is lowercased and then split on whitespace.

    Args:
        text: The text to analyze.

    Returns:
        A ``(word_count, word_frequencies)`` tuple where ``word_count`` is the
        total number of words and ``word_frequencies`` is a ``Counter``
        mapping each lowercased word to its number of occurrences.
    """
    # string.punctuation only covers ASCII. Non-English text routinely uses
    # Unicode punctuation (« » — … “ ”) that would otherwise stay glued to
    # words and prevent exact keyword matches, so strip every character in a
    # Unicode "P*" (punctuation) category as well.
    removable = {
        ord(char): None
        for char in set(text)
        if char in string.punctuation or unicodedata.category(char).startswith('P')
    }
    words = text.translate(removable).lower().split()

    return len(words), Counter(words)

def main(folder_path, keywords, primary_csv, overall_csv):
    """Analyze all text files in a folder and write two CSV reports.

    Args:
        folder_path: Directory passed to ``load_text_files``.
        keywords: Iterable of keywords to count. Each keyword is looked up
            as-is in the word frequencies returned by ``analyze_text`` (which
            lowercases the text), so keywords should already be lowercase.
            Duplicates are ignored.
        primary_csv: Output path for the per-file report; one row per file
            with the columns ``File``, ``Word Count`` and one column per
            keyword.
        overall_csv: Output path for the totals report; one row per keyword
            with the columns ``Keyword`` and ``Counts``.
    """
    text_data = load_text_files(folder_path)

    # Drop repeated keywords while keeping first-seen order, so each keyword
    # maps to exactly one column.
    unique_keywords = list(dict.fromkeys(keywords))

    primary_analysis = []
    # Seed with zeros so every keyword is reported even if no file is found.
    total_keyword_counts = Counter({keyword: 0 for keyword in unique_keywords})

    for filename, text in text_data.items():
        word_count, word_frequencies = analyze_text(text)

        keyword_counts = {
            keyword: word_frequencies.get(keyword, 0) for keyword in unique_keywords
        }
        total_keyword_counts.update(keyword_counts)

        primary_analysis.append({
            "File": filename,
            "Word Count": word_count,
            **keyword_counts,
        })

    # Explicit columns keep the CSV header present even when there are no rows.
    # dict.fromkeys guards against a keyword that collides with a fixed column.
    primary_columns = list(dict.fromkeys(["File", "Word Count", *unique_keywords]))
    primary_df = pd.DataFrame(primary_analysis, columns=primary_columns)
    overall_keywords_df = pd.DataFrame(
        list(total_keyword_counts.items()), columns=["Keyword", "Counts"]
    )

    # Save results to CSV
    primary_df.to_csv(primary_csv, index=False)
    overall_keywords_df.to_csv(overall_csv, index=False)

if __name__ == "__main__":
    # Script entry point: prompt for keywords, analyze the folder, and report
    # where the two CSV files were written.

    # Specify the folder path
    folder_path = "/home/alikhan/Desktop/Data/Parsing/Донор_Tengri"
    primary_csv = "primary_analysis.csv"
    overall_csv = "overall_statistics.csv"

    # Input keywords from the user
    raw_keywords = input("Enter your keywords, separated by commas: ").split(",")
    cleaned_keywords = (keyword.strip().lower() for keyword in raw_keywords)
    # Skip blanks (empty input, doubled or trailing commas) so no empty-named
    # column is produced, and drop repeats while keeping the entered order.
    keywords = list(dict.fromkeys(keyword for keyword in cleaned_keywords if keyword))

    # Execute the analysis
    main(folder_path, keywords, primary_csv, overall_csv)

    print(f"Primary analysis saved to {primary_csv}")
    print(f"Overall statistics saved to {overall_csv}")

    

Enter your keywords, separated by commas: дамыған, жасалды, келісім, қолжетімді,  Прогресс, алғыс, қанағаттандырады, білікті, жоғары, берді, қауқарлы, Прогресс,способны, успешно, удовлетворительно, согласие 
Primary analysis saved to primary_analysis.csv
Overall statistics saved to overall_statistics.csv
