# In [4]:
import pandas as pd
import re
from collections import Counter

def extract_groups(file_path, column_name, target_string, output_file="group_counts.txt") -> None:
    """Count group names listed in a CSV text column and save the tally.

    Every string cell of ``column_name`` is searched for the first occurrence
    of ``<target_string> : [code]<I>name1, name2, ...</I>[/code]``. The
    comma-separated names inside the ``<I>`` tag are stripped of surrounding
    whitespace and tallied across all rows. The tally is written to
    ``output_file`` as a plain-text table with the columns ``Group_name`` and
    ``Number of occurrences``, highest count first (ties keep first-seen
    order).

    Args:
        file_path: Path of the CSV file to read (decoded as UTF-8).
        column_name: Name of the column holding the text to scan.
        target_string: Label that precedes the ``[code]`` block. It is matched
            literally, so regex metacharacters in it have no special meaning.
        output_file: Path of the text file to write (encoded as UTF-8).

    Returns:
        None. Any failure (unreadable file, missing column, write error) is
        reported on stdout rather than raised.
    """
    try:
        df = pd.read_csv(file_path, encoding='utf-8')

        if column_name not in df.columns:
            raise ValueError(f"Column '{column_name}' not found in the file.")

        # Escape the label so characters such as "(", "+" or "." in
        # target_string are matched verbatim, and compile once for all rows.
        group_pattern = re.compile(
            rf"{re.escape(target_string)} : \[code\]<I>(.*?)</I>\[/code\]"
        )

        group_counts = Counter()
        for cell in df[column_name]:
            # Empty cells are read back as float NaN; only text can match.
            if not isinstance(cell, str):
                continue
            match = group_pattern.search(cell)
            if match is None:
                continue
            names = (name.strip() for name in match.group(1).split(","))
            # Skip blanks produced by "a, ,b" or an empty <I></I> tag so they
            # are not reported as a group with an empty name.
            group_counts.update(name for name in names if name)

        # most_common() orders by descending count and keeps first-seen order
        # for ties, which makes the report deterministic.
        group_data = pd.DataFrame(
            group_counts.most_common(),
            columns=['Group_name', 'Number of occurrences'],
        )

        # Explicit encoding: group names may contain non-ASCII characters and
        # the platform default encoding is not guaranteed to be UTF-8.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(group_data.to_string(index=False))

        print(f"Group counts successfully saved to {output_file}.")

    except Exception as e:
        # Top-level boundary: callers expect a printed message, not a raise.
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    # Script entry point: tally the "Groups" entries found in the
    # "Additional comments" column of the challenge CSV and write the
    # resulting table to group_counts.txt.
    input_file, column_name = "coding challenge.csv", "Additional comments"
    target_string, output_file = "Groups", "group_counts.txt"

    extract_groups(
        file_path=input_file,
        column_name=column_name,
        target_string=target_string,
        output_file=output_file,
    )


# Output:
# Group counts successfully saved to group_counts.txt.
