In [4]:
import csv
import re
import json
from typing import Dict, List, Any

class ComprehensiveSummaryGenerator:
    def __init__(self, file_path: str):
        """
        Initialize the comprehensive summary generator.

        :param file_path: Path to the CSV file to be summarized
        """
        self.file_path = file_path
        self.summary_data: Dict[str, Any] = {}

    def _extract_main_definition(self, text: str) -> str:
        """
        Extract the main definition from the text.

        :param text: Input text
        :return: Main definition sentence
        """
        # Look for sentences that start with definitive language
        definition_sentences = re.findall(
            r'^(The\s+\w+\s+is\s+[^.]+\.|[^.]+\s+is\s+defined\s+as\s+[^.]+\.)',
            text,
            re.MULTILINE | re.IGNORECASE
        )

        # Return the first definition or a truncated version of the text
        return definition_sentences[0] if definition_sentences else text[:300]

    def _extract_structural_components(self, text: str) -> Dict[str, List[str]]:
        """
        Extract structural components from the text.

        :param text: Input text
        :return: Dictionary of structural components
        """
        # Regex patterns to find structural components
        patterns = {
            'Organs': [
                r'(\w+\s+organ[s]?)',
                r'([\w\s]+\s+system\s+organ[s]?)',
                r'([\w\s]+\s+located\s+in\s+the\s+\w+)'
            ],
            'Muscles': [
                r'([\w\s]+\s+muscle[s]?)',
                r'(muscle[s]?\s+that\s+[^.]+)'
            ],
            'Systems': [
                r'([\w\s]+\s+system)',
                r'(system[s]?\s+that\s+[^.]+)'
            ]
        }

        structural_components = {}
        for category, pattern_list in patterns.items():
            components = []
            for pattern in pattern_list:
                found = re.findall(pattern, text, re.IGNORECASE)
                components.extend([item.strip() for item in found])

            # Remove duplicates and truncate
            structural_components[category] = list(dict.fromkeys(components))[:10]

        return structural_components

    def _extract_keywords(self, text: str, max_keywords: int = 20) -> List[str]:
        """
        Extract keywords from the text.

        :param text: Input text to extract keywords from
        :param max_keywords: Maximum number of keywords to extract
        :return: List of keywords
        """
        # Remove common words and punctuation
        words = re.findall(r'\b\w+\b', text.lower())

        # Remove very common words
        stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'])
        filtered_words = [word for word in words if word not in stop_words and len(word) > 2]

        # Count word frequencies
        word_freq = {}
        for word in filtered_words:
            word_freq[word] = word_freq.get(word, 0) + 1

        # Sort by frequency and take top keywords
        keywords = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:max_keywords]

        return [word for word, freq in keywords]

    def generate_comprehensive_summary(self) -> Dict[str, Any]:
        """
        Generate a comprehensive summary from the CSV file.

        :return: Dictionary containing comprehensive summary information
        """
        # Read the CSV file
        with open(self.file_path, 'r', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                # Assuming the CSV has 'Title' and 'Content' columns
                title = row.get('Title', 'Untitled')
                content = row.get('Content', '')

                # Generate comprehensive summary
                self.summary_data = {
                    'title': title,
                    'main_definition': self._extract_main_definition(content),
                    'structural_components': self._extract_structural_components(content),
                    'keywords': self._extract_keywords(content),
                    'document_metadata': {
                        'total_characters': len(content),
                        'total_words': len(content.split())
                    }
                }

                # Break after first row (assuming single document)
                break

        return self.summary_data

    def export_summary(self, output_format: str = 'json') -> str:
        """
        Export the generated summary to a specified format.

        :param output_format: Output format ('json' or 'text')
        :return: Formatted summary string
        """
        if not self.summary_data:
            self.generate_comprehensive_summary()

        if output_format == 'json':
            return json.dumps(self.summary_data, indent=2)
        elif output_format == 'text':
            summary_text = f"Title: {self.summary_data['title']}\n\n"

            summary_text += "Main Definition:\n"
            summary_text += f"{self.summary_data['main_definition']}\n\n"

            summary_text += "Structural Components:\n"
            for category, components in self.summary_data['structural_components'].items():
                summary_text += f"{category}:\n"
                for component in components:
                    summary_text += f"- {component}\n"
                summary_text += "\n"

            summary_text += "Keywords:\n"
            summary_text += ", ".join(self.summary_data['keywords'])

            summary_text += f"\n\nDocument Metadata:\n"
            summary_text += f"Total Characters: {self.summary_data['document_metadata']['total_characters']}\n"
            summary_text += f"Total Words: {self.summary_data['document_metadata']['total_words']}"

            return summary_text
        else:
            raise ValueError("Unsupported output format. Use 'json' or 'text'.")

# Example usage
def main():
    # Create a comprehensive summary generator
    generator = ComprehensiveSummaryGenerator('/content/summarized_Abdomen.csv')

    # Generate and print summary
    print(generator.export_summary('text'))

if __name__ == '__main__':
    main()

Title: Abdomen

Main Definition:
The abdomen is the front part of the torso between the thorax chest and pelvis in humans and in other vertebrates.

Structural Components:
Organs:
- of organs
- most organs
- digestive organs
- abdominal organs
- pelvic organs
- retroperitoneal organs
- inner organs
- the organs
- an organ
- Abdominal organs

Muscles:
- The abdomen is a large body cavity enclosed by the abdominal muscles
- The transverse abdominal muscle
- The muscle
- rectus abdominis is enclosed in a thick sheath formed by fibers from each of the three muscles
- The pyramidalis muscle
- The abdominal muscles assist as muscles
- These muscles
- Together with the back muscles
- The transverse abdominis muscle is the deepest muscle
- rectus abdominis is the muscle

Systems:
- The abdomen in vertebrates contains a number of organs belonging to the digestive system and urinary system
- The abdominal cavity contains most organs of the digestive system
- Other digestive organs are known as t

In [3]:
import csv
import re
import json
from typing import Dict, List, Any

class CSVSummaryGenerator:
    def __init__(self, file_path: str):
        """
        Initialize the summary generator with a CSV file.

        :param file_path: Path to the CSV file to be summarized
        """
        self.file_path = file_path
        self.summary_data: Dict[str, Any] = {}

    def _extract_key_points(self, text: str, max_points: int = 10) -> List[str]:
        """
        Extract key points from a given text.

        :param text: Input text to extract points from
        :param max_points: Maximum number of points to extract
        :return: List of key points
        """
        # Remove extra whitespaces and split into sentences
        sentences = re.findall(r'[^.!?]+[.!?]', text)

        # Score sentences based on length and potential importance
        scored_sentences = [
            (sentence.strip(),
             len(sentence.split()) * (1 if 'important' in sentence.lower() or 'key' in sentence.lower() else 0.5)
            ) for sentence in sentences
        ]

        # Sort by score and take top points
        key_points = [sent for sent, _ in sorted(scored_sentences, key=lambda x: x[1], reverse=True)[:max_points]]

        return key_points

    def _extract_keywords(self, text: str, max_keywords: int = 15) -> List[str]:
        """
        Extract keywords from the text.

        :param text: Input text to extract keywords from
        :param max_keywords: Maximum number of keywords to extract
        :return: List of keywords
        """
        # Remove common words and punctuation
        words = re.findall(r'\b\w+\b', text.lower())

        # Remove very common words
        stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'])
        filtered_words = [word for word in words if word not in stop_words and len(word) > 2]

        # Count word frequencies
        word_freq = {}
        for word in filtered_words:
            word_freq[word] = word_freq.get(word, 0) + 1

        # Sort by frequency and take top keywords
        keywords = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:max_keywords]

        return [word for word, freq in keywords]

    def generate_summary(self) -> Dict[str, Any]:
        """
        Generate a structured summary from the CSV file.

        :return: Dictionary containing summary information
        """
        # Read the CSV file
        with open(self.file_path, 'r', encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                # Assuming the CSV has 'Title' and 'Content' columns
                title = row.get('Title', 'Untitled')
                content = row.get('Content', '')

                # Generate summary
                self.summary_data = {
                    'title': title,
                    'key_points': self._extract_key_points(content),
                    'keywords': self._extract_keywords(content),
                    'length': {
                        'total_characters': len(content),
                        'total_words': len(content.split())
                    },
                    'raw_text': content[:500] + '...' if len(content) > 500 else content
                }

                # Break after first row (assuming single document)
                break

        return self.summary_data

    def export_summary(self, output_format: str = 'json') -> str:
        """
        Export the generated summary to a specified format.

        :param output_format: Output format ('json' or 'text')
        :return: Formatted summary string
        """
        if not self.summary_data:
            self.generate_summary()

        if output_format == 'json':
            return json.dumps(self.summary_data, indent=2)
        elif output_format == 'text':
            summary_text = f"Title: {self.summary_data['title']}\n\n"
            summary_text += "Key Points:\n"
            for i, point in enumerate(self.summary_data['key_points'], 1):
                summary_text += f"{i}. {point}\n"

            summary_text += "\nKeywords:\n"
            summary_text += ", ".join(self.summary_data['keywords'])

            summary_text += f"\n\nDocument Length:\n"
            summary_text += f"Total Characters: {self.summary_data['length']['total_characters']}\n"
            summary_text += f"Total Words: {self.summary_data['length']['total_words']}"

            return summary_text
        else:
            raise ValueError("Unsupported output format. Use 'json' or 'text'.")

# Example usage
def main():
    # Create a summary generator
    generator = CSVSummaryGenerator('/content/summarized_Abdomen.csv')

    # Generate and print summary in JSON format
    print(generator.export_summary('json'))

    # Alternatively, print summary in text format
    # print(generator.export_summary('text'))

if __name__ == '__main__':
    main()

{
  "title": "Abdomen",
  "key_points": [
    "Other digestive organs are known as the accessory digestive organs and include the liver its attached gallbladder and the pancreas and these communicate with the rest of the system via various ducts.",
    "Together with the back muscles they provide postural support and are important in defining the form.",
    "The third line is called the intertubercular line and runs across between the two rough tubercles which can be felt on the outer lip of the crest of the ilium.",
    "The abdomen is a large body cavity enclosed by the abdominal muscles at the front and to the sides and by part of the vertebral column at the back.",
    "It is important to properly exercise the abdominal muscles together with the back muscles.",
    "The Protura do have rudimentary leglike appendages on the first three abdominal segments and Archaeognatha possess small articulated styli which are sometimes considered to be rudimentary appendages.",
    "abdominal m