In [6]:
# !pip install --upgrade scikit-learn==1.3.2


In [50]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.decomposition import TruncatedSVD
from joblib import dump, load
import sys

# from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
# smote = SMOTE()

import nltk
import pandas as pd
import string
import re

In [57]:
import nltk
import pandas as pd
import string
import re
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet


# twelve test questions 
# url = 'https://docs.google.com/spreadsheets/d/18Sd81YVm8CTNTEsvs-lUOujExNg8Xw1MqEwAu-MGsNY/export?format=csv'

# #two other test questions
# url = 'https://docs.google.com/spreadsheets/d/1srQExpxj8Xw2kKHCpuI61U8ewJMyY6Mc/export?format=csv'


# Read the CSV data into a DataFrame
# df = pd.read_csv(url)
# df = df.iloc[[0]]  # Use the first row for testing


df = pd.read_csv('Dataset/r12_problems.csv')

# Download NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Initialize the WordNet lemmatizer
lemmatizer = WordNetLemmatizer()

# Function to map Treebank POS tags to WordNet POS tags
def get_wordnet_pos(treebank_tag):
    """
    Map POS tag to a format accepted by WordNet lemmatizer.
    """
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN

# Function to extract class-attribute mappings from the attribute string
def extract_class_attribute_mapping(attribute_string):
    class_attribute_mapping = {}
    # Find all class-attribute groups in the format 'ClassName [attributes]'
    class_attribute_groups = re.findall(r'(\w+)\s*\[([^\]]+)\]', attribute_string)
    for group in class_attribute_groups:
        class_name, attributes = group
        # Lemmatize the class name
        class_name_lem = lemmatizer.lemmatize(class_name.lower(), pos='n')
        attributes_list = [attr.strip() for attr in attributes.split(',')]
        # Lemmatize attributes
        attributes_lem = [lemmatizer.lemmatize(attr.lower(), pos='n') for attr in attributes_list]
        class_attribute_mapping[class_name_lem] = attributes_lem
    return class_attribute_mapping

# Function to parse relationships between classes
def parse_relationships(relationship_string):
    relationships = []
    for rel in relationship_string.split(','):
        rel = rel.strip()
        if 'and' in rel:
            class_pair = tuple(map(str.strip, rel.split('and')))
            # Lemmatize class names in relationships
            class_pair_lem = tuple(lemmatizer.lemmatize(cls.lower(), pos='n') for cls in class_pair)
            relationships.append(class_pair_lem)
    return relationships

# Global sentence counter
global_sentence_counter = 0

# Function to tag words in the problem text as 'Class', 'Attribute', or 'Other'
def tag_problem_classes_and_attributes(problem_number, problem, class_attribute_mapping, class_list_lem, relationships):
    global global_sentence_counter  # Reference the global sentence counter

    # Lists to store the tagging results
    problems = []
    problem_numbers = []
    sentence_numbers = []
    sent_list = []
    word_list = []
    pos_list = []
    tag_list = []
    class_related_list = []
    class_r_list = []

    # Tokenize the problem into sentences
    sentences = nltk.sent_tokenize(problem)
    for sentence in sentences:
        global_sentence_counter += 1  # Increment global sentence counter
        words = nltk.word_tokenize(sentence)
        # Remove punctuation from words
        words = [word for word in words if word.lower() not in string.punctuation]
        # POS tagging
        pos_tags = nltk.pos_tag(words)

        lemmatized_words = []
        # Lemmatize words
        for word, pos in pos_tags:
            wordnet_pos = get_wordnet_pos(pos)
            lemmatized_word = lemmatizer.lemmatize(word.lower(), pos=wordnet_pos)
            lemmatized_words.append(lemmatized_word)

        lemmatized_sentence = ' '.join(lemmatized_words)

        for word, lemmatized_word, pos in zip(words, lemmatized_words, [p for w, p in pos_tags]):
            problem_numbers.append(problem_number)
            sentence_numbers.append(f"Sentence: {global_sentence_counter}")
            problems.append(problem)
            sent_list.append(lemmatized_sentence)
            word_list.append(lemmatized_word)
            pos_list.append(pos)

            tag = 'Other'
            found_class = 'Other'
            found_relationship = 'Other'

            # Check if the word is an attribute
            attribute_found = False
            for class_name, attributes in class_attribute_mapping.items():
                if lemmatized_word in attributes:
                    tag = "Attribute"
                    found_class = class_name
                    attribute_found = True
                    break

            # If not an attribute, check if the word is a class
            if not attribute_found:
                if lemmatized_word in class_list_lem:
                    tag = "Class"
                    found_class = lemmatized_word
                    # Find related class from relationships
                    for rel in relationships:
                        if found_class in rel:
                            found_relationship = rel[1] if rel[0] == found_class else rel[0]
                            break

            class_related_list.append(found_class if found_class != 'Other' else "")
            class_r_list.append(found_relationship if found_relationship != 'Other' else "")
            tag_list.append(tag)

    # Create a DataFrame with the results
    df = pd.DataFrame({
        'Problem_Number': problem_numbers,
        'Sentence #': sentence_numbers,
        'Problem': problems,
        'Sentence': sent_list,
        'Word': word_list,
        'POS': pos_list,
        'Tag': tag_list,
        'Class_Related': class_related_list,
        'Class_R': class_r_list
    })

    return df

tagged_data_list = []

# Iterate over the DataFrame rows to process each problem
for index, row in df.iterrows():
    problem_number = index + 1
    problem_text = row['Problem']
    attribute_string = row['Atributes']  # Column name 'Atributes'
    relationship_string = row.get('Relationship', '')
    class_list_string = row['Class']

    # Extract and lemmatize class names
    class_list = [cls.strip() for cls in class_list_string.split(',')]
    class_list_lem = [lemmatizer.lemmatize(cls.lower(), pos='n') for cls in class_list]

    # Extract class-attribute mappings and relationships
    class_attribute_mapping = extract_class_attribute_mapping(attribute_string)
    relationships = parse_relationships(relationship_string)

    # Tag the problem text
    tagged_df = tag_problem_classes_and_attributes(
        problem_number, problem_text, class_attribute_mapping, class_list_lem, relationships)
    tagged_data_list.append(tagged_df)

# Concatenate all tagged data into a single DataFrame
final_tagged_df = pd.concat(tagged_data_list, ignore_index=True)

# Reorder and clean up the columns
final_tagged_df = final_tagged_df[['Problem_Number', 'Sentence #', 'Problem', 'Sentence', 'Word', 'POS', 'Tag', 'Class_Related', 'Class_R']]
final_tagged_df['Class_Related'] = final_tagged_df['Class_Related'].replace('', 'Other')
final_tagged_df['Class_R'] = final_tagged_df['Class_R'].replace('', 'Other')

# Display the DataFrame




[nltk_data] Downloading package punkt to /home/abdul/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/abdul/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /home/abdul/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/abdul/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [52]:
final_tagged_df

Unnamed: 0,Problem_Number,Sentence #,Problem,Sentence,Word,POS,Tag,Class_Related,Class_R
0,1,Sentence: 1,The Stroke Recovery System shall have a compre...,the stroke recovery system shall have a compre...,the,DT,Other,Other,Other
1,1,Sentence: 1,The Stroke Recovery System shall have a compre...,the stroke recovery system shall have a compre...,stroke,NNP,Other,Other,Other
2,1,Sentence: 1,The Stroke Recovery System shall have a compre...,the stroke recovery system shall have a compre...,recovery,NNP,Other,Other,Other
3,1,Sentence: 1,The Stroke Recovery System shall have a compre...,the stroke recovery system shall have a compre...,system,NNP,Other,Other,Other
4,1,Sentence: 1,The Stroke Recovery System shall have a compre...,the stroke recovery system shall have a compre...,shall,MD,Other,Other,Other
...,...,...,...,...,...,...,...,...,...
439,2,Sentence: 23,Researcher is empowered to view finding aids f...,the extensive requirement be design to fulfil ...,promote,VBG,Other,Other,Other
440,2,Sentence: 23,Researcher is empowered to view finding aids f...,the extensive requirement be design to fulfil ...,collaboration,NN,Other,Other,Other
441,2,Sentence: 23,Researcher is empowered to view finding aids f...,the extensive requirement be design to fulfil ...,usability,NN,Other,Other,Other
442,2,Sentence: 23,Researcher is empowered to view finding aids f...,the extensive requirement be design to fulfil ...,and,CC,Other,Other,Other


In [58]:
#preprocessing a single requirment problem for prediction

from typing_extensions import final

def process_requirments(one_question):
     #Iterate over the rows in the DataFrame
    df = one_question
    tagged_data_list = []

    # Iterate over the rows in the DataFrame
    for index, row in df.iterrows():
        problem_number = index + 1
        problem_text = row['Problem']
        attribute_string = row['Atributes']  # Corrected column name
        relationship_string = row.get('Relationship', '')

        class_attribute_mapping = extract_class_attribute_mapping(attribute_string)
        relationships = parse_relationships(relationship_string)

        tagged_df = tag_problem_classes_and_attributes(problem_number, problem_text, class_attribute_mapping, relationships)
        tagged_data_list.append(tagged_df)

    # Concatenate all the tagged data into a single DataFrame
    final_tagged_df = pd.concat(tagged_data_list, ignore_index=True)

    # Reorder the columns
    final_tagged_df = final_tagged_df[['Problem_Number', 'Sentence #', 'Problem', 'Sentence', 'Word', 'POS', 'Tag', 'Class_Related', 'Class_R']]


    final_tagged_df['Class_Related'] = final_tagged_df['Class_Related'].replace('', 'Other')
    final_tagged_df['Class_R'] = final_tagged_df['Class_R'].replace('', 'Other')

    # final_tagged_df = final_tagged_df.applymap(lambda x: x.lower() if isinstance(x, str) else x)

    return final_tagged_df

    # Save to a new CSV file or display the DataFrame
    # final_tagged_df.to_csv('tagged_problems.csv', index=False)


In [54]:
# one_question = '''
# #
# sma ekjgkw wgewflewjgwlkow wfopewkfpw

# '''

In [55]:
# final_tagged_df = process_requirments(one_question)

In [56]:
# import nltk
# import pandas as pd
# from nltk.corpus import stopwords

# # Assume final_tagged_df is already created from previous steps
# # If not, make sure to run the code to generate final_tagged_df

# # Download NLTK stopwords corpus
# nltk.download('stopwords')

# # Create a set of English stop words
# stop_words = set(stopwords.words('english'))

# # Convert words to lowercase for case-insensitive comparison
# final_tagged_df['Word_lower'] = final_tagged_df['Word'].str.lower()

# # Create a mask to filter out stop words
# mask = ~final_tagged_df['Word_lower'].isin(stop_words)

# # Filter the DataFrame
# filtered_df = final_tagged_df[mask].copy()

# # Drop the temporary column
# filtered_df.drop(columns=['Word_lower'], inplace=True)

# # Optional: Reset index if needed
# filtered_df.reset_index(drop=True, inplace=True)

# # Optional: Compare the number of 'Other' tags
# original_other_count = final_tagged_df[final_tagged_df['Tag'] == 'Other'].shape[0]
# filtered_other_count = filtered_df[filtered_df['Tag'] == 'Other'].shape[0]

# print(f"Original 'Other' tags count: {original_other_count}")
# print(f"Filtered 'Other' tags count: {filtered_other_count}")


# # Display the filtered DataFrame
# print(filtered_df.head())


In [33]:
# filtered_df

In [60]:

tag_model = load('model/tag_model.joblib')
class_related_model = load('model/class_related_model.joblib')
class_r_model = load('model/class_r_model.joblib')

In [35]:
# final_tagged_df

In [61]:
# Import accuracy_score from scikit-learn
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# Create a list to hold each classification report's results
results = []
Dataframe_results = pd.DataFrame()  # Initialize an empty DataFrame to hold all the predicted data

def predict_class_attribute(parsed_data, rq_label, problem_number):
    global Dataframe_results
    
    # Model to predict Class_Attribute
    predicted_data = parsed_data[['Sentence', 'Word', 'POS']].copy()
    predicted_data.loc[:, 'Tag'] = tag_model.predict(parsed_data[['Sentence', 'Word', 'POS']])
    
    # Generate classification report for Class_Attribute
    report = classification_report(parsed_data['Tag'], predicted_data['Tag'], output_dict=True, zero_division=1)
    
    # Calculate accuracy for Class_Attribute
    accuracy_attr = accuracy_score(parsed_data['Tag'], predicted_data['Tag'])
    
    # Predict Class_Related using Predicted_Tag
    predicted_data.loc[:, 'Class_Related'] = class_related_model.predict(predicted_data[['Sentence', 'Word', 'POS', 'Tag']])
    
    # Generate classification report for Class_Related
    report_related = classification_report(parsed_data['Class_Related'], predicted_data['Class_Related'], output_dict=True, zero_division=1)
    
    # Calculate accuracy for Class_Related
    accuracy_related = accuracy_score(parsed_data['Class_Related'], predicted_data['Class_Related'])
    
    # Predict Class_R using Predicted_Tag and Predicted_Class_Related
    predicted_data.loc[:, 'Class_R'] = class_r_model.predict(predicted_data[['Sentence', 'Word', 'POS', 'Tag', 'Class_Related']])
    
    # Add a column with the problem number for this iteration
    predicted_data['Problem_Number'] = problem_number
    
    # Concatenate the current problem's predicted data to the global DataFrame
    Dataframe_results = pd.concat([Dataframe_results, predicted_data], ignore_index=True)
    
    # Generate classification report for Class_R
    report_r = classification_report(parsed_data['Class_R'], predicted_data['Class_R'], output_dict=True, zero_division=1)
    
    # Calculate accuracy for Class_R
    accuracy_r = accuracy_score(parsed_data['Class_R'], predicted_data['Class_R'])
    
    # Append the metrics for each class (Attribute, Class_Related, Class_R) to the results list
    results.append({
        'rq': rq_label,
        'Class_Attribute_Accuracy': accuracy_attr,
        'Class_Attribute_Precision_Macro': report['macro avg']['precision'],
        'Class_Attribute_Recall_Macro': report['macro avg']['recall'],
        'Class_Attribute_F1-Macro': report['macro avg']['f1-score'],
        
        'Class_Related_Accuracy': accuracy_related,
        'Class_Related_Precision_Macro': report_related['macro avg']['precision'],
        'Class_Related_Recall_Macro': report_related['macro avg']['recall'],
        'Class_Related_F1-Macro': report_related['macro avg']['f1-score'],
        
        'Class_R_Accuracy': accuracy_r,
        'Class_R_Precision_Macro': report_r['macro avg']['precision'],
        'Class_R_Recall_Macro': report_r['macro avg']['recall'],
        'Class_R_F1-Macro': report_r['macro avg']['f1-score']
    })

# Loop over all unique problem numbers in the DataFrame
unique_problem_numbers = final_tagged_df['Problem_Number'].unique()

# Iterate over each unique problem number
for problem_number in unique_problem_numbers:
    # Select rows with the current problem number
    parsed_data = final_tagged_df[final_tagged_df['Problem_Number'] == problem_number]
    
    # Label for the current problem
    rq_label = f"rq{problem_number}"
    
    # Call the prediction function for the current problem
    predict_class_attribute(parsed_data, rq_label, problem_number)

# Convert the results list into a DataFrame
df_results = pd.DataFrame(results)

# Transpose the DataFrame so that each requirement (rq1, rq2, etc.) becomes the header
df_results_transposed = df_results.set_index('rq').transpose()

# Display the transposed DataFrame (optional)
# import ace_tools as tools; tools.display_dataframe_to_user(name="Transposed Classification Report Results", dataframe=df_results_transposed)

# Save Dataframe_results for future analysis
Dataframe_results.to_csv('predicted_results.csv', index=False)

# Save the classification report results for future analysis
df_results.to_csv('classification_report_results.csv', index=False)


In [62]:
df_results_transposed

rq,rq1,rq2,rq3,rq4,rq5,rq6,rq7,rq8,rq9,rq10,rq11,rq12
Class_Attribute_Accuracy,0.915888,0.988095,0.973684,0.921053,0.928571,0.975,0.890244,0.925234,0.994152,0.857143,0.925,0.982143
Class_Attribute_Precision_Macro,0.871035,0.994709,0.988701,0.857456,0.778275,0.961538,0.784375,0.882217,0.990741,0.815965,0.882206,0.969697
Class_Attribute_Recall_Macro,0.901282,0.980392,0.888889,0.882353,0.851111,0.98913,0.833862,0.891073,0.997396,0.839105,0.942308,0.990196
Class_Attribute_F1-Macro,0.881951,0.987232,0.927586,0.842639,0.805232,0.974067,0.74257,0.886484,0.993998,0.82677,0.906912,0.979152
Class_Related_Accuracy,0.906542,0.988095,0.973684,0.921053,0.928571,0.975,0.902439,0.934579,0.988304,0.857143,0.9375,0.982143
Class_Related_Precision_Macro,0.725321,0.996032,0.988701,0.660819,0.54915,0.685714,0.564732,0.632628,0.873016,0.544504,0.571429,0.75
Class_Related_Recall_Macro,0.849328,0.958333,0.944444,0.768519,0.827857,0.995342,0.798062,0.896601,0.962095,0.852041,0.986264,0.992647
Class_Related_F1-Macro,0.611522,0.975273,0.96395,0.463664,0.428872,0.696045,0.392788,0.537173,0.857684,0.457299,0.564214,0.746269
Class_R_Accuracy,0.897196,0.988095,1.0,0.921053,0.928571,0.975,0.902439,0.962617,0.988304,0.968254,0.975,1.0
Class_R_Precision_Macro,0.657738,0.995098,1.0,0.976923,0.468545,0.766667,0.726351,0.695833,0.714286,0.6,0.75,1.0


In [63]:
Dataframe_results

Unnamed: 0,Sentence,Word,POS,Tag,Class_Related,Class_R,Problem_Number
0,a company be comprise of two to eight department,a,DT,Other,Other,Other,1
1,a company be comprise of two to eight department,company,NN,Class,company,department,1
2,a company be comprise of two to eight department,be,VBZ,Other,Other,Other,1
3,a company be comprise of two to eight department,comprise,VBN,Other,Other,Other,1
4,a company be comprise of two to eight department,of,IN,Other,Other,Other,1
...,...,...,...,...,...,...,...
1143,these project can be classify a research proje...,project,NNS,Class,project,student,12
1144,these project can be classify a research proje...,or,CC,Other,Other,Other,12
1145,these project can be classify a research proje...,course-based,JJ,Other,Other,Other,12
1146,these project can be classify a research proje...,project,NNS,Class,project,student,12


In [47]:
# df_results_transposed .to_csv('RQ_results.csv')

In [66]:
import spacy
from collections import defaultdict
import pandas as pd

# Load the spaCy English language model
nlp = spacy.load('en_core_web_sm')

# Extend the Token class to include custom attributes if not already set
from spacy.tokens import Token

if not Token.has_extension('is_class'):
    Token.set_extension('is_class', default=False)
if not Token.has_extension('is_attribute'):
    Token.set_extension('is_attribute', default=False)

def parse_tagged_data(df):
    """
    Parses the DataFrame and groups words by sentence text (not sentence number).
    """
    sentences = defaultdict(list)
    for idx, row in df.iterrows():
        sentence_text = str(row['Sentence'])
        word = str(row['Word'])
        tag = str(row['Tag'])
        sentences[sentence_text].append({'word': word, 'tag': tag})
    return sentences

def process_sentences(sentences):
    class_attributes = defaultdict(set)
    class_relationships = set()

    for sentence_text, tokens in sentences.items():
        # Collect all classes and attributes in the sentence
        classes = [token['word'] for token in tokens if token['tag'] == 'Class']
        attributes = [token['word'] for token in tokens if token['tag'] == 'Attribute']

        # Create relationships between classes and attributes
        for cls in classes:
            for attr in attributes:
                class_attributes[cls.capitalize()].add(attr.capitalize())

        # Reconstruct the sentence text for processing in spaCy
        sentence_words = [token['word'] for token in tokens]
        reconstructed_sentence = ' '.join(sentence_words)
        doc = nlp(reconstructed_sentence)

        # For each class, check if there are relationships with other classes in the sentence
        for token in doc:
            if token.text.lower() in [cls.lower() for cls in classes]:
                subject_class = token.text.capitalize()

                # 1. Look for verbs connected to this class
                for child in token.children:
                    if child.pos_ == 'VERB':
                        verb = child
                        # Check for object classes connected to the verb
                        for obj in verb.children:
                            if obj.text.lower() in [cls.lower() for cls in classes] and obj != token:
                                object_class = obj.text.capitalize()
                                relationship = (subject_class, verb.lemma_, object_class)
                                class_relationships.add(relationship)

                # 2. Look for prepositions linking this class to others
                for prep in [child for child in token.children if child.dep_ == 'prep']:
                    for obj in prep.children:
                        if obj.text.lower() in [cls.lower() for cls in classes]:
                            relationship = (
                                subject_class,
                                prep.text,
                                obj.text.capitalize()
                            )
                            class_relationships.add(relationship)

                # 3. Look for conjunctions (e.g., "Project and Group")
                for conjunct in token.conjuncts:
                    if conjunct.text.lower() in [cls.lower() for cls in classes] and subject_class != conjunct.text.capitalize():
                        relationship = (
                            subject_class,
                            "and",
                            conjunct.text.capitalize()
                        )
                        class_relationships.add(relationship)

                # 4. Check for compound nouns (e.g., "funding group")
                if token.dep_ == 'compound':
                    compound_head = token.head
                    if compound_head.text.lower() in [cls.lower() for cls in classes] and subject_class != compound_head.text.capitalize():
                        relationship = (
                            subject_class,
                            "compound",
                            compound_head.text.capitalize()
                        )
                        class_relationships.add(relationship)

    # Remove redundant relationships (e.g., "Community compound Community")
    class_relationships = {
        (subj, rel, obj) for subj, rel, obj in class_relationships if subj != obj
    }

    return class_attributes, class_relationships

def list_classes_attributes_and_relationships():
    # Get all unique problem numbers
    unique_problem_numbers = Dataframe_results['Problem_Number'].unique()

    # Iterate over each problem number
    for problem_number in unique_problem_numbers:
        print(f"Processing Problem Number: {problem_number}")
        
        # Filter the data for the current problem, considering only rows where Tag is 'Class' or 'Attribute'
        problem_data = Dataframe_results[
            (Dataframe_results['Problem_Number'] == problem_number) &
            (Dataframe_results['Tag'].isin(['Class', 'Attribute']))
        ]
        
        # Parse the data into sentences
        sentences = parse_tagged_data(problem_data)
        
        # Process the sentences to get class attributes and relationships
        class_attributes, class_relationships = process_sentences(sentences)
        
        # Get the words tagged as 'Class'
        class_words = problem_data[problem_data['Tag'] == 'Class']['Word'].unique().tolist()
        
        # Get the words tagged as 'Attribute'
        attribute_words = problem_data[problem_data['Tag'] == 'Attribute']['Word'].unique().tolist()
        
        # Extract class-related and class-r relationships
        class_related_relationships = problem_data[problem_data['Class_Related'] != problem_data['Word']]
        class_related_words = list(zip(class_related_relationships['Class_Related'], class_related_relationships['Word']))

        # Exclude relationships where Class_R is 'Other'
        class_r_relationships = problem_data[
            (problem_data['Class_R'] != problem_data['Word']) & (problem_data['Class_R'] != 'Other')
        ]
        class_r_words = list(zip(class_r_relationships['Class_R'], class_r_relationships['Word']))
        
        # Output the classes and attributes
        print(f"\nClasses: {class_words}")
        print(f"Attributes: {attribute_words}")
        
        # Output the Class_Related relationships
        print("\nClass_Related relationships (Class -> Attribute):")
        for class_word, attribute_word in class_related_words:
            print(f"- {class_word} -> {attribute_word}")

        # Output the Class_R relationships, excluding 'Other'
        print("\nClass_R relationships (Class -> Class):")
        for class_word, related_class_word in class_r_words:
            print(f"- {class_word} -> {related_class_word}")
        
        # Output the attributes associated with each class
        print("\nAttributes associated with classes:")
        for cls, attrs in class_attributes.items():
            print(f"- {cls}: {', '.join(attrs)}")
        
        # Output the relationships between classes
        print("\nRelationships between classes:")
        for subj_class, verb, obj_class in class_relationships:
            print(f"- {subj_class} {verb} {obj_class}")
        
        

        # Print a separator between each problem
        print("\n" + "-"*50 + "\n")

# Call the function to process the DataFrame and list classes, attributes, and relationships
list_classes_attributes_and_relationships()


Processing Problem Number: 1

Classes: ['company', 'employee', 'project', 'production', 'education', 'community', 'budget', 'group']
Attributes: ['department', 'id', 'email', 'name', 'number', 'type', 'title', 'description', 'amount', 'deadline', 'code']

Class_Related relationships (Class -> Attribute):
- employee -> department
- employee -> department
- department -> id
- department -> email
- employee -> department
- employee -> name
- department -> email
- department -> id
- employee -> number
- group -> type
- project -> title
- project -> description
- project -> amount
- project -> deadline
- record -> code

Class_R relationships (Class -> Class):
- department -> company
- department -> employee
- department -> project
- department -> employee
- department -> project
- department -> employee
- department -> employee
- department -> employee
- department -> employee
- department -> employee
- department -> project
- project -> production
- department -> project
- department -> pr