In [1]:
import pandas as pd
import re
from threading import Thread

In [2]:
def remove_formatting(text):
    """
    Removes formatting characters (such as punctuation, special characters) from the text.
    
    Parameters:
    text (str): The input text string.
    
    Returns:
    str: The text with formatting characters removed.
    """
    # Define the regex pattern to match punctuation and special characters
    pattern = r'[^\w\s]'
    # Remove formatting characters
    cleaned_text = re.sub(pattern, '', text)
    return cleaned_text

In [15]:
def check_and_format_text(original_text, new_text):
    """
    Compares original_text and new_text to identify formatting differences.
    If new words are found in new_text that are not present in original_text,
    those words are removed from new_text.
    
    Parameters:
    original_text (str): The original text string.
    new_text (str): The new text string with potential formatting differences.
    
    Returns:
    str: The new_text with any new words removed.
    """
    original_text_clean = remove_formatting(original_text)
    new_text_clean = remove_formatting(new_text)
    
    original_words = set(original_text_clean.split())
    new_words = new_text_clean.split()
    
    # # Filter out new words that are not in the original text
    # filtered_new_words = [word for word in new_words if word in original_words]
    # print([word for word in new_words if word not in original_words] )
    
    # # Join the filtered words to form the final text
    # filtered_new_text = ' '.join(filtered_new_words)
    
    # return filtered_new_text

    # Identify words to keep based on original text
    filtered_words = [word for word in new_words if word in original_words]
    
    # Create a regex pattern to match words to keep
    pattern = r'\b(' + '|'.join(re.escape(word) for word in filtered_words) + r')\b'

    filtered_new_text = ' '.join(filtered_words)
    
    # Preserve original formatting by replacing words not in filtered_words with an empty string
    formatted_new_text = re.sub(pattern, lambda match: match.group(0), filtered_new_text)
    
    return formatted_new_text


In [13]:

def update_cell(df,df2, col, row_idx):
    original_text = df.at[row_idx, col]
    modified_text = df2.at[row_idx,col]
    try:
        new_text = check_and_format_text(original_text,modified_text)
        if new_text == modified_text:
            if(col == "Medium Homework"):
                print('True')
                print(f"new text {new_text}\n\nmodified text {modified_text}")
        else:
            print(f"new text {new_text}\n\nmodified text {modified_text}")
    except Exception as e:
        print(f"Error updating row {row_idx}, column '{col}': {e}")

In [8]:
def update_column(df,df2, col):
    for row_idx in range(9):
        # print(row_idx)
        update_cell(df,df2, col, row_idx)
        df[col] = df[col].apply(lambda x: x.replace('\n', '\\n') if isinstance(x, str) else x)

In [17]:
row = [0,1,5,6,8,9,12,13,15,18,19,20,21,22,23,26,27,28,29,30,33,34,35,36,39,40,41,42,43,46,50,51,53,54,55,56,57,58,60,61,62,64,65,66,67,68,69]

In [20]:
print(row[0:13])

[0, 1, 5, 6, 8, 9, 12, 13, 15, 18, 19, 20, 21]


In [6]:
def main(file_path, file_path_modified , columns):
    df = pd.read_excel(file_path)
    df2 = pd.read_excel(file_path_modified)
    # threads = []
    
    for col in columns:
        update_column(df,df2,col)
    #     thread = Thread(target=update_column, args=(df, df2, col))
    #     threads.append(thread)
    #     thread.start()
    
    # for thread in threads:
    #     thread.join()
    
    # # Use xlsxwriter to write the DataFrame to Excel
    # writer = pd.ExcelWriter(output_file_path, engine='xlsxwriter')
    # df.to_excel(writer, index=False, sheet_name='Sheet1')
    # writer.close()
    print("Processing complete and output file saved")

In [16]:
if __name__ == "__main__":
    file_path = 'lta.xlsx' 
    columns =  [ 'Summary',
       'Home work', 'Easy Homework', 'Medium Homework', 'Hard Homework',
       'Classwork', 'Easy Classwork', 'Medium Classwork', 'Hard Classwork',
       'Assessment', 'Easy Assessment', 'Medium Assessment', 'Hard Assessment',
       'Training references', 'Real world scenarios', 'Lesson Plan',
       'Test/Exam']
    file_path_modified = 'output.xlsx' 
    main(file_path,file_path_modified, columns)

new text Poem Icecream Man by Rachel Field is a poem that describes the arrival of the icecream man in the summer bringing joy and relief from the heat through his colorful and flavorful ice creams The poem captures the essence of childhood summers and the simple pleasures of enjoying an ice The poem uses vivid imagery to bring the scene to life such as bricks a blaze of heat cooling brown and white and roses and sweet peas These phrases help paint a mental picture of the hot summer day cooled by the delightful sight of ice and The poem features a consistent rhyming scheme that adds a musical quality to the reading making it more engaging and enjoyable for young readers The rhyme scheme helps to emphasize the playful and lighthearted tone of the poem

modified text **Poem Appreciation:**\nSummary: "Ice-cream Man" by Rachel Field is a poem that describes the arrival of the ice-cream man in the summer, bringing joy and relief from the heat through his colorful and flavorful ice creams. T