In [17]:
def data_filtering(df, excluded_owners=('Brittany Steele', 'Faisal Khan')):
    """
    Filter the input DataFrame and split what remains into trademark and non-trademark rows.

    Parameters:
    - df (DataFrame): The input DataFrame to be filtered. It must contain the columns
      'DeadlineType', 'Owner', 'Matter.Title', 'Matter.Type' and 'TaskType'. It is not modified.
    - excluded_owners (iterable of str): 'Owner' values whose rows are removed.
      Defaults to ('Brittany Steele', 'Faisal Khan').

    Returns (in this order):
    - trademark_df (DataFrame): Remaining rows whose 'Matter.Type' equals 'trademark' (case-insensitive).
    - patent_urgnent_nhd_df (DataFrame): Every other remaining row.
    Both frames keep the index labels of the filtered, re-indexed DataFrame.

    Raises:
    - KeyError: If any required column is missing from df.

    The filtering process involves the following steps:
    1. Remove rows where 'DeadlineType' is 'Internal Deadline'.
    2. Remove rows where any of the following holds:
       - 'Owner' is one of excluded_owners,
       - 'Matter.Title' contains 'FTO' (case-sensitive),
       - 'TaskType' contains 'review' (case-insensitive),
       - 'Matter.Type' contains 'trademark: opposition' (case-insensitive).
    3. Reset the index of the remaining rows.
    4. Split the remaining rows on whether 'Matter.Type' is exactly 'trademark' (case-insensitive).

    Missing or non-string cells never match a text condition; they do not raise.
    """
    required_columns = ('DeadlineType', 'Owner', 'Matter.Title', 'Matter.Type', 'TaskType')
    missing_columns = [name for name in required_columns if name not in df.columns]
    if missing_columns:
        raise KeyError(f"data_filtering: missing required column(s): {missing_columns}")

    # Removing the internal deadlines
    df = df[df['DeadlineType'] != 'Internal Deadline']

    # astype(str) lets the .str accessor work even when a column holds NaN or
    # numeric cells; na=False then treats any remaining missing value as "no match".
    matter_title = df['Matter.Title'].astype(str)
    task_type = df['TaskType'].astype(str).str.lower()
    matter_type = df['Matter.Type'].astype(str).str.lower()

    # regex=False: the search terms are literal substrings, not patterns.
    excluded = (
        df['Owner'].isin(list(excluded_owners))
        | matter_title.str.contains('FTO', regex=False, na=False)
        | task_type.str.contains('review', regex=False, na=False)
        | matter_type.str.contains('trademark: opposition', regex=False, na=False)
    )
    df = df[~excluded].reset_index(drop=True)

    # First layer of bifurcation
    is_trademark = df['Matter.Type'].astype(str).str.lower() == 'trademark'
    trademark_df = df[is_trademark]
    patent_urgnent_nhd_df = df[~is_trademark]
    return trademark_df, patent_urgnent_nhd_df

In [18]:
def bifurcation(df):
    """
    Split a DataFrame by its 'DeadlineType' column.

    Parameters:
    - df (DataFrame): Input data containing a 'DeadlineType' column.

    Returns:
    - eed_df (DataFrame): Rows whose 'DeadlineType' is 'Extendable External Deadline'.
    - hed_df (DataFrame): Rows whose 'DeadlineType' is 'Hard External Deadline'.
    Rows with any other 'DeadlineType' appear in neither result.
    """
    deadline_kind = df['DeadlineType']
    is_extendable = deadline_kind == 'Extendable External Deadline'
    is_hard = deadline_kind == 'Hard External Deadline'
    return df.loc[is_extendable], df.loc[is_hard]

In [24]:
import pandas as pd
# Load the task export CSV (decoded as 'latin') and replace every missing
# cell with the placeholder string "Missing Data".
df = pd.read_csv(
    r"C:\Users\yuvraj.s_maxval\Documents\GitHub\AC_Automation\artifacts\AppCollTasksVisible20241129.csv",
    encoding='latin',
).fillna("Missing Data")

# Filter the tasks and split them into the two frames returned by data_filtering.
trademark_df, patent_urgnent_nhd_df = data_filtering(df)