In [1]:
import pandas as pd
import os

# Input workbooks for the consolidation step. Each one is read below with
# `usecols=columns_to_read`, so every file must contain both of those columns.
# NOTE(review): these are absolute, machine-specific Windows paths — the notebook
# only runs as-is where this exact folder exists; consider a configurable base directory.
file_DLL = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Deferred Lines List INDIAMART_SMALIK.xlsx"
file_DR = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Extracted Jan-25 DR.xlsb"
file_RR = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Revenue Reversal INDIAMART_SMALIK 2025-02-03T09_17_49.xlsx"
file_DRP = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Direct revenue Posting OT INDIAMART_SMALIK 2024-11-04T03_39_58.xlsx"

# Columns pulled from every input workbook; the loaded frames are later stacked
# row-wise on exactly these two columns.
columns_to_read = ['Renewal Entry No.', 'Approved Performa']

# Function to read Excel files safely
def read_excel_file(file_path, columns):
    """Load selected columns from an Excel workbook, returning ``None`` on failure.

    Parameters
    ----------
    file_path : str
        Location of the workbook. Paths ending in ``.xlsb`` are read with the
        ``pyxlsb`` engine; any other path uses pandas' default engine choice.
    columns : list
        Forwarded unchanged as ``usecols`` to ``pd.read_excel``.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame, or ``None`` when the file does not exist or any
        exception is raised while reading. A message is printed in both
        failure cases; nothing is re-raised.
    """
    try:
        # Guard clause: a missing file is reported, not raised.
        if not os.path.exists(file_path):
            print(f"⚠️ Warning: File not found - {file_path}")
            return None

        reader_options = {'usecols': columns}
        if file_path.endswith('.xlsb'):
            # Binary workbooks need the pyxlsb engine to be named explicitly here.
            reader_options['engine'] = 'pyxlsb'
        return pd.read_excel(file_path, **reader_options)
    except Exception as e:
        print(f"⚠️ Error reading {file_path}: {e}")
        return None

# Attempt to load every input workbook; unreadable or missing files come back
# as None from read_excel_file and are filtered out here.
input_files = [file_DLL, file_DR, file_RR, file_DRP]
loaded_frames = (read_excel_file(path, columns_to_read) for path in input_files)
dataframes = [frame for frame in loaded_frames if frame is not None]

# Stack whatever was loaded; fall back to an empty frame when nothing was readable
if not dataframes:
    consol_df = pd.DataFrame()
    print('⚠️ No valid files found. Empty DataFrame created.')
else:
    consol_df = pd.concat(dataframes, ignore_index=True)
    print('✅ All available files were loaded and concatenated successfully.')

# Report the resulting size as a quick sanity check
print(f"Final DataFrame shape: {consol_df.shape}")

✅ All available files were loaded and concatenated successfully.
Final DataFrame shape: (659138, 2)


In [3]:
# Report how many fully identical rows the stacked frame contains
duplicates_before = consol_df.duplicated().sum()
print(f"Number of duplicates before removing duplicates: {duplicates_before}")

# Keep one copy of each row, then discard rows with a missing value in any column
consol_df = consol_df.drop_duplicates().dropna()

# Also discard rows where any cell is empty or whitespace-only once rendered as text
stripped_text = consol_df.astype(str).apply(lambda column: column.str.strip())
has_blank_cell = stripped_text.eq('').any(axis=1)
consol_df = consol_df[~has_blank_cell]

# Re-check: this should now be zero
duplicates_after = consol_df.duplicated().sum()
print(f"Number of duplicates after removing duplicates: {duplicates_after}")

# Preview the cleaned frame
print(consol_df.head())

# Persist the unique, fully populated rows
output_path = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Unique_after_Stacked.csv"
consol_df.to_csv(output_path, index=False)

print(f'✅Done! File saved at {output_path}')

Number of duplicates before removing duplicates: 286757
Number of duplicates after removing duplicates: 0
   Renewal Entry No.  Approved Performa
0          2299943.0          1881940.0
1          3486879.0          2722964.0
2          3598174.0          2838382.0
3          3807460.0          3056590.0
4          4092785.0          3354887.0
✅Done! File saved at D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Unique_after_Stacked.csv


In [7]:
# Size of consol_df at this point, taken from the first element of its shape
rows_after_deduplication = consol_df.shape[0]
print(f"Number of rows after removing duplicates: {rows_after_deduplication}")

Number of rows after removing duplicates: 372380


In [9]:
# Workbook that holds the 'Service Group' recorded against each renewal entry
file_DL = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Deferred List INDIAMART_ANUJ 2025-02-03.xlsx"

# sheet_name=None loads every sheet into a {sheet name: DataFrame} dictionary
DL_sheets = pd.read_excel(file_DL, sheet_name=None)

# Columns needed for lookup (every sheet must contain both, otherwise the
# selection below raises KeyError)
columns_to_read = ['Renewal Entry No.', 'Service Group']

# Stack only the lookup columns from all sheets into one table
DL_combined = pd.concat(
    [sheet_df[columns_to_read] for sheet_df in DL_sheets.values()],
    ignore_index=True
)

# Normalise the keys on both sides to digit-only strings (float -> int -> str)
# so that e.g. 2299943.0 and 2299943 compare equal in the merge.
# Missing keys are filled with 0 first because NaN cannot be cast to int.
consol_df['Renewal Entry No.'] = consol_df['Renewal Entry No.'].fillna(0).astype(int).astype(str)
consol_df['Approved Performa'] = consol_df['Approved Performa'].fillna(0).astype(int).astype(str)
DL_combined['Renewal Entry No.'] = DL_combined['Renewal Entry No.'].fillna(0).astype(int).astype(str)

# Collapse repeated (key, group) pairs BEFORE merging. Without this, every
# repeated lookup row multiplies the matching rows of consol_df during the
# merge, only for the duplicates to be thrown away again afterwards.
DL_combined = DL_combined.drop_duplicates()

# Left-join the service group onto the consolidated rows
consol_df = consol_df.merge(DL_combined, on='Renewal Entry No.', how='left')

# Rows without a match are flagged with '#NA'
consol_df['Service Group'] = consol_df['Service Group'].fillna('#NA')

# The looked-up value is reported under the name 'Already Grouped?'
consol_df = consol_df.rename(columns={'Service Group': 'Already Grouped?'})

# Safety net: remove any remaining fully identical rows
consol_df = consol_df.drop_duplicates()

# Display the updated dataframe
print(consol_df.head())

# Save consol_df
consol_df.to_csv(r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Consol_df.csv", index=False)
print('✅Consol DF saved')

  Renewal Entry No. Approved Performa          Already Grouped?
0           2299943           1881940  SUBSCRIPTION-CATALOG-MYR
1           3486879           2722964                    OTHERS
2           3598174           2838382                    OTHERS
3           3807460           3056590                    OTHERS
4           4092785           3354887  SUBSCRIPTION-LISTING-MYR
✅Consol DF saved


In [11]:
# Keep only the rows whose lookup found no service group ('#NA'), reduced to
# the two columns needed downstream, with repeated rows removed
is_ungrouped = consol_df['Already Grouped?'] == '#NA'
consol_df_NA = consol_df.loc[is_ungrouped, ['Approved Performa', 'Already Grouped?']].drop_duplicates()

# Report the number of #NA cases. This is printed explicitly because a bare
# expression in the middle of a cell is evaluated and then discarded.
print(f"Number of #NA cases: {len(consol_df_NA)}")

# Save the filtered dataframe
na_output_path = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Consol_df_NA_Cases.csv"
consol_df_NA.to_csv(na_output_path, index=False)

print(f'✅Consol_df_NA_Cases saved at {na_output_path}')

✅Consol_df_NA_Cases saved at D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Consol_df_NA_Cases.csv


In [13]:
# Turn the '#NA' flag column into a 'Remark' column whose value is the same
# fixed label, 'Receipt Tagging', on every row
consol_df_NA = (
    consol_df_NA
    .rename(columns={'Already Grouped?': 'Remark'})
    .assign(Remark='Receipt Tagging')
)

# Preview the result
print(consol_df_NA.head())

# Write the extract to disk
output_path = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Receipt tagging extract.csv"
consol_df_NA.to_csv(output_path, index=False)

print(f'✅Done! File saved at {output_path}')

       Approved Performa           Remark
405303           8817820  Receipt Tagging
405343           8695698  Receipt Tagging
422796           8829963  Receipt Tagging
434600           8843269  Receipt Tagging
451279           8853654  Receipt Tagging
✅Done! File saved at D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Receipt tagging extract.csv


In [15]:
import pandas as pd

# Define the file path
file_path = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\2. TaggingData_04.02.25 04.08 PM Jan.xlsb"

# Pick the reader engine once, based on the file extension
excel_engine = 'pyxlsb' if file_path.endswith('.xlsb') else 'openpyxl'

# Open the workbook a single time and close it deterministically: the handle is
# used both to list the sheets and to read the matching one, so the file is
# neither parsed twice nor left open afterwards.
with pd.ExcelFile(file_path, engine=excel_engine) as workbook:
    # Get all sheet names
    sheet_names = workbook.sheet_names

    # First sheet whose name contains "TaggingData" (None when there is no such sheet)
    tagging_sheet = next((sheet for sheet in sheet_names if "TaggingData" in sheet), None)

    if tagging_sheet:
        print(f"✅ Found sheet: {tagging_sheet}")
        df_tagging = pd.read_excel(workbook, sheet_name=tagging_sheet)

if not tagging_sheet:
    print("❌ No sheet containing 'TaggingData' found in the file!")
elif 'Receipt Type' not in df_tagging.columns or 'Tagging Type' not in df_tagging.columns:
    print("❌ 'Receipt Type' or 'Tagging Type' column not found in the dataset!")
else:
    # Default classification for every row
    df_tagging['Remarks'] = 'Other tagging'

    # Condition 1: 'Receipt Type' in (6, 25, 27, 28) -> 'CN'
    df_tagging.loc[df_tagging['Receipt Type'].isin([6, 25, 27, 28]), 'Remarks'] = 'CN'

    # Condition 2: 'Tagging Type' == 'Adjustment Note' -> 'ADJN'.
    # Applied last, so it overrides 'CN' when both conditions hold.
    df_tagging.loc[df_tagging['Tagging Type'] == 'Adjustment Note', 'Remarks'] = 'ADJN'

    # Display updated dataframe
    print(df_tagging.head())

    # Save the updated file
    output_path = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\TaggingData_Updated.csv"
    df_tagging.to_csv(output_path, index=False)

    print(f"✅ Tagging data updated successfully and saved at {output_path}")

✅ Found sheet: TaggingData_04.02.25 04.08 PM
   Document No. Tagging Type  Approved Performa  Receipt Type        Remarks
0       3493657      Receipt            2413186           3.0  Other tagging
1       3522759      Receipt            2413186           6.0             CN
2       3489899      Receipt            2413186           5.0  Other tagging
3       3266970      Receipt            2415514           2.0  Other tagging
4       6024590      Receipt            4605836           3.0  Other tagging
✅ Tagging data updated successfully and saved at D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\TaggingData_Updated.csv


In [17]:
import pandas as pd

# Coerce the pivot key to numbers; values that cannot be parsed become NaN
df_tagging["Approved Performa"] = pd.to_numeric(df_tagging["Approved Performa"], errors="coerce")

# Count 'Document No.' entries per 'Approved Performa', one column per 'Remarks' value
pivot_df_tagging = pd.pivot_table(
    df_tagging,
    index="Approved Performa",
    columns="Remarks",
    values="Document No.",
    aggfunc="count",
    fill_value=0,
)

# A category that never occurs produces no column in the pivot; add it as all zeros
expected_columns = ["ADJN", "CN", "Other tagging"]
for absent_column in [name for name in expected_columns if name not in pivot_df_tagging.columns]:
    pivot_df_tagging[absent_column] = 0

# Row-wise total across the three categories
pivot_df_tagging["Grand Total"] = pivot_df_tagging.loc[:, expected_columns].sum(axis=1)

# Label the column axis
pivot_df_tagging.columns.name = "Count of Remarks"

# Move 'Approved Performa' out of the index into a regular column
pivot_df_tagging = pivot_df_tagging.reset_index()

# Export the pivot
pivot_df_tagging.to_excel(r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\pivot_df_tagging.xlsx", index=False)

print("✅pivot_df_tagging downloaded")

✅pivot_df_tagging downloaded


In [19]:
import pandas as pd

# Convert "Approved Performa" to numeric; unparseable values become NaN
df_tagging["Approved Performa"] = pd.to_numeric(df_tagging["Approved Performa"], errors="coerce")

# Count 'Document No.' entries per 'Approved Performa', one column per 'Remarks' value
pivot_df_tagging = df_tagging.pivot_table(
    index="Approved Performa",
    columns="Remarks",
    values="Document No.",
    aggfunc="count",
    fill_value=0,
)

# Ensure all expected columns exist (a category with no rows yields no column)
expected_columns = ["ADJN", "CN", "Other tagging"]
for col in expected_columns:
    if col not in pivot_df_tagging:
        pivot_df_tagging[col] = 0

# Add 'Grand Total' column (row-wise sum)
pivot_df_tagging["Grand Total"] = pivot_df_tagging[expected_columns].sum(axis=1)

# 'R' when there is at least one ADJN or CN entry and no 'Other tagging' entry,
# otherwise 'NR'. Computed as a vectorised boolean mask instead of a row-wise
# apply, which evaluates the same condition without a Python-level loop.
is_relevant = (
    ((pivot_df_tagging["ADJN"] != 0) | (pivot_df_tagging["CN"] != 0))
    & (pivot_df_tagging["Other tagging"] == 0)
)
pivot_df_tagging["Relv/Not Relv"] = is_relevant.map({True: "R", False: "NR"})

# Label the column axis
pivot_df_tagging.columns.name = "Count of Remarks"

# Move 'Approved Performa' out of the index into a regular column
pivot_df_tagging = pivot_df_tagging.reset_index()

# Save the final pivot table to an Excel file
pivot_df_tagging.to_excel(r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\pivot_df_tagging_relv_nonrelv.xlsx", index=False)

print("✅pivot_df_tagging downloaded")

✅pivot_df_tagging downloaded


In [21]:
# Add 'Source for R/NR' column with the defined conditions
def get_source_for_rnr(row):
    """Return the 'Source for R/NR' label for one pivot row.

    Parameters
    ----------
    row : mapping-like
        Must support ``row[key]`` for "Relv/Not Relv", "CN" and "ADJN";
        "Other tagging" is read only when both CN and ADJN are non-zero.

    Returns
    -------
    str
        "Other Tagging" for rows marked "NR"; otherwise "Only CDN",
        "Only ADJN" or "1. Both CN/ADJN, No Other tagging" depending on which
        counts are non-zero; an empty string when no rule applies.
    """
    # Rows marked not-relevant are labelled without looking at the counts
    if row["Relv/Not Relv"] == "NR":
        return "Other Tagging"

    has_cn = row["CN"] != 0
    has_adjn = row["ADJN"] != 0

    if has_cn and not has_adjn:
        return "Only CDN"
    if has_adjn and not has_cn:
        return "Only ADJN"
    if has_cn and has_adjn and row["Other tagging"] == 0:
        return "1. Both CN/ADJN, No Other tagging"

    # No rule matched
    return ""

# Label every row with the source of its R/NR classification
pivot_df_tagging["Source for R/NR"] = pivot_df_tagging.apply(get_source_for_rnr, axis=1)

# Label the column axis
pivot_df_tagging.columns.name = "Count of Remarks"

# Only reset the index while 'Approved Performa' still lives in it. Resetting a
# frame whose index is already a plain row counter would insert that counter as
# a spurious 'index' column, which would then be written to this export and to
# every later one.
if "Approved Performa" not in pivot_df_tagging.columns:
    pivot_df_tagging = pivot_df_tagging.reset_index()

# Save the final pivot table to an Excel file
pivot_df_tagging.to_excel(r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\pivot_df_tagging_source_added.xlsx", index=False)

print("✅pivot_df_tagging downloaded")

✅pivot_df_tagging downloaded


In [23]:
import pandas as pd

# Workbook and sheet holding the inbound work-order receipts
file_path = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Inbound Work Order Receipts INDIAMART_ANUJ 2025-02-04T11_10_39.xlsx"
sheet_name = "Inbound Work Order Receipts"

# Any failure below (read, validation, aggregation or save) is reported via the
# printed message and not re-raised.
try:
    # Build the reader arguments; .xlsb files additionally need the pyxlsb engine
    reader_options = {"sheet_name": sheet_name}
    if file_path.endswith(".xlsb"):
        import pyxlsb
        reader_options["engine"] = "pyxlsb"
    df_cn_amt = pd.read_excel(file_path, **reader_options)

    # Validate that both columns used below are present
    required_columns = ["Approved Performa No", "Receipt Amount Used"]
    missing_columns = [name for name in required_columns if name not in df_cn_amt.columns]
    if missing_columns:
        raise ValueError(f"Missing columns in the file: {missing_columns}")

    # Narrow to the required columns
    df_cn_amt = df_cn_amt[required_columns]

    # Total 'Receipt Amount Used' per 'Approved Performa No'
    CN_Amt_df = df_cn_amt.groupby("Approved Performa No", as_index=False)["Receipt Amount Used"].sum()

    print("✅ CN_Amt_df created successfully!")

    # Persist the aggregated amounts
    CN_Amt_df.to_csv(r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\CN_Amt_df.csv", index=False)
    print('CN_Amt_df Saved')

except Exception as e:
    print(f"❌ Error loading file: {e}")


✅ CN_Amt_df created successfully!
CN_Amt_df Saved


In [25]:
# Rows that qualify for a CN amount: marked 'R' and with at least one CN entry
is_relevant_cn = (pivot_df_tagging["Relv/Not Relv"] == "R") & (pivot_df_tagging["CN"] != 0)
filtered_df = pivot_df_tagging[is_relevant_cn]

# Lookup table: 'Approved Performa No' -> summed 'Receipt Amount Used'
cn_amount_by_performa = CN_Amt_df.set_index("Approved Performa No")["Receipt Amount Used"]

# Look the amount up for every row using 'Approved Performa' as the key ...
pivot_df_tagging["Receipt Amount Used"] = pivot_df_tagging["Approved Performa"].map(cn_amount_by_performa)

# ... then blank it out again for every row that does not qualify
pivot_df_tagging.loc[~is_relevant_cn, "Receipt Amount Used"] = None

print("✅ Lookup for 'Receipt Amount Used' completed successfully!")

# Save pivot_df_tagging
pivot_df_tagging.to_csv(r'D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\CN_with_R.csv', index=False)
print('CN_with_R Saved')

✅ Lookup for 'Receipt Amount Used' completed successfully!
CN_with_R Saved


In [27]:
# Report the looked-up amount under the name 'CN Amt For R Cases'
pivot_df_tagging = pivot_df_tagging.rename(columns={"Receipt Amount Used": "CN Amt For R Cases"})

# Derive the ex-GST amount by dividing by 1.18
# NOTE(review): 1.18 presumably corresponds to an 18% GST rate — confirm
gst_inclusive_factor = 1.18
pivot_df_tagging["CN Tagged Amt Ex GST"] = pivot_df_tagging["CN Amt For R Cases"].div(gst_inclusive_factor)

print("✅ Columns renamed and 'CN Tagged Amt Ex GST' calculated successfully!")

# Save file
pivot_df_tagging.to_csv(r'D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\CN Tagged Amt Ex GST.csv', index=False)
print('✅ CN Tagged Amt Ex GST Saved')

✅ Columns renamed and 'CN Tagged Amt Ex GST' calculated successfully!
✅ CN Tagged Amt Ex GST Saved


In [29]:
import pandas as pd

# Workbook and sheet holding the inbound adjustment-note applications
file_path = r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\Inbnd Adjustment Note Appl. INDIAMART_ANUJ 2025-02-04T10_42_43.xlsx"
sheet_name = "Inbnd Adjustment Note Appl."

# Any failure below (read, validation, aggregation or save) is reported via the
# printed message and not re-raised.
try:
    # Build the reader arguments; .xlsb files additionally need the pyxlsb engine
    reader_options = {"sheet_name": sheet_name}
    if file_path.endswith(".xlsb"):
        import pyxlsb
        reader_options["engine"] = "pyxlsb"
    ADJN_Amt_df = pd.read_excel(file_path, **reader_options)

    # Validate that both columns used below are present
    required_columns = ["Approved Performa", "Base Amount"]
    missing_columns = [name for name in required_columns if name not in ADJN_Amt_df.columns]
    if missing_columns:
        raise ValueError(f"❌ Missing columns in the file: {missing_columns}")

    # Narrow to the required columns
    ADJN_Amt_df = ADJN_Amt_df[required_columns]

    # Total 'Base Amount' per 'Approved Performa'
    ADJN_Amt_df = ADJN_Amt_df.groupby("Approved Performa", as_index=False)["Base Amount"].sum()

    print("✅ ADJN_Amt_df created successfully!")

    # Persist the aggregated amounts
    ADJN_Amt_df.to_csv(r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\ADJN_Amt_df.csv", index=False)
    print("✅ ADJN_Amt_df Saved!")

except Exception as e:
    print(f"❌ Error loading file: {e}")

# ------------------------------------
# 🔹 Lookup 'Base Amount' for 'ADJN' cases
# ------------------------------------

# Rows that qualify for an ADJN amount, selected by their 'Source for R/NR' label
is_adjn_case = pivot_df_tagging["Source for R/NR"].isin(["Only ADJN", "1. Both CN/ADJN, No Other tagging"])
filtered_df = pivot_df_tagging[is_adjn_case]

# Lookup table: 'Approved Performa' -> summed 'Base Amount'
adjn_amount_by_performa = ADJN_Amt_df.set_index("Approved Performa")["Base Amount"]

# Write the looked-up amount straight into its final column, 'ADJN Appl Amt'.
# Creating a temporary 'Base Amount' column and renaming it afterwards would
# produce a second 'ADJN Appl Amt' column if this cell were run again.
pivot_df_tagging["ADJN Appl Amt"] = pivot_df_tagging["Approved Performa"].map(adjn_amount_by_performa)

# Blank the amount out for every row that does not qualify
pivot_df_tagging.loc[~is_adjn_case, "ADJN Appl Amt"] = None

print("✅ Lookup for 'Base Amount' completed successfully!")

# 'Total Amount' = 'CN Tagged Amt Ex GST' + 'ADJN Appl Amt', treating missing values as 0
pivot_df_tagging["Total Amount"] = pivot_df_tagging["CN Tagged Amt Ex GST"].fillna(0) + pivot_df_tagging["ADJN Appl Amt"].fillna(0)

# Reported only after the calculation has actually run
print("✅ Columns renamed and 'Total Amount' calculated successfully!")

# Save the updated pivot_df_tagging
pivot_df_tagging.to_csv(r"D:\Bhuwan Data\Bhuwan\Desktop\Adhoc data of BPnA team\Sonali\Grouping\ADJN_Only_Cases_Amt_df.csv", index=False)
print("✅ ADJN_Only_Cases_Amt_df Saved!")

✅ ADJN_Amt_df created successfully!
✅ ADJN_Amt_df Saved!
✅ Lookup for 'Base Amount' completed successfully!
✅ Columns renamed and 'Total Amount' calculated successfully!
✅ ADJN_Only_Cases_Amt_df Saved!


In [31]:
# Completion marker: prints a fixed message only; no data is read or modified.
print("✅Automation Done till here😄😄🎉")

✅Automation Done till here😄😄🎉
