In [2]:
import pandas as pd
import os

from copy import copy
from openpyxl import load_workbook
from openpyxl.styles import Border,Side,Alignment,Font,PatternFill
from openpyxl.utils import get_column_letter

In [3]:
month,year = 2,2024

prof_tax = 150

wages = pd.DataFrame({"CATEGORY":['SKILLED',"SEMI-SKILLED","UNSKILLED"],"Wage per day":[709,589,504]})

vendor_name = "M/S SPRYSOFT TECHNOLOGIES PRIVATE LIMITED, HYDERABAD."

month_st_dt = pd.Period(f"{year}-{month}").start_time.date().strftime("%d-%m-%Y")
month_end_dt = pd.Period(f"{year}-{month}").end_time.date().strftime("%d-%m-%Y")

month_year = pd.Period(f"{year}-{month}").start_time.date().strftime("%b-%Y")
month_line = f"Due Wage Month:{month_year} from {month_st_dt} to {month_end_dt}"

vendor_name_line = f"Name of Contractor : {vendor_name}"

attendance_path = os.path.join("attendance",f"attendance_format_{month}_{year}.xlsx")

template_path  = "attendance.xlsx"

In [4]:
f"Name of Contractor : {vendor_name}"

'Name of Contractor : M/S SPRYSOFT TECHNOLOGIES PRIVATE LIMITED, HYDERABAD.'

In [5]:
def group_processing(group,category):
    group.drop(["SL NO"],axis=1,inplace=True)
    group["gross_wage"] = group["TOTAL PAY DAYS"] * group["Wage per day"] 
    group["nh_wage"] = group["NH DAY"] * group["Wage per day"] 
    group["gross_wage_pf"] = group["gross_wage"].apply(lambda x:15000 if x > 15000 else x)
    group["emp_epf"] = (group["gross_wage_pf"] * 0.12).round(2)
    group["emp_esi"] = (group["gross_wage"] * 0.0075).round(2)
    group["prof_tax"] =  group["gross_wage_pf"].apply(lambda x: prof_tax if x == 15000 else 0)
    group["emp_epf_esi_total"] = group["emp_epf"] + group["emp_esi"]
    group["empl_epf"] = (group["gross_wage_pf"] * 0.125).round(2)
    group["empl_edli"] = (group["gross_wage_pf"] * 0.005).round(2)
    group["empl_epf_edli_total"] = group["empl_epf"] + group["empl_edli"]
    group["empl_esi"] = (group["gross_wage"] * 0.0325).round(2)
    group["empl_epf_edli_esi_total"] = group["empl_epf"] + group["empl_edli"] + group["empl_esi"]
    group["net_pay"] = (group["gross_wage"] + group["nh_wage"]) - group["emp_epf_esi_total"] - group["prof_tax"]

    last_row = group.sum(axis=0).values
    last_row[:2] = ["",f"{category} TOTAL"]
    group.loc["total"] = last_row
    return group


In [8]:
def pf_esi_preprocessing(attendance_path,wages:pd.DataFrame):
    usecols=["SL NO","CATEGORY OF SKILLNESS","NAME OF CONTRACT PERSONNEL","TOTAL PAY DAYS","NH DAY"]
    data = pd.read_excel(attendance_path,header=7,usecols=usecols,skipfooter=7)
    
    merged_data = data.merge(wages,how="left",left_on="CATEGORY OF SKILLNESS",right_on="CATEGORY").drop(["CATEGORY"],axis=1)
    groups = merged_data.groupby("CATEGORY OF SKILLNESS")
    
    processed_df = pd.DataFrame()
    
    bill_pay_days ={}

    for category in merged_data["CATEGORY OF SKILLNESS"]:

        group = groups.get_group(category)
        processed_group = group_processing(group,category)
        bill_pay_days[category] = processed_group.loc["total","TOTAL PAY DAYS"]
        processed_df = pd.concat([processed_df,processed_group],axis=0)


    final_row = processed_df.loc["total"].sum(axis=0).values
    final_row[:2] = ["","TOTAL"]
    processed_df.loc["final_total"] = final_row
    processed_df.loc[["final_total","total"],"Wage per day"] = ""

    processed_df["SL NO"] = [int(val)+1 if val not in ["total","final_total"] else "" for val in processed_df.index]
    processed_df = pd.concat([processed_df.iloc[:,-1:],processed_df.iloc[:,:-1]],axis=1)

    empl_epf,empl_edli,esi_total = processed_df.loc["final_total",["empl_epf","empl_edli","empl_esi"]].values
    bill_pay_days["EMPL_PF"] = empl_epf
    bill_pay_days["EMPL_EDLI"] = empl_edli
    bill_pay_days["ESI"] = esi_total

    processed_df.reset_index(drop=True,inplace=True)
    processed_df.drop(["CATEGORY OF SKILLNESS"],axis=1,inplace=True)

    rows_yellow_fill = processed_df[processed_df["NAME OF CONTRACT PERSONNEL"].str.contains("TOTAL")].index.values
    
    return processed_df,bill_pay_days,rows_yellow_fill

In [9]:
processed_df,bill_pay_days,rows_yellow_fill = pf_esi_preprocessing(attendance_path,wages)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group.drop(["SL NO"],axis=1,inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group["gross_wage"] = group["TOTAL PAY DAYS"] * group["Wage per day"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group["nh_wage"] = group["NH DAY"] * group["Wage per day"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index

In [12]:
processed_df

Unnamed: 0,SL NO,NAME OF CONTRACT PERSONNEL,TOTAL PAY DAYS,NH DAY,Wage per day,gross_wage,nh_wage,gross_wage_pf,emp_epf,emp_esi,prof_tax,emp_epf_esi_total,empl_epf,empl_edli,empl_epf_edli_total,empl_esi,empl_epf_edli_esi_total,net_pay
0,1.0,NITYA SUNDAR MUDULI,26.0,0,709.0,18434.0,0,15000.0,1800.0,138.26,150,1938.26,1875.0,75.0,1950.0,599.1,2549.1,16345.74
1,,SKILLED TOTAL,26.0,0,,18434.0,0,15000.0,1800.0,138.26,150,1938.26,1875.0,75.0,1950.0,599.1,2549.1,16345.74
2,2.0,JAGANNATH SAHU,25.5,0,589.0,15019.5,0,15000.0,1800.0,112.65,150,1912.65,1875.0,75.0,1950.0,488.13,2438.13,12956.85
3,,SEMI-SKILLED TOTAL,25.5,0,,15019.5,0,15000.0,1800.0,112.65,150,1912.65,1875.0,75.0,1950.0,488.13,2438.13,12956.85
4,3.0,DANA MAJHI,24.5,0,504.0,12348.0,0,12348.0,1481.76,92.61,0,1574.37,1543.5,61.74,1605.24,401.31,2006.55,10773.63
5,,UNSKILLED TOTAL,24.5,0,,12348.0,0,12348.0,1481.76,92.61,0,1574.37,1543.5,61.74,1605.24,401.31,2006.55,10773.63
6,,TOTAL,76.0,0,,45801.5,0,42348.0,5081.76,343.52,300,5425.28,5293.5,211.74,5505.24,1488.54,6993.78,40076.22


In [11]:
bill_pay_days

{'SKILLED': 26.0,
 'SEMI-SKILLED': 25.5,
 'UNSKILLED': 24.5,
 'EMPL_PF': 5293.5,
 'EMPL_EDLI': 211.74,
 'ESI': 1488.54}

In [255]:
def generate_pf_esi_sheet(template_path):
    '''
    This function reads attendance file and return styled excel sheet with values.
    '''
    workbook = load_workbook(template_path)
    max_name_length = max([len(name) for name in processed_df["NAME OF CONTRACT PERSONNEL"].values])

    sheet = workbook['PF_ESI']  # Update with your sheet's name
    sheet.column_dimensions["B"].width = (max_name_length+2)*1.2

    border_style = Border(left=Side(border_style="thin",color="FF000000"),
                              right=Side(border_style="thin",color="FF000000"),
                              top=Side(border_style="thin",color="FF000000"),
                              bottom=Side(border_style="thin",color="FF000000"))

    alignment_style = Alignment(horizontal='center',vertical="center")
    alignment_style_left = Alignment(horizontal='left',vertical="center")
    bold_font = Font(bold=True)
    yellow_fill = PatternFill(start_color="FFFF00",end_color="FFFF00",fill_type="solid")
    green_fill = PatternFill(start_color="00FF00",end_color="00FF00",fill_type="solid")

    cell = sheet.cell(row=2,column=1)
    cell.value = month_line

    cell = sheet.cell(row=3,column=1)
    cell.value = vendor_name_line

    start_row = 6
    start_column = 1  # Assuming you start from column A

    sheet.delete_rows(start_row,sheet.max_row)

    for index, row in processed_df.iterrows():

        for col_num, value in enumerate(row, start=start_column):   

            cell = sheet.cell(row=start_row + index, column=col_num)
            cell.value = value
            cell.font = bold_font

            if col_num != 1:
                cell.number_format = "0.00"

            if start_row + index - 6  in rows_yellow_fill:
                cell.fill = yellow_fill

            if col_num == 2:
                cell.alignment = alignment_style_left
            else:
                cell.alignment = alignment_style

    last_row = sheet[sheet.max_row]
    for cell in last_row:
        cell.fill = green_fill

    for row in sheet.iter_rows(min_row=start_row,min_col=1,max_row=5+len(processed_df),max_col=18):
            for cell in row:
                cell.border = border_style

    return sheet



In [256]:
def create_pf_esi_sheet(attendance_path,sheet):
    attendance_workbook = load_workbook(attendance_path)

    try:
        if attendance_workbook["PF_ESI"]:
            attendance_workbook.remove(attendance_workbook["PF_ESI"])
            target_sheet = attendance_workbook.create_sheet(title = "PF_ESI")
    except:
        target_sheet = attendance_workbook.create_sheet(title = "PF_ESI")

    for row in sheet.iter_rows(values_only=True):
        target_sheet.append(row)

    for row in range(1,sheet.max_row+1):
        for col in range(1,sheet.max_column+1):
            source_cell = sheet.cell(row=row,column=col)
            target_cell = target_sheet.cell(row=row,column=col)
            target_cell.number_format = source_cell.number_format
            target_cell.font = copy(source_cell.font)
            target_cell.alignment = copy(source_cell.alignment)
            target_cell.border = copy(source_cell.border)
            target_cell.fill = copy(source_cell.fill)

    for col_idx,column in enumerate(sheet.columns,start=1):
        target_sheet.column_dimensions[get_column_letter(col_idx)].width = sheet.column_dimensions[get_column_letter(col_idx)].width

    for merged_cell_range in sheet.merged_cells.ranges:
        target_sheet.merge_cells(merged_cell_range.coord)
        for merged_cell in merged_cell_range.cells:
            target_cell = target_sheet.cell(row=merged_cell[0],column=merge_cell[1])
            if merged_cell[0]  in [2,3]:
                target_cell.alignment = Alignment(horizontal="left",vertical="center")
            elif merged_cell[0]  in [4,5]:
                target_cell.alignment = Alignment(horizontal="center",vertical="center",wrap_text=True)               
            else:
                target_cell.alignment = Alignment(horizontal="center",vertical="center")

    attendance_workbook.save(attendance_path)
    return attendance_path

In [257]:
try:
    processed_df,bill_pay_days,rows_yellow_fill = pf_esi_preprocessing(attendance_path,wages)
    sheet = generate_pf_esi_sheet(template_path)
    target_path = create_pf_esi_sheet(attendance_path,sheet)
except Exception as e:
    print(e)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group.drop(["SL NO"],axis=1,inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group["gross_wage"] = group["TOTAL PAY DAYS"] * group["Wage per day"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group["nh_wage"] = group["NH DAY"] * group["Wage per day"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group["empl_epf_edli_esi_total"] = group["empl_epf"] + group["empl_edli"] + group["empl_esi"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group["net_pay"] = (group["gross_wage"] + group["nh_wage"]) - group["emp_epf_esi_total"] - group["prof_tax"]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  group.loc["total"] = last_row
A value is trying to be set o

In [243]:
processed_df

Unnamed: 0,SL NO,NAME OF CONTRACT PERSONNEL,TOTAL PAY DAYS,NH DAY,Wage per day,gross_wage,nh_wage,gross_wage_pf,emp_epf,emp_esi,prof_tax,emp_epf_esi_total,empl_epf,empl_edli,empl_epf_edli_total,empl_esi,empl_epf_edli_esi_total,net_pay
0,1.0,NITYA SUNDAR MUDULI,25.0,0,709.0,17725.0,0,15000.0,1800.0,132.94,150,1932.94,1875.0,75.0,1950.0,576.06,2526.06,15642.06
1,,SKILLED TOTAL,25.0,0,,17725.0,0,15000.0,1800.0,132.94,150,1932.94,1875.0,75.0,1950.0,576.06,2526.06,15642.06
2,2.0,JAGANNATH SAHU,25.0,0,589.0,14725.0,0,14725.0,1767.0,110.44,0,1877.44,1840.62,73.62,1914.24,478.56,2392.8,12847.56
3,,SEMI-SKILLED TOTAL,25.0,0,,14725.0,0,14725.0,1767.0,110.44,0,1877.44,1840.62,73.62,1914.24,478.56,2392.8,12847.56
4,3.0,DANA MAJHI,25.5,0,504.0,12852.0,0,12852.0,1542.24,96.39,0,1638.63,1606.5,64.26,1670.76,417.69,2088.45,11213.37
5,,UNSKILLED TOTAL,25.5,0,,12852.0,0,12852.0,1542.24,96.39,0,1638.63,1606.5,64.26,1670.76,417.69,2088.45,11213.37
6,,TOTAL,75.5,0,,45302.0,0,42577.0,5109.24,339.77,150,5449.01,5322.12,212.88,5535.0,1472.31,7007.31,39702.99


In [7]:
usecols=["SL NO","CATEGORY OF SKILLNESS","NAME OF CONTRACT PERSONNEL","TOTAL PAY DAYS","NH DAY"]
data = pd.read_excel(attendance_path,header=7,usecols=usecols,skipfooter=7)


wages = pd.DataFrame({"CATEGORY":['SKILLED',"SEMI-SKILLED","UNSKILLED"],"Wage per day":[709,589,504]})

merged_data = data.merge(wages,how="left",left_on="CATEGORY OF SKILLNESS",right_on="CATEGORY").drop(["CATEGORY"],axis=1)
groups = merged_data.groupby("CATEGORY OF SKILLNESS")

processed_df = pd.DataFrame()
bill_pay_days ={}

for category in merged_data["CATEGORY OF SKILLNESS"]:
    
    group = groups.get_group(category)
    processed_group = group_processing(group,category)
    bill_pay_days[category] = processed_group.loc["total","TOTAL PAY DAYS"]
    processed_df = pd.concat([processed_df,processed_group],axis=0)
    
    
final_row = processed_df.loc["total"].sum(axis=0).values
final_row[:2] = ["","TOTAL"]
processed_df.loc["final_total"] = final_row
processed_df.loc[["final_total","total"],"Wage per day"] = ""

processed_df["SL NO"] = [int(val)+1 if val not in ["total","final_total"] else "" for val in processed_df.index]
processed_df = pd.concat([processed_df.iloc[:,-1:],processed_df.iloc[:,:-1]],axis=1)

pf_edli_total,esi_total = processed_df.loc["final_total",["empl_epf_edli_total","empl_esi"]].values
bill_pay_days["EMPL_PF_EDLI_TOTAL"] = pf_edli_total
bill_pay_days["ESI"] = esi_total

processed_df.reset_index(drop=True,inplace=True)
processed_df.drop(["CATEGORY OF SKILLNESS"],axis=1,inplace=True)

rows_yellow_fill = processed_df[processed_df["NAME OF CONTRACT PERSONNEL"].str.contains("TOTAL")].index.values