In [1]:
# Strip every embedded image from each sheet of the report workbook.

from openpyxl import load_workbook

# The same path is used for reading and for the in-place overwrite below.
workbook_path = 'relatorio.xlsx'
wb = load_workbook(workbook_path)

for sheet_name in wb.sheetnames:
    ws = wb[sheet_name]
    # The sheet's pictures live in the private `_images` list; replacing it
    # with an empty list leaves the sheet with no images attached.
    ws._images = []

# Overwrite the original file with the modified workbook.
wb.save(workbook_path)

print("Images removed successfully.")


Images removed successfully.


In [2]:
# Split every merged range in every sheet back into individual cells.

from openpyxl import load_workbook

workbook_path = 'relatorio.xlsx'
wb = load_workbook(workbook_path)

for sheet_name in wb.sheetnames:
    ws = wb[sheet_name]

    # Snapshot the ranges first: the unmerge calls below run against this
    # copy rather than the sheet's live collection of merged ranges.
    for merged_range in list(ws.merged_cells.ranges):
        ws.unmerge_cells(str(merged_range))

# Overwrite the original file with the unmerged workbook.
wb.save(workbook_path)

print("All cells unmerged successfully.")


All cells unmerged successfully.


In [3]:
# Drop the 4 leading rows from every sheet of the workbook.

from openpyxl import load_workbook

workbook_path = 'relatorio.xlsx'
wb = load_workbook(workbook_path)

for sheet_name in wb.sheetnames:
    ws = wb[sheet_name]
    # Rows are 1-based: remove rows 1 through 4.
    ws.delete_rows(idx=1, amount=4)

# Overwrite the original file with the trimmed workbook.
wb.save(workbook_path)

print("First 4 rows removed successfully.")


First 4 rows removed successfully.


In [4]:
# Drop the 14 trailing rows from every sheet of the workbook.

from openpyxl import load_workbook

workbook_path = 'relatorio.xlsx'
wb = load_workbook(workbook_path)

for sheet_name in wb.sheetnames:
    ws = wb[sheet_name]

    # Highest row number openpyxl reports for this sheet.
    last_row = ws.max_row

    # Sheets with fewer than 14 rows are left untouched.
    if last_row < 14:
        continue

    # First row of the trailing 14-row band, i.e. last_row - 13.
    first_trailing_row = last_row - 14 + 1
    ws.delete_rows(idx=first_trailing_row, amount=14)

# Overwrite the original file with the trimmed workbook.
wb.save(workbook_path)

print("Last 14 rows removed successfully.")


Last 14 rows removed successfully.


In [5]:
# Append the constant-valued columns to the report.

import pandas as pd

# Load the report and add one constant string column per keyword, in the
# order listed; every row receives the same value.
df = pd.read_excel('relatorio.xlsx').assign(
    FORNECEDOR='1148218',
    AUTORIZADOR='138208272',
    NF='2977065',
    DATANF='16062025',
    ANOEMPENHO='2025',
    EMPENHO='3964',
)

# Write the result back over the input file, without the pandas row index.
df.to_excel('relatorio.xlsx', index=False)

print("Columns added successfully.")

Columns added successfully.


In [6]:
# ADD NCOMB
#
# Derives the 'NCOMB' code column from the 'Combustível/Serviço' column and
# reports any value that has no code, instead of silently leaving it blank.

import pandas as pd

# Code assigned to each value of 'Combustível/Serviço' (exact match)
ncomb_mapping = {
    'ÁLCOOL': '01',
    'GASOLINA': '02',
    'DIESEL': '03',
    'DIESEL S10': '09'
}

# Read the input Excel file
df = pd.read_excel('relatorio.xlsx')

# Map NCOMB values; anything not present in the mapping becomes NaN
df['NCOMB'] = df['Combustível/Serviço'].map(ncomb_mapping)

# Surface unmapped values so a new or misspelled name is noticed here
# rather than showing up later as an empty NCOMB cell.
unmapped_values = (
    df.loc[df['NCOMB'].isna(), 'Combustível/Serviço'].dropna().unique()
)
if len(unmapped_values) > 0:
    print(f"WARNING: no NCOMB code for: {sorted(map(str, unmapped_values))}")

# Save the modified DataFrame back to the Excel file
df.to_excel('relatorio.xlsx', index=False)

print("NCOMB added successfully.")

NCOMB added successfully.


In [7]:
# Remove hyphens from 'Placa'

import pandas as pd

# Reload from disk like every other step does, so this cell does not depend
# on a `df` variable left behind by whichever cell happened to run before it.
df = pd.read_excel('relatorio.xlsx')

# regex=False makes the literal replacement explicit instead of relying on
# the pandas-version-dependent default of the `regex` argument.
df['Placa'] = df['Placa'].str.replace('-', '', regex=False)

# Save the modified DataFrame back to the Excel file
df.to_excel('relatorio.xlsx', index=False)

print("Hyphens removed successfully.")

Hyphens removed successfully.


In [8]:
# List the distinct 'Placa' values in the report and how many there are.

import pandas as pd

# Load the report into a DataFrame
file_path = 'relatorio.xlsx'  # Replace with your actual file path
df = pd.read_excel(file_path)

# Distinct values of the 'Placa' column, in order of first appearance
placa_column = df['Placa']
unique_values = placa_column.unique()

# Show the values, then the count
print(unique_values)
print(f"Number of unique 'Placa' values: {len(unique_values)}")

['TEI0F02' 'SHW2I82' 'SIA2G88' 'SIQ6I50' 'SYE7G76' 'QPH0485' 'PVS5554'
 'SFZ0E92' 'PUV3317' 'SIA5B96' 'QOJ8380' 'QNB5054' 'SHY5E18' 'NLE2499'
 'QXB4065' 'QPD3538' 'SIU7C01' 'RUS1F01' 'SIW1F21' 'QMT2926' 'SGA7E56'
 'QNO0603' 'SHD2G36' 'SJC3A40' 'SHY6F33' 'HMN8352' 'SYE7G59' 'SHY6F25'
 'SHZ1J09' 'SYE7G86' 'RVK4B67' 'RFV5F43' 'QOQ0550' 'OQQ2855' 'HMN1022'
 'RFV5G91' 'HMN8135' 'PVS5558' 'SHY6F35' 'SHY5E17' 'PXV3610' 'PVI4672'
 'SIQ6I43' 'QXM5038' 'QUJ2444' 'JKH8653' 'OPY0609' 'QMT2912' 'SIW1F13'
 'SHY6F24' 'QMT2914' 'GOW6069' 'TDI8F78' 'QMT2925']
Number of unique 'Placa' values: 54


In [9]:
# Remove rows containing COR0D00, COR0000, FUM0001, FUM0002, SJC3A40, HOS0001 or HOS0002 (SHZ1J09 is currently disabled)

import pandas as pd

# Read the input Excel file; the row-removal calls below reassign this `df`
df = pd.read_excel('relatorio.xlsx')

# Function to remove rows and count how many were removed
def remove_and_count(df, pattern, regex=True):
    """Drop every row in which any cell's text contains ``pattern``.

    Each column is converted to strings and searched as a whole, rather than
    running a Python callback once per row. A frame with zero rows is
    returned unchanged (with a count of 0).

    Args:
        df: DataFrame to filter. It is not modified.
        pattern: Text to search for in every column.
        regex: When True (the default, matching the previous behavior)
            ``pattern`` is treated as a regular expression; pass False to
            match it as a literal substring.

    Returns:
        A DataFrame holding only the rows where no cell matched.

    Side effects:
        Prints ``"<pattern> REMOVED: <count>"``.
    """
    initial_count = len(df)

    # Row mask accumulated one column at a time; starts as "nothing matches".
    matches = pd.Series(False, index=df.index)
    for _, column in df.items():
        matches |= column.astype(str).str.contains(pattern, regex=regex, na=False)

    df = df[~matches]
    removed_count = initial_count - len(df)
    print(f'{pattern} REMOVED: {removed_count}')
    return df

# Patterns whose rows are dropped, processed (and reported) in this order.
patterns_to_remove = [
    'COR0D00',
    'COR0000',
    'FUM0001',
    'FUM0002',
    # 'SHZ1J09',  # Uncomment if needed
    'SJC3A40',
    'HOS0001',
    'HOS0002',
]

# Each pass filters the frame left by the previous one and prints its count.
for pattern in patterns_to_remove:
    df = remove_and_count(df, pattern)

# Write the filtered rows back over the input file, without the row index.
df.to_excel('relatorio.xlsx', index=False)

COR0D00 REMOVED: 0
COR0000 REMOVED: 0
FUM0001 REMOVED: 0
FUM0002 REMOVED: 0
SJC3A40 REMOVED: 12
HOS0001 REMOVED: 0
HOS0002 REMOVED: 0


In [10]:
# Replace . with , in 'Qtde (L)' and 'Preco Unitário'
#
# Both columns are rewritten as text using a comma in place of the dot.
# Missing values stay missing instead of becoming the literal text 'nan'.

import pandas as pd

# Read the input Excel file
df = pd.read_excel('relatorio.xlsx')

for column in ('Qtde (L)', 'Preco Unitário'):
    # regex=False: '.' must be a literal dot, never the regex "any character".
    as_text = df[column].astype(str).str.replace('.', ',', regex=False)
    # astype(str) renders NaN as 'nan'; blank those cells out again.
    df[column] = as_text.where(df[column].notna())

# Save the modified DataFrame back to the Excel file
df.to_excel('relatorio.xlsx', index=False)

In [15]:
# Put a running 'Index' column at the front of the report.

import pandas as pd
df = pd.read_excel('relatorio.xlsx')

# Discard an 'Index' column left by an earlier run; a no-op when absent.
df = df.drop(columns='Index', errors='ignore')

# Number the rows consecutively, starting at 3152, in the first column.
first_index = 3152
df.insert(0, 'Index', range(first_index, first_index + len(df)))

# Write the result back over the input file, without the pandas row index.
df.to_excel('relatorio.xlsx', index=False)

In [13]:
# Split 'Data/Hora' into 'Data' and 'Hora'
#
# The text before the first space goes to 'Data' and everything after it to
# 'Hora'. Assumes 'Data/Hora' was read as text (the .str accessor requires
# it) - TODO confirm against the actual report.

import pandas as pd

# Read the input Excel file
df = pd.read_excel('relatorio.xlsx')

# n=1 splits on the first space only, so a value with extra spaces cannot
# produce a third column. reindex guarantees both columns exist even when no
# value contains a space (the missing part is then NaN).
parts = df['Data/Hora'].str.split(' ', n=1, expand=True).reindex(columns=[0, 1])
df['Data'] = parts[0]
df['Hora'] = parts[1]

# Save the modified DataFrame back to the Excel file
df.to_excel('relatorio.xlsx', index=False)