In [340]:
import os
import pandas as pd
from datetime import datetime
import tkinter as tk
from tkinter import filedialog

# Function to fix some bugs
def process_dataframe(df):
    # Check if values from first column to column 1289 in the last row are NaN
    if df.iloc[-1, :1289].isna().all():
        # Assign the values contained in columns 1290 and 1291 of the last row to variables
        col_1290_value = df.iloc[-1, 1289]
        col_1291_value = df.iloc[-1, 1290]
        
        # Delete the last row
        df = df.iloc[:-1, :]
        
        # Assign the values to the new last row
        df.iloc[-1, 1289] = col_1290_value
        df.iloc[-1, 1290] = col_1291_value
    
    return df

def append_columns_to_riassunto(riassunto, columns, values):
    for col, val in zip(columns, values):
        riassunto[col] = val
    return riassunto

def count_files(code_list):
    code_count = {}
    for code in code_list:
        if code in code_count:
            code_count[code] += 1
        else:
            code_count[code] = 1
    return code_count

def count_items(item_list):
    return len(item_list)

# Function to convert column to datetime format from row 20 to the last
def convert_column_to_datetime(df, column_index, start_row, date_format):
    for i in range(start_row, len(df)):
        try:
            df.iloc[i, column_index] = pd.to_datetime(df.iloc[i, column_index], format=date_format)
        except ValueError:
            # Skip rows that cannot be converted
            continue

# Loop through the list of dataframes and apply the function
def apply_conversion_to_dataframes(dataframes, column_index, start_row, date_format):
    for i, df in enumerate(dataframes):
        convert_column_to_datetime(df, column_index, start_row, date_format)
#_______________________________________________________________________________________________________________


# Directory containing the txt files
directory = r'V:\TEST CHILLER\TXT'

# List to store dataframes and skipped files
dataframes = []
skipped_files = []

# Loop through all files in the directory
for i, filename in enumerate(os.listdir(directory)):
    if filename.endswith('.txt'):
        file_path = os.path.join(directory, filename)
        try:
            # First method to read the file
            df = pd.read_csv(file_path, delimiter="\t", header=None)
            df = df.drop(17).reset_index(drop=True)
        except Exception as e:
            try:
                # Second method to read the file if the first fails
                total_columns = 321
                df1 = pd.read_csv(file_path, header=None, engine='python', encoding='latin', sep='\t', nrows=17)
                df2 = pd.read_csv(file_path, header=None, engine='python', encoding='latin', sep='\t', skiprows=18)
                df = pd.concat([df1, df2], axis=0).reset_index(drop=True)
            except Exception as e:
                # If both methods fail, skip the file and add to skipped_files list
                skipped_files.append(filename)
                continue
        
        # Add 'File name' and 'Last modification' columns at row 20
        df.loc[20, 'File name'] = filename
        df.loc[20, 'Last modification'] = datetime.fromtimestamp(os.path.getmtime(file_path)).strftime('%d/%m/%Y %H:%M:%S')
        
        # Append the dataframe to the list
        dataframes.append(df)
        
        # Assign the dataframe to a variable name sequentially
        globals()[f'df_{i+1}'] = df

apply_conversion_to_dataframes(dataframes, 1, 20, '%d/%m/%Y %H:%M:%S')


In [341]:
# Function to check if all DataFrames have the same number of columns
def check_columns_consistency(dataframes):
    if not dataframes:
        return True  # If the list is empty, return True
    
    # Get the number of columns in the first DataFrame
    num_columns = dataframes[0].shape[1]
    
    # Check if all DataFrames have the same number of columns
    for df in dataframes:
        if df.shape[1] != num_columns:
            return False
    
    return True

consistent_columns = check_columns_consistency(dataframes)
consistent_columns

True

In [308]:
# Function to create a DataFrame with the name of the DataFrame and the number of columns
def create_summary_dataframe(dataframes):
    summary_data = []
    for i, df in enumerate(dataframes):
        df_name = f"df{i+1}"
        summary_data.append({"DataFrame Name": df_name, "Number of Columns": df.shape[1]})
    
    summary_df = pd.DataFrame(summary_data)
    return summary_df

# Create the summary DataFrame
summary_df = create_summary_dataframe(dataframes)
# Export the DataFrame to a CSV file
summary_df.to_csv("summary_output.csv", index=False)



In [309]:
# Function to split the list of DataFrames into two sublists based on the number of columns
import pandas as pd

def split_dataframes(dataframes):
    dataframes_323 = []
    dataframes_318 = []
    dataframes_323_names = []
    dataframes_318_names = []

    for df in dataframes:
        if df.shape[1] == 323:
            dataframes_323.append(df)
            dataframes_323_names.append(df.name)
        elif df.shape[1] == 318:
            dataframes_318.append(df)
            dataframes_318_names.append(df.name)

    return dataframes_323, dataframes_318, dataframes_323_names, dataframes_318_names

# Assign names to the sample dataframes
for i, df in enumerate(dataframes):
    df.name = f'df{i+1}'

# Split the list of DataFrames
dataframes_323, dataframes_318, dataframes_323_names, dataframes_318_names = split_dataframes(dataframes)

# Display the lists of DataFrame names
print("DataFrames with 323 columns:", dataframes_323_names)
print("DataFrames with 318 columns:", dataframes_318_names)


DataFrames with 323 columns: ['df1', 'df2', 'df3', 'df4', 'df5', 'df6', 'df7', 'df8', 'df9', 'df10', 'df11', 'df12', 'df13', 'df14', 'df15', 'df16', 'df17', 'df18']
DataFrames with 318 columns: []


In [310]:
baseline = dataframes_323[0].iloc[17]

# Iterate over all dataframes in dataframes_323
for df in dataframes_323:
    # Replace the 17th row with the baseline values
    df.iloc[17] = baseline

In [311]:
# Initialize the exceptions list
exceptions_323 = []

# Iterate over the dataframes starting from the second one
for df in dataframes_323[1:]:
    # Compare the 17th row with the baseline
    if not df.iloc[17].equals(baseline):
        # If there is a difference, add the dataframe to exceptions list
        exceptions_323.append(df)

# Remove the exceptions from the original list
dataframes_323 = [df for df in dataframes_323 if not any(df.equals(exc) for exc in exceptions_323)]

# Print the results
print(len(dataframes_323))
print(len(exceptions_323))

18
0


In [312]:
# Initialize a list to store differences
differences = []

# Iterate over the dataframes in exceptions_323
for i, df in enumerate(exceptions_323):
    # Compare the 17th row with the baseline
    diff = df.iloc[17] != baseline
    if diff.any():
        # Store the index of the dataframe, the differing columns, baseline value, and found value
        for col in diff.index[diff]:
            differences.append((i, col, baseline[col], df.iloc[17][col]))

# Create a dataframe to store the differences
diff_df = pd.DataFrame(differences, columns=['Dataframe Index', 'Column ID', 'Baseline Value', 'Found Value'])

# Print the differences dataframe
print(diff_df)

Empty DataFrame
Columns: [Dataframe Index, Column ID, Baseline Value, Found Value]
Index: []


In [313]:
# Define the columns to drop
columns_to_drop = list(range(2, 18)) + list(range(24, 114))  + list(range(122, 128)) + list(range(132, 144)) + list(range(148, 219)) + \
                  list(range(220, 223)) + list(range(224, 227)) + list(range(231, 234)) + list(range(235, 238)) + \
                  list(range(242, 245)) + list(range(246, 249)) + list(range(253, 256)) + list(range(257, 260)) + \
                  list(range(264, 267)) + list(range(268, 271)) + [281, 288, 295] + list(range(311, 321))

# Iterate over all dataframes in dataframes_323 and drop the specified columns
for df in dataframes_323:
    df.drop(columns_to_drop, axis=1, inplace=True)

In [314]:
# Iterate over all dataframes in dataframes_323 and reset the column indexes
for df in dataframes_323:
    df.columns = range(df.shape[1])

In [290]:
columns_df2 = pd.DataFrame({
    "Index": range(len(dataframes_323[0].columns)),
    "Column Names": dataframes_323[0].iloc[17]
})

# Export this dataframe as a CSV file
columns_df2.to_csv("columns_of_dataframe323.csv", index=False)

In [315]:
# Columns to move
columns_to_move = [25, 30, 45, 40, 45]

# Iterate over all dataframes in dataframes_323
for i, df in enumerate(dataframes_323):
    # Move the specified columns to the end
    cols = df.columns.tolist()
    for col in columns_to_move:
        cols.append(cols.pop(cols.index(col)))
    df = df[cols]
    
    # Reset the column indexes
    df.columns = range(df.shape[1])
    
    # Update the dataframe in the list
    dataframes_323[i] = df

In [316]:
column_names  = dataframes_323[0].iloc[17].tolist()
column_names[79] = 'File name'
column_names[80] = 'Last modification'
header = pd.DataFrame(columns=column_names)



# Initialize the lists
#avg_columns = []
#min_columns = []
#max_columns = []

#for col in header.columns[1:-2]:
#    avg_columns.append(f"{col}_std")
#    min_columns.append(f"{col}_min")
#    max_columns.append(f"{col}_max")

# Add the new columns to the dataframe HEADER
#for col in avg_columns + min_columns + max_columns:
#    header[col] = None

#header.to_csv("header.csv", index=False)

In [317]:
# Assuming all dataframes should have the same columns as the first dataframe
expected_columns = dataframes_323[0].columns

# Lists to hold dataframes with matching and different columns
matching_dataframes = []
different_dataframes = []

# Separate dataframes based on column names
for df in dataframes_323:
    if df.columns.equals(expected_columns):
        matching_dataframes.append(df)
    else:
        different_dataframes.append(df)



In [318]:
# Check if the entry at row 20 in the 'File name' column is of string type
all_strings = True
for df in dataframes_323:
    try:
        entry = df.at[20, 'File name']
        if not isinstance(entry, str):
            all_strings = False
            break
    except KeyError:
        all_strings = False
        break

print(f"All dataframes have a string entry at row 20 in the 'File name' column: {all_strings}")

All dataframes have a string entry at row 20 in the 'File name' column: False


In [319]:
import numpy as np

riassunto_dataframes = []
column_names  = dataframes_323[0].iloc[17].tolist()
baseline_df = dataframes_323[0].iloc[:17]
baseline_df = baseline_df.loc[:, baseline_df.notna().any(axis=0)]
baseline_df = baseline_df.drop(14)
baseline_first_column = baseline_df.iloc[:, 0].tolist()

# Process each dataframe and create riassunto dataframes
for i, df in enumerate(dataframes_323):
    # Split the dataframe
    test_id = df.iloc[:17]
    test_id = test_id.loc[:, test_id.notna().any(axis=0)]
    index_row = df.iloc[17]
    remaining_df = df.iloc[17:]
    test_id = test_id.drop(14)
    aligned_test_id = pd.DataFrame(columns=test_id.columns)

    for value in baseline_first_column:
            if value in test_id.iloc[:, 0].values:
                row_index = test_id[test_id.iloc[:, 0] == value].index[0]
                aligned_test_id = pd.concat([aligned_test_id, test_id.loc[[row_index]]])
            else:
                new_row = pd.Series([value, "Value not declared"], index=test_id.columns)
                aligned_test_id = pd.concat([aligned_test_id, new_row.to_frame().T], ignore_index=True)

    # Append the remaining rows
    aligned_test_id.reset_index(drop=True, inplace=True)

    # Initialize variables
    datasets = {}
    current_dataset = []
    dataset_counter = 1
    note_counter = 0

    # Process the remaining rows

    for index, row in remaining_df.iterrows(): 
            if row[0] == "Note":
                note_counter += 1
                if note_counter > 1:
                    datasets[f"dataset_{dataset_counter}"] = pd.DataFrame(current_dataset)
                    dataset_counter += 1
                    current_dataset = []
            current_dataset.append(row)
 

    # Append the last dataset if exists
    if current_dataset:
        datasets[f"dataset_{dataset_counter}"] = pd.DataFrame(current_dataset)

    # Create a dataframe for each dataset and show the names of the dataframes created

    dataframe_names = []

    for key, dataset in datasets.items():
        globals()[key] = dataset
        dataframe_names.append(key)

    # Apply the operations to each dataset
    for key, dataset in datasets.items():

        # Replace all commas with periods in columns 2 to the end
        dataset.iloc[:, 1:] = dataset.iloc[:, 1:].replace(',', '.', regex=True)
        
        # Convert subsequent columns to float, coercing errors
        for col in dataset.columns[2:-2]:
            dataset[col] = pd.to_numeric(dataset[col], errors='coerce')
            dataset.iloc[2:, dataset.columns.get_loc(col)] = pd.to_numeric(dataset.iloc[2:, dataset.columns.get_loc(col)], errors='coerce')
        
        # Update the dataset in the dictionary
        datasets[key] = dataset

    # Create the dataframe for storing general data
    riassunto = pd.DataFrame(columns=column_names)
    new_columns = []
    riassunto = pd.concat([riassunto, pd.DataFrame(columns=new_columns)], axis=1)

    for key, dataset in datasets.items():
        if len(dataset) < 4:
            time_delta = "Cannot be calculated"
        else:
            # Ensure the values are Timestamps
            start_row_index = 3
            start_time = dataset.iloc[start_row_index, 1]
            
            # Check if start_time is valid
            while pd.isna(start_time) or isinstance(start_time, str):
                start_row_index += 1
                if start_row_index >= len(dataset):  # Ensure we don't go out of bounds
                    start_time = None
                    break
                start_time = dataset.iloc[start_row_index, 1]
            
            end_row_index = len(dataset) - 1
            end_time = dataset.iloc[end_row_index, 1]
            
            # Check if end_time is valid
            while pd.isna(end_time) or isinstance(end_time, str):
                end_row_index -= 1
                if end_row_index < start_row_index:  # Ensure we don't go out of bounds
                    end_time = None
                    break
                end_time = dataset.iloc[end_row_index, 1]
            
            if start_time is None or end_time is None:
                time_delta = "Cannot be calculated"
            else:
                time_delta = end_time - start_time
        
        row_index = dataframe_names.index(key)
        riassunto.at[row_index, riassunto.columns[1]] = time_delta

    # Assign values from datasets to riassunto sequentially in column 1
    row_counter = 0
    for key, dataset in datasets.items():
        if len(dataset) < 2:
            value_to_assign = "Not enough data"
            riassunto.at[row_index, 'Note'] = value_to_assign
        else:
            value_to_assign = dataset.iloc[1, 0]  # Value from column 1 row 2
            row_index = dataframe_names.index(key)
            riassunto.at[row_index, 'Note'] = value_to_assign
            row_counter += 1

    # Iterate over each dataset
    for key, dataset in datasets.items():

        # Calculate the average values for each column starting from the third column
        averages = dataset.iloc[2:, 2:-2].mean()
        
        # Assign the average values to the corresponding row in riassunto
        row_index = dataframe_names.index(key)
        
        for col_index, avg_value in enumerate(averages, start=2):
            riassunto.iloc[row_index, col_index] = avg_value

    # Create new columns for minimum and maximum values
    #std_columns = []
    #min_columns = []
    #max_columns = []

    #for col in riassunto.columns[2:-2]:
        #std_columns.append(f"{col}_std")
        #min_columns.append(f"{col}_min")
        #max_columns.append(f"{col}_max")
        

    # Append new columns to riassunto dataframe
    #riassunto = pd.concat([riassunto, pd.DataFrame(columns=std_columns + min_columns + max_columns)], axis=1)

    # Iterate over each dataset
    #for key, dataset in datasets.items():
    #    # Calculate the minimum and maximum values for each column starting from the third column
    #    deviations = dataset.iloc[2:, 2:-2].std()
    #    min_values = dataset.iloc[2:, 2:-2].min()
    #    max_values = dataset.iloc[2:, 2:-2].max()
    #    
    #    # Assign the minimum values to the corresponding row in riassunto
    #    row_index = dataframe_names.index(key)
    #    for col_index, deviations in enumerate(deviations, start=2):
    #        riassunto.iloc[row_index, len(riassunto.columns)//3 + col_index] = deviations
    #    
    #    for col_index, min_value in enumerate(min_values, start=2):
    #        riassunto.iloc[row_index, len(riassunto.columns)//3 + col_index] = min_value
    #    
    #    # Assign the maximum values to the corresponding row in riassunto
    #    for col_index, max_value in enumerate(max_values, start=2):
    #        riassunto.iloc[row_index, 2*len(riassunto.columns)//3 + col_index] = max_value

# Assign the maximum values to the corresponding row in riassunto
    # Check if 'File name' and 'Last modification' columns exist before assigning
    #if 'File name' in df.columns and 'Last modification' in df.columns:
        riassunto['File name'] = dataframes_323[i].iloc[20,79]
        riassunto['Last modification'] = dataframes_323[i].iloc[20,80]
    #else:
    #    riassunto['File name'] = 'Data not found'
    #    riassunto['Last modification'] = 'Data not found'
    riassunto.reset_index(drop=True, inplace=True)
    updated_riassunto = append_columns_to_riassunto(riassunto, aligned_test_id.transpose().iloc[0], aligned_test_id.transpose()[1:].iloc[0]) 

# Assin the riassunto dataframe to a variable name sequentially
    globals()[f'riassunto_{i+1}'] = updated_riassunto
    riassunto_dataframes.append(updated_riassunto)



In [320]:
# Check which of the dataframes in the list 'riassunto_dataframes' does not have the same number of columns
def find_inconsistent_dataframes(dataframes):
    num_columns = [df.shape[1] for df in dataframes]
    max_columns = max(num_columns)
    inconsistent_dfs = [i for i, n in enumerate(num_columns) if n != max_columns]
    return inconsistent_dfs

# Find the inconsistent dataframes in riassunto_dataframes
inconsistent_dataframes = find_inconsistent_dataframes(riassunto_dataframes)

if inconsistent_dataframes:
    print(f"The following dataframes do not have the same number of columns: {inconsistent_dataframes}")
else:
    print("All dataframes have the same number of columns.")

All dataframes have the same number of columns.


In [321]:
global_df.head()

Unnamed: 0,Note,Duration,P1,P2,P3,P4,P5,P6,T1,T2,...,AVAILABLE.TEST.ID,AVAILABLE.N.PROVA,EXTRACTED TEXT,HELPER22,HELPER,Column1,Column2,helper3,duration2,SUPPORT
0,"circ.1 - precarica 5,0 kg VT tutta aperta",0 days 00:04:40,1.6,1.6,0.101754,4.24386,1.7,1.6,23.740877,23.840702,...,,,,,,,,,,
1,"CIRC.1 - 5,0 kg VT tutta aperta - report con p...",0 days 00:04:45,1.6,1.6,0.410345,3.731034,1.7,1.7,26.354655,26.370862,...,,,,,,,,,,
2,"CIRC.2 - 5,0 kg VT tutta aperta - report con p...",0 days 00:05:20,1.6,1.6,0.410769,3.735385,1.7,1.7,27.549385,27.564462,...,,,,,,,,,,
3,"CIRC.3 - 5,0 kg VT tutta aperta - report con p...",0 days 00:04:50,1.618644,1.6,0.40339,3.749153,1.7,1.8,28.725763,28.569153,...,,,,,,,,,,
4,"CIRC.4 - 5,0 kg VT tutta aperta - report con p...",0 days 00:04:35,1.7,1.6,0.439286,3.725,1.7,1.8,29.5225,29.367143,...,,,,,,,,,,


In [322]:
# Create the global dataframe with modified column names
global_df = pd.DataFrame(columns=[col for col in riassunto_1.columns])

# Add another column with the name "N.TEST"
global_df["N.TEST2"] = None



In [323]:
# Use the first dataframe of the list riassunto_dataframes as baseline
baseline_columns = riassunto_dataframes[0].columns

# Assign the value of its column names to all other dataframes in the list
for i in range(1, len(riassunto_dataframes)):
    riassunto_dataframes[i].columns = baseline_columns

# Assign a sequential test_id for each dataframe in riassunto_dataframes
test_ids = [f"t{i+1:03d}" for i in range(len(riassunto_dataframes))]

# Concatenate the rows of all dataframes in a global_df
global_df = pd.DataFrame(columns=baseline_columns)
global_df["N.TEST2"] = None

for i, df in enumerate(riassunto_dataframes):
    df["N.TEST2"] = test_ids[i]
    global_df = pd.concat([global_df, df], ignore_index=True)



In [324]:
# Create a new dataframe with an index column and another column containing the names of the columns of global_df
columns_df = pd.DataFrame({
    "Index": range(len(global_df.columns)),
    "Column Names": global_df.columns
})

# Export this dataframe as a CSV file
columns_df.to_csv("columns_of_global_df.csv", index=False)

print("The dataframe with column names has been exported as 'columns_of_global_df.csv'.")

The dataframe with column names has been exported as 'columns_of_global_df.csv'.


In [325]:
# Step 1: Drop columns at positions 79, 80, and 102
columns_to_drop = [79, 80, 102]
global_df.drop(global_df.columns[columns_to_drop], axis=1, inplace=True)

In [326]:


# Step 2: Rename the column "Time Stamp" to "Duration"
global_df.rename(columns={"Time Stamp": "Duration"}, inplace=True)

# Step 3: Create new columns
new_columns = ["TEST.ID", "SUPPORT", "N.PROVA", "AVAILABLE.TEST.ID", "AVAILABLE.N.PROVA", 
               "EXTRACTED TEXT", "HELPER22", "HELPER", "Column1", "Column2", "helper3", "duration2"]
for col in new_columns:
    global_df[col] = None

# Step 4: Reorder the columns as specified
ordered_columns = [
    "STAZIONE DI COLLAUDO", "TEST.ID", "SUPPORT", "N.TEST2", "N.PROVA", "AVAILABLE.TEST.ID", 
    "AVAILABLE.N.PROVA", "OPERATORE", "CODICE MACCHINA", "MATRICOLA MACCHINA", "NUMERO ODL", 
    "DATA/ORA COLLAUDO", "EXTRACTED TEXT", "DATA/ORA COLLAUDO", "FLUIDO", "GAS REFRIGERANTE", "REVISIONE", 
    "LINEA DEL FLUIDO", "HELPER22", "NR CIRCUITI", "RESPONSABILE LINEA CHILLER\\CLIMA", 
    "DIREZIONE TECNICA", "ALIMENTAZIONE", "ESITO COLLAUDO", "File name", "Last modification", 
    "Note", "HELPER", "Duration", "P1", "P2", "P3", "P4", "P5", "P6", "T1", "T2", "T3", "T4", 
    "T5", "T6", "T7", "T8", "TC7", "TC8", "TC9", "TC10", "P7", "P8", "P9", "P10", "PRELE_1", 
    "ITOT_1", "PACTT_1", "CFTOT_1", "FTOT_1", "ITOT_2", "PACTT_2", "CFTOT_2", "FTOT_2", "ITOT_3", 
    "PACTT_3", "CFTOT_3", "FTOT_3", "ITOT_4", "PACTT_4", "CFTOT_4", "FTOT_4", "ITOT_5", "PACTT_5", 
    "CFTOT_5", "FTOT_5", "EVAP1_POWER", "EVAP1_Cp", "EVAP1_Rho", "EVAP1_dT", "EVAP1_TIN_M", 
    "EVAP1_TOUT_M", "EVAP2_POWER", "EVAP2_Cp", "EVAP2_Rho", "EVAP2_dT", "EVAP2_TIN_M", "EVAP2_TOUT_M", 
    "EVAP3_POWER", "EVAP3_Cp", "EVAP3_Rho", "EVAP3_dT", "EVAP3_TIN_M1", "EVAP3_TOUT_M1", "EVAP1_DP_H2O", 
    "EVAP2_DP_H2O", "EVAP3_DP_H2O", "TC_P7", "TC_P8", "TC_P9", "TC_P10", "SOTT_1", "SOTT_2", "SURR_1", 
    "SURR_2", "TAC_M", "MP1_2", "MP3_4", "MP5_6", "VCM_1", "VCM_2", "VCM_3", "VCM_4", "VCM_5", 
    "Column1", "Column2", "helper3", "duration2"
]

global_df = global_df[ordered_columns]


In [338]:
# Replace the values in the "OPERATORE" column
global_df["OPERATORE"].replace({"1 RESTANI MATTEO": "RESTANI", "1 CARLINI MIRKO": "CARLINI"}, inplace=True)

# Update TEST.ID based on changes in NUMERO ODL
global_df['TEST.ID'] = 'CHANGED'
global_df.loc[global_df['NUMERO ODL'].shift() != global_df['NUMERO ODL'], 'TEST.ID'] = 'CURRENT'

# Run through the dataframe and check the column "NUMERO ODL"
for i in range(1, len(global_df)):
    if global_df.at[i, "NUMERO ODL"] != global_df.at[i - 1, "NUMERO ODL"]:
        global_df.at[i, "TEST.ID"] = global_df.at[i - 1, "TEST.ID"]


# Look for the characters "_PA.txt" in the "GAS REFRIGERANTE" column and delete them if found
global_df["GAS REFRIGERANTE"] = global_df["GAS REFRIGERANTE"].str.replace("_PA.txt", "", regex=False)

# Copy the first 10 left characters from "DATA/ORA COLLAUDO" to "EXTRACTED TEXT"
global_df["Column1"] = global_df["CODICE MACCHINA"].str[:5]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  global_df["OPERATORE"].replace({"1 RESTANI MATTEO": "RESTANI", "1 CARLINI MIRKO": "CARLINI"}, inplace=True)


In [339]:
# Loop through the DataFrame rows and round float values to two decimal places
for index, row in global_df.iterrows():
    for col in global_df.columns:
        if isinstance(row[col], float):
            global_df.at[index, col] = round(row[col], 2)

# Function to convert decimal separator from . to , for display purposes
def convert_decimal_separator(df):
    df_str = df.copy()
    for col in df_str.select_dtypes(include=['float']):
        df_str[col] = df_str[col].map(lambda x: f"{x:.2f}".replace('.', ','))
    return df_str

# Convert the decimal separator for display
global_df_display = convert_decimal_separator(global_df)

# Export the DataFrame to CSV with comma as the decimal separator
global_df_display.to_csv('dicember3.csv', index=False, sep=';')

print("DataFrame has been exported to 'global_df_comma.csv' with comma as the decimal separator.")

DataFrame has been exported to 'global_df_comma.csv' with comma as the decimal separator.
