In [1]:
import os
import pandas as pd
from datetime import datetime
import tkinter as tk
from tkinter import filedialog

# Function to fix some bugs
def process_dataframe(df):
    # Check if values from first column to column 1289 in the last row are NaN
    if df.iloc[-1, :1289].isna().all():
        # Assign the values contained in columns 1290 and 1291 of the last row to variables
        col_1290_value = df.iloc[-1, 1289]
        col_1291_value = df.iloc[-1, 1290]
        
        # Delete the last row
        df = df.iloc[:-1, :]
        
        # Assign the values to the new last row
        df.iloc[-1, 1289] = col_1290_value
        df.iloc[-1, 1290] = col_1291_value
    
    return df

def append_columns_to_riassunto(riassunto, columns, values):
    for col, val in zip(columns, values):
        riassunto[col] = val
    return riassunto

def count_files(code_list):
    code_count = {}
    for code in code_list:
        if code in code_count:
            code_count[code] += 1
        else:
            code_count[code] = 1
    return code_count

def count_items(item_list):
    return len(item_list)

# Function to convert column to datetime format from row 20 to the last
def convert_column_to_datetime(df, column_index, start_row, date_format):
    for i in range(start_row, len(df)):
        try:
            df.iloc[i, column_index] = pd.to_datetime(df.iloc[i, column_index], format=date_format)
        except ValueError:
            # Skip rows that cannot be converted
            continue

# Loop through the list of dataframes and apply the function
def apply_conversion_to_dataframes(dataframes, column_index, start_row, date_format):
    for i, df in enumerate(dataframes):
        convert_column_to_datetime(df, column_index, start_row, date_format)
#_______________________________________________________________________________________________________________


# Directory containing the txt files
directory = r'V:\TEST CHILLER\TXT'

# List to store dataframes and skipped files
dataframes = []
skipped_files = []

# Loop through all files in the directory
for i, filename in enumerate(os.listdir(directory)):
    if filename.endswith('.txt'):
        file_path = os.path.join(directory, filename)
        try:
            # First method to read the file
            df = pd.read_csv(file_path, delimiter="\t", header=None)
            df = df.drop(17).reset_index(drop=True)
        except Exception as e:
            try:
                # Second method to read the file if the first fails
                total_columns = 321
                df1 = pd.read_csv(file_path, header=None, engine='python', encoding='latin', sep='\t', nrows=17)
                df2 = pd.read_csv(file_path, header=None, engine='python', encoding='latin', sep='\t', skiprows=18)
                df = pd.concat([df1, df2], axis=0).reset_index(drop=True)
            except Exception as e:
                # If both methods fail, skip the file and add to skipped_files list
                skipped_files.append(filename)
                continue
        
        # Add 'File name' and 'Last modification' columns at row 20
        df.loc[20, 'File name'] = filename
        df.loc[20, 'Last modification'] = datetime.fromtimestamp(os.path.getmtime(file_path)).strftime('%d/%m/%Y %H:%M:%S')
        
        # Append the dataframe to the list
        dataframes.append(df)
        
        # Assign the dataframe to a variable name sequentially
        globals()[f'df_{i+1}'] = df

apply_conversion_to_dataframes(dataframes, 1, 20, '%d/%m/%Y %H:%M:%S')


In [2]:
# Function to check if all DataFrames have the same number of columns
def check_columns_consistency(dataframes):
    if not dataframes:
        return True  # If the list is empty, return True
    
    # Get the number of columns in the first DataFrame
    num_columns = dataframes[0].shape[1]
    
    # Check if all DataFrames have the same number of columns
    for df in dataframes:
        if df.shape[1] != num_columns:
            return False
    
    return True

consistent_columns = check_columns_consistency(dataframes)
consistent_columns

False

In [2]:
# Function to create a DataFrame with the name of the DataFrame and the number of columns
def create_summary_dataframe(dataframes):
    summary_data = []
    for i, df in enumerate(dataframes):
        df_name = f"df{i+1}"
        summary_data.append({"DataFrame Name": df_name, "Number of Columns": df.shape[1]})
    
    summary_df = pd.DataFrame(summary_data)
    return summary_df

# Create the summary DataFrame
summary_df = create_summary_dataframe(dataframes)
# Export the DataFrame to a CSV file
summary_df.to_csv("summary_output.csv", index=False)



In [3]:
# Function to split the list of DataFrames into two sublists based on the number of columns
import pandas as pd

def split_dataframes(dataframes):
    dataframes_323 = []
    dataframes_318 = []
    dataframes_323_names = []
    dataframes_318_names = []

    for df in dataframes:
        if df.shape[1] == 323:
            dataframes_323.append(df)
            dataframes_323_names.append(df.name)
        elif df.shape[1] == 318:
            dataframes_318.append(df)
            dataframes_318_names.append(df.name)

    return dataframes_323, dataframes_318, dataframes_323_names, dataframes_318_names

# Assign names to the sample dataframes
for i, df in enumerate(dataframes):
    df.name = f'df{i+1}'

# Split the list of DataFrames
dataframes_323, dataframes_318, dataframes_323_names, dataframes_318_names = split_dataframes(dataframes)

# Display the lists of DataFrame names
print("DataFrames with 323 columns:", dataframes_323_names)
print("DataFrames with 318 columns:", dataframes_318_names)


DataFrames with 323 columns: ['df1', 'df2', 'df3', 'df4', 'df5', 'df6', 'df7', 'df8', 'df12', 'df13', 'df14', 'df28', 'df32', 'df33', 'df34', 'df35', 'df36', 'df37', 'df38', 'df48', 'df49', 'df50', 'df51', 'df52', 'df55', 'df56', 'df57', 'df58', 'df59', 'df60', 'df64', 'df65', 'df66', 'df67', 'df68', 'df69', 'df70', 'df71', 'df72', 'df73', 'df74', 'df75', 'df76', 'df77', 'df78', 'df80', 'df81', 'df82', 'df83', 'df84', 'df85', 'df86', 'df87', 'df88', 'df89', 'df90', 'df91', 'df93', 'df94', 'df95', 'df96', 'df97', 'df99', 'df100', 'df101', 'df102', 'df103', 'df104', 'df105', 'df106', 'df107', 'df108', 'df109', 'df110', 'df111', 'df113', 'df114', 'df115', 'df116', 'df117', 'df118', 'df119', 'df120', 'df121', 'df122', 'df123', 'df124', 'df125', 'df126', 'df127', 'df128', 'df129', 'df130', 'df131', 'df132', 'df133', 'df134', 'df135', 'df144', 'df145', 'df146', 'df147', 'df148', 'df149', 'df150', 'df151', 'df152', 'df153', 'df154', 'df155', 'df156', 'df157', 'df158', 'df159', 'df160', 'df161

In [4]:
# Consider the first dataframe as the baseline
baseline = dataframes_323[0].iloc[17]

# Initialize the exceptions list
exceptions_323 = []

# Iterate over the dataframes starting from the second one
for df in dataframes_323[1:]:
    # Compare the 17th row with the baseline
    if not df.iloc[17].equals(baseline):
        # If there is a difference, add the dataframe to exceptions list
        exceptions_323.append(df)

# Remove the exceptions from the original list
dataframes_323 = [df for df in dataframes_323 if not any(df.equals(exc) for exc in exceptions_323)]

# Print the results
print(len(dataframes_323))
print(len(exceptions_323))

139
229


In [9]:

exceptions_323.pop(0)
exceptions_323[0].head(25)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,313,314,315,316,317,318,319,320,File name,Last modification
0,STAZIONE DI COLLAUDO,R60,,,,,,,,,...,,,,,,,,,,
1,OPERATORE,1 RESTANI MATTEO,,,,,,,,,...,,,,,,,,,,
2,CODICE MACCHINA,TALA0NMSBXLR000,,,,,,,,,...,,,,,,,,,,
3,MATRICOLA MACCHINA,00314061,,,,,,,,,...,,,,,,,,,,
4,NUMERO ODL,603281,,,,,,,,,...,,,,,,,,,,
5,DATA/ORA COLLAUDO,06/04/2023 16:47:39,,,,,,,,,...,,,,,,,,,,
6,FLUIDO,ACQUA,,,,,,,,,...,,,,,,,,,,
7,GAS REFRIGERANTE,R134a_PA.txt,,,,,,,,,...,,,,,,,,,,
8,REVISIONE,02,,,,,,,,,...,,,,,,,,,,
9,LINEA DEL FLUIDO,BASSA PORTATA,,,,,,,,,...,,,,,,,,,,


In [10]:
column_names  = exceptions_323[0].iloc[17].tolist()
column_names[321] = 'File name'
column_names[322] = 'Last modification'
header = pd.DataFrame(columns=column_names)

# Initialize the lists
avg_columns = []
min_columns = []
max_columns = []

for col in header.columns[1:-2]:
    avg_columns.append(f"{col}_std")
    min_columns.append(f"{col}_min")
    max_columns.append(f"{col}_max")

# Add the new columns to the dataframe HEADER
for col in avg_columns + min_columns + max_columns:
    header[col] = None

#header.to_csv("header.csv", index=False)


  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col] = None
  header[col]

In [12]:
# Assuming all dataframes should have the same columns as the first dataframe
expected_columns = exceptions_323[0].columns

# Lists to hold dataframes with matching and different columns
matching_dataframes = []
different_dataframes = []

# Separate dataframes based on column names
for df in exceptions_323:
    if df.columns.equals(expected_columns):
        matching_dataframes.append(df)
    else:
        different_dataframes.append(df)

# Concatenate dataframes with matching columns
if matching_dataframes:
    final_dataframe = pd.concat(matching_dataframes, ignore_index=True)
    # Sort the dataframe by 'Last modification'
    final_dataframe.sort_values(by='Last modification', inplace=True)
else:
    final_dataframe = pd.DataFrame()

# Print the list of dataframes with different columns
print("Dataframes with different columns:")
for df in different_dataframes:
    print(df.columns)

Dataframes with different columns:


In [13]:
# Check if the entry at row 20 in the 'File name' column is of string type
all_strings = True
for df in exceptions_323:
    try:
        entry = df.at[20, 'File name']
        if not isinstance(entry, str):
            all_strings = False
            break
    except KeyError:
        all_strings = False
        break

print(f"All dataframes have a string entry at row 20 in the 'File name' column: {all_strings}")

All dataframes have a string entry at row 20 in the 'File name' column: True


In [22]:
list_size = len(exceptions_323)
print(list_size)  # Output: 5

228


In [23]:
import numpy as np

riassunto_dataframes = []
column_names  = exceptions_323[1].iloc[17].tolist()
column_names[321] = 'File name'
column_names[322] = 'Last modification'

baseline_df = exceptions_323[0].iloc[:17]
baseline_df = baseline_df.loc[:, baseline_df.notna().any(axis=0)]
baseline_df = baseline_df.drop(14)
baseline_first_column = baseline_df.iloc[:, 0].tolist()

# Process each dataframe and create riassunto dataframes
for i, df in enumerate(exceptions_323):
    # Split the dataframe
    test_id = df.iloc[:17]
    test_id = test_id.loc[:, test_id.notna().any(axis=0)]
    index_row = df.iloc[17]
    remaining_df = df.iloc[17:]
    test_id = test_id.drop(14)

    aligned_test_id = pd.DataFrame(columns=test_id.columns)
    for value in baseline_first_column:
            if value in test_id.iloc[:, 0].values:
                row_index = test_id[test_id.iloc[:, 0] == value].index[0]
                aligned_test_id = pd.concat([aligned_test_id, test_id.loc[[row_index]]])
            else:
                new_row = pd.Series([value, "Value not declared"], index=test_id.columns)
                aligned_test_id = pd.concat([aligned_test_id, new_row.to_frame().T], ignore_index=True)

    # Append the remaining rows
    aligned_test_id.reset_index(drop=True, inplace=True)

    # Initialize variables
    datasets = {}
    current_dataset = []
    dataset_counter = 1
    note_counter = 0

    # Process the remaining rows
    for index, row in remaining_df.iterrows():
        if row[0] == "Note":
            note_counter += 1
            if note_counter > 1:
                datasets[f"dataset_{dataset_counter}"] = pd.DataFrame(current_dataset)
                dataset_counter += 1
                current_dataset = []
        current_dataset.append(row)

    # Append the last dataset if exists
    if current_dataset:
        datasets[f"dataset_{dataset_counter}"] = pd.DataFrame(current_dataset)

    # Create a dataframe for each dataset and show the names of the dataframes created

    dataframe_names = []

    for key, dataset in datasets.items():
        globals()[key] = dataset
        dataframe_names.append(key)

    # Apply the operations to each dataset
    for key, dataset in datasets.items():

        # Replace all commas with periods in columns 2 to the end
        dataset.iloc[:, 1:] = dataset.iloc[:, 1:].replace(',', '.', regex=True)
        
        # Convert subsequent columns to float, coercing errors
        for col in dataset.columns[2:-2]:
            dataset[col] = pd.to_numeric(dataset[col], errors='coerce')
            dataset.iloc[2:, dataset.columns.get_loc(col)] = pd.to_numeric(dataset.iloc[2:, dataset.columns.get_loc(col)], errors='coerce')
        
        # Update the dataset in the dictionary
        datasets[key] = dataset

    # Create the dataframe for storing general data
    riassunto = pd.DataFrame(columns=column_names)
    new_columns = []
    riassunto = pd.concat([riassunto, pd.DataFrame(columns=new_columns)], axis=1)

    for key, dataset in datasets.items():
        if len(dataset) < 4:
            time_delta = "Cannot be calculated"
        else:
            # Ensure the values are Timestamps
            start_row_index = 3
            start_time = dataset.iloc[start_row_index, 1]
            
            # Check if start_time is valid
            while pd.isna(start_time) or isinstance(start_time, str):
                start_row_index += 1
                if start_row_index >= len(dataset):  # Ensure we don't go out of bounds
                    start_time = None
                    break
                start_time = dataset.iloc[start_row_index, 1]
            
            end_row_index = len(dataset) - 1
            end_time = dataset.iloc[end_row_index, 1]
            
            # Check if end_time is valid
            while pd.isna(end_time) or isinstance(end_time, str):
                end_row_index -= 1
                if end_row_index < start_row_index:  # Ensure we don't go out of bounds
                    end_time = None
                    break
                end_time = dataset.iloc[end_row_index, 1]
            
            if start_time is None or end_time is None:
                time_delta = "Cannot be calculated"
            else:
                time_delta = end_time - start_time
        
        row_index = dataframe_names.index(key)
        riassunto.at[row_index, riassunto.columns[1]] = time_delta

    # Assign values from datasets to riassunto sequentially in column 1
    row_counter = 0
    for key, dataset in datasets.items():
        if len(dataset) < 2:
            value_to_assign = "Not enough data"
            riassunto.at[row_index, 'Note'] = value_to_assign
        else:
            value_to_assign = dataset.iloc[1, 0]  # Value from column 1 row 2
            row_index = dataframe_names.index(key)
            riassunto.at[row_index, 'Note'] = value_to_assign
            row_counter += 1

    # Iterate over each dataset
    for key, dataset in datasets.items():

        # Calculate the average values for each column starting from the third column
        averages = dataset.iloc[2:, 2:-2].mean()
        
        # Assign the average values to the corresponding row in riassunto
        row_index = dataframe_names.index(key)
        
        for col_index, avg_value in enumerate(averages, start=2):
            riassunto.iloc[row_index, col_index] = avg_value

    # Create new columns for minimum and maximum values
    avg_columns = []
    min_columns = []
    max_columns = []

    for col in riassunto.columns[2:-2]:
        avg_columns.append(f"{col}_std")
        min_columns.append(f"{col}_min")
        max_columns.append(f"{col}_max")
        

    # Append new columns to riassunto dataframe
    riassunto = pd.concat([riassunto, pd.DataFrame(columns=avg_columns + min_columns + max_columns)], axis=1)

    # Iterate over each dataset
    for key, dataset in datasets.items():
        # Calculate the minimum and maximum values for each column starting from the third column
        deviations = dataset.iloc[2:, 2:-2].std()
        min_values = dataset.iloc[2:, 2:-2].min()
        max_values = dataset.iloc[2:, 2:-2].max()
        
        # Assign the minimum values to the corresponding row in riassunto
        row_index = dataframe_names.index(key)
        for col_index, deviations in enumerate(deviations, start=2):
            riassunto.iloc[row_index, len(riassunto.columns)//3 + col_index] = deviations
        
        for col_index, min_value in enumerate(min_values, start=2):
            riassunto.iloc[row_index, len(riassunto.columns)//3 + col_index] = min_value
        
        # Assign the maximum values to the corresponding row in riassunto
        for col_index, max_value in enumerate(max_values, start=2):
            riassunto.iloc[row_index, 2*len(riassunto.columns)//3 + col_index] = max_value

# Assign the maximum values to the corresponding row in riassunto
    # Check if 'File name' and 'Last modification' columns exist before assigning
    if 'File name' in df.columns and 'Last modification' in df.columns:
        riassunto['File name'] = exceptions_323[i].iloc[20,321]
        riassunto['Last modification'] = df['Last modification'].iloc[20]
    else:
        riassunto['File name'] = 'Data not found'
        riassunto['Last modification'] = 'Data not found'
    riassunto.reset_index(drop=True, inplace=True)
    updated_riassunto = append_columns_to_riassunto(riassunto, aligned_test_id.transpose().iloc[0], aligned_test_id.transpose()[1:].iloc[0]) 

# Assin the riassunto dataframe to a variable name sequentially
    globals()[f'riassunto_{i+1}'] = updated_riassunto
    riassunto_dataframes.append(updated_riassunto)


ValueError: Length of values (2) does not match length of index (321)

In [24]:
import pandas as pd
import numpy as np

riassunto_dataframes = []
column_names = exceptions_323[1].iloc[17].tolist()
column_names[321] = 'File name'
column_names[322] = 'Last modification'

baseline_df = exceptions_323[0].iloc[:17]
baseline_df = baseline_df.loc[:, baseline_df.notna().any(axis=0)]
baseline_df = baseline_df.drop(14)
baseline_first_column = baseline_df.iloc[:, 0].tolist()

# Process each dataframe and create riassunto dataframes
for i, df in enumerate(exceptions_323):
    try:
        # Split the dataframe
        test_id = df.iloc[:17]
        test_id = test_id.loc[:, test_id.notna().any(axis=0)]
        index_row = df.iloc[17]
        remaining_df = df.iloc[17:]
        test_id = test_id.drop(14)

        aligned_test_id = pd.DataFrame(columns=test_id.columns)
        for value in baseline_first_column:
            if value in test_id.iloc[:, 0].values:
                row_index = test_id[test_id.iloc[:, 0] == value].index[0]
                aligned_test_id = pd.concat([aligned_test_id, test_id.loc[[row_index]]])
            else:
                new_row = pd.Series([value] + ["Value not declared"] * (len(test_id.columns) - 1), index=test_id.columns)
                aligned_test_id = pd.concat([aligned_test_id, new_row.to_frame().T], ignore_index=True)

        # Append the remaining rows
        aligned_test_id.reset_index(drop=True, inplace=True)

        # Initialize variables
        datasets = {}
        current_dataset = []
        dataset_counter = 1
        note_counter = 0

        # Process the remaining rows
        for index, row in remaining_df.iterrows():
            if row[0] == "Note":
                note_counter += 1
                if note_counter > 1:
                    datasets[f"dataset_{dataset_counter}"] = pd.DataFrame(current_dataset)
                    dataset_counter += 1
                    current_dataset = []
            current_dataset.append(row)

        # Append the last dataset if exists
        if current_dataset:
            datasets[f"dataset_{dataset_counter}"] = pd.DataFrame(current_dataset)

        # Create a dataframe for each dataset and show the names of the dataframes created
        dataframe_names = []

        for key, dataset in datasets.items():
            globals()[key] = dataset
            dataframe_names.append(key)

        # Apply the operations to each dataset
        for key, dataset in datasets.items():
            # Replace all commas with periods in columns 2 to the end
            dataset.iloc[:, 1:] = dataset.iloc[:, 1:].replace(',', '.', regex=True)
            
            # Convert subsequent columns to float, coercing errors
            for col in dataset.columns[2:-2]:
                dataset[col] = pd.to_numeric(dataset[col], errors='coerce')
                dataset.iloc[2:, dataset.columns.get_loc(col)] = pd.to_numeric(dataset.iloc[2:, dataset.columns.get_loc(col)], errors='coerce')
            
            # Update the dataset in the dictionary
            datasets[key] = dataset

        # Create the dataframe for storing general data
        riassunto = pd.DataFrame(columns=column_names)
        new_columns = []
        riassunto = pd.concat([riassunto, pd.DataFrame(columns=new_columns)], axis=1)

        for key, dataset in datasets.items():
            if len(dataset) < 4:
                time_delta = "Cannot be calculated"
            else:
                # Ensure the values are Timestamps
                start_row_index = 3
                start_time = dataset.iloc[start_row_index, 1]
                
                # Check if start_time is valid
                while pd.isna(start_time) or isinstance(start_time, str):
                    start_row_index += 1
                    if start_row_index >= len(dataset):  # Ensure we don't go out of bounds
                        start_time = None
                        break
                    start_time = dataset.iloc[start_row_index, 1]
                
                end_row_index = len(dataset) - 1
                end_time = dataset.iloc[end_row_index, 1]
                
                # Check if end_time is valid
                while pd.isna(end_time) or isinstance(end_time, str):
                    end_row_index -= 1
                    if end_row_index < start_row_index:  # Ensure we don't go out of bounds
                        end_time = None
                        break
                    end_time = dataset.iloc[end_row_index, 1]
                
                if start_time is None or end_time is None:
                    time_delta = "Cannot be calculated"
                else:
                    time_delta = end_time - start_time
            
            row_index = dataframe_names.index(key)
            riassunto.at[row_index, riassunto.columns[1]] = time_delta

        # Assign values from datasets to riassunto sequentially in column 1
        row_counter = 0
        for key, dataset in datasets.items():
            if len(dataset) < 2:
                value_to_assign = "Not enough data"
                riassunto.at[row_index, 'Note'] = value_to_assign
            else:
                value_to_assign = dataset.iloc[1, 0]  # Value from column 1 row 2
                row_index = dataframe_names.index(key)
                riassunto.at[row_index, 'Note'] = value_to_assign
                row_counter += 1

        # Iterate over each dataset
        for key, dataset in datasets.items():
            # Calculate the average values for each column starting from the third column
            averages = dataset.iloc[2:, 2:-2].mean()
            
            # Assign the average values to the corresponding row in riassunto
            row_index = dataframe_names.index(key)
            
            for col_index, avg_value in enumerate(averages, start=2):
                riassunto.iloc[row_index, col_index] = avg_value

        # Create new columns for minimum and maximum values
        avg_columns = []
        min_columns = []
        max_columns = []

        for col in riassunto.columns[2:-2]:
            avg_columns.append(f"{col}_std")
            min_columns.append(f"{col}_min")
            max_columns.append(f"{col}_max")
            
        # Append new columns to riassunto dataframe
        riassunto = pd.concat([riassunto, pd.DataFrame(columns=avg_columns + min_columns + max_columns)], axis=1)

        # Iterate over each dataset
        for key, dataset in datasets.items():
            # Calculate the minimum and maximum values for each column starting from the third column
            deviations = dataset.iloc[2:, 2:-2].std()
            min_values = dataset.iloc[2:, 2:-2].min()
            max_values = dataset.iloc[2:, 2:-2].max()
            
            # Assign the minimum values to the corresponding row in riassunto
            row_index = dataframe_names.index(key)
            for col_index, deviation in enumerate(deviations, start=2):
                riassunto.iloc[row_index, len(riassunto.columns)//3 + col_index] = deviation
            
            for col_index, min_value in enumerate(min_values, start=2):
                riassunto.iloc[row_index, len(riassunto.columns)//3 + col_index] = min_value
            
            # Assign the maximum values to the corresponding row in riassunto
            for col_index, max_value in enumerate(max_values, start=2):
                riassunto.iloc[row_index, 2*len(riassunto.columns)//3 + col_index] = max_value

        # Check if 'File name' and 'Last modification' columns exist before assigning
        if 'File name' in df.columns and 'Last modification' in df.columns:
            riassunto['File name'] = exceptions_323[i].iloc[20, 321]
            riassunto['Last modification'] = df['Last modification'].iloc[20]
        else:
            riassunto['File name'] = 'Data not found'
            riassunto['Last modification'] = 'Data not found'
        
        riassunto.reset_index(drop=True, inplace=True)
        updated_riassunto = append_columns_to_riassunto(riassunto, aligned_test_id.transpose().iloc[0], aligned_test_id.transpose()[1:].iloc[0]) 

        # Assign the riassunto dataframe to a variable name sequentially
        globals()[f'riassunto_{i+1}'] = updated_riassunto
        riassunto_dataframes.append(updated_riassunto)
    
    except Exception as e:
        print(f"Error processing dataframe {i}: {e}")

print(f"Processed {len(riassunto_dataframes)} dataframes.")

  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_numeric((avg - values) ** 2)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  sqr = _ensure_n

Processed 228 dataframes.


In [25]:
# Check which of the dataframes in the list 'riassunto_dataframes' does not have the same number of columns
def find_inconsistent_dataframes(dataframes):
    num_columns = [df.shape[1] for df in dataframes]
    max_columns = max(num_columns)
    inconsistent_dfs = [i for i, n in enumerate(num_columns) if n != max_columns]
    return inconsistent_dfs

# Find the inconsistent dataframes in riassunto_dataframes
inconsistent_dataframes = find_inconsistent_dataframes(riassunto_dataframes)

if inconsistent_dataframes:
    print(f"The following dataframes do not have the same number of columns: {inconsistent_dataframes}")
else:
    print("All dataframes have the same number of columns.")

All dataframes have the same number of columns.


In [31]:
riassunto_3.head()

Unnamed: 0,Note,Time Stamp,RP1,RP6,RP7,RV3V21,RV3V24,RV3V26,RFANS,RVP_A,...,FLUIDO,GAS REFRIGERANTE,REVISIONE,LINEA DEL FLUIDO,NR CIRCUITI,RESPONSABILE LINEA CHILLER\CLIMA,DIREZIONE TECNICA,ALIMENTAZIONE,ESITO COLLAUDO,NOTE DI COLLAUDO
0,R134a precarica 400 gr - capillare,0 days 00:05:45,0.0,0.0,0.0,3.404478,0.0,97.014925,38.80597,0.0,...,ACQUA,R134a_PA.txt,0,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,,


In [32]:
# Create the global dataframe with modified column names
global_df = pd.DataFrame(columns=[col for col in riassunto_1.columns])

# Add another column with the name "N.TEST"
global_df["N.TEST"] = None

In [33]:
# Use the first dataframe of the list riassunto_dataframes as baseline
baseline_columns = riassunto_dataframes[0].columns

# Assign the value of its column names to all other dataframes in the list
for i in range(1, len(riassunto_dataframes)):
    riassunto_dataframes[i].columns = baseline_columns

# Assign a sequential test_id for each dataframe in riassunto_dataframes
test_ids = [f"t{i+1:03d}" for i in range(len(riassunto_dataframes))]

# Concatenate the rows of all dataframes in a global_df
global_df = pd.DataFrame(columns=baseline_columns)
global_df["N.TEST"] = None

for i, df in enumerate(riassunto_dataframes):
    df["N.TEST"] = test_ids[i]
    global_df = pd.concat([global_df, df], ignore_index=True)



In [34]:
global_df.head(25)

Unnamed: 0,Note,Time Stamp,RP1,RP6,RP7,RV3V21,RV3V24,RV3V26,RFANS,RVP_A,...,GAS REFRIGERANTE,REVISIONE,LINEA DEL FLUIDO,NR CIRCUITI,RESPONSABILE LINEA CHILLER\CLIMA,DIREZIONE TECNICA,ALIMENTAZIONE,ESITO COLLAUDO,NOTE DI COLLAUDO,N.TEST
0,"R134a 1,1 kg VT chiusa 1 giro vent. alternativ...",0 days 00:05:21,0.0,0.0,0.0,14.769231,27.450769,90.615385,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,,,t001
1,"R134a 1,2 kg VT chiusa 1 giro",0 days 00:07:56,0.0,0.0,0.0,14.178125,27.915625,71.875,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,PASSED,,t002
2,"aggiunto 100 gr tot 1,3 kg (aperto tutta VT pe...",0 days 00:04:40,0.0,0.0,0.0,15.231579,28.294737,42.564912,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,PASSED,,t002
3,"chiuso VT 1 giro tot 1,3 kg chiusa 1 giro",0 days 00:05:00,0.0,0.0,0.0,15.703279,28.340984,80.327869,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,PASSED,,t002
4,"aggiunto 100 gr tot 1,4 kg VT chiusa 1 giro",0 days 00:07:35,0.0,0.0,0.0,16.163043,28.934783,72.826087,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,PASSED,,t002
5,"chiuso VT 1 giro tot 1,4 kg chiusa 2 giri",0 days 00:05:50,0.0,0.0,0.0,17.198592,29.512676,100.0,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,PASSED,,t002
6,"aggiunto 100 gr tot 1,5 kg VT chiusa 2 giri",0 days 00:05:25,0.0,0.0,0.0,17.183333,29.912121,100.0,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,PASSED,,t002
7,"aggiunto 100 gr tot 1,6 kg VT chiusa 2 giri",0 days 00:04:36,0.0,0.0,0.0,16.761404,28.707018,56.140351,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,PASSED,,t002
8,"chiuso VT 1 giro tot 1,6 kg VT chiusa 3 giri",0 days 00:05:15,0.0,0.0,0.0,17.141538,29.469231,100.0,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,PASSED,,t002
9,"aggiunto 100 gr tot 1,7 kg VT chiusa 3 giri",0 days 00:04:41,0.0,0.0,0.0,16.252632,29.829825,91.22807,50.0,0.0,...,R134a_PA.txt,2,BASSA PORTATA,1,Ing. Paolo Russo,Ing. Paolo Russo,400V 3PH 50Hz,PASSED,,t002


In [35]:
# Loop through the DataFrame rows and round float values to two decimal places
for index, row in global_df.iterrows():
    for col in global_df.columns:
        if isinstance(row[col], float):
            global_df.at[index, col] = round(row[col], 2)

# Function to convert decimal separator from . to , for display purposes
def convert_decimal_separator(df):
    df_str = df.copy()
    for col in df_str.select_dtypes(include=['float']):
        df_str[col] = df_str[col].map(lambda x: f"{x:.2f}".replace('.', ','))
    return df_str

# Convert the decimal separator for display
global_df_display = convert_decimal_separator(global_df)

# Export the DataFrame to CSV with comma as the decimal separator
global_df_display.to_csv('exceptions_323_comma.csv', index=False, sep=';')

print("DataFrame has been exported to 'global_df_comma.csv' with comma as the decimal separator.")

DataFrame has been exported to 'global_df_comma.csv' with comma as the decimal separator.
