In [1]:
import pandas as pd
import numpy as np

# Read the source table (raw Z/Deg readings plus their stored statistics) from disk
file_path = 'shuffled_data+BC.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows of the loaded table
print(data.head(n=5))

# Function to recalculate and correct statistics for each row
def recalculate_and_correct_statistics(data, rtol=1e-05, atol=1e-08):
    """Recompute per-row summary statistics and overwrite stored values that disagree.

    For every row the statistics are derived from the three ``Z`` readings
    (``Z``, ``Z.1``, ``Z.2``) and the three ``Deg`` readings (``Deg``,
    ``Deg.1``, ``Deg.2``):

    * ``Z_mean``, ``Z_median``, ``Z_weighted_avg``, ``Z_geometric_mean``,
      ``Z_harmonic_mean``
    * ``Deg_mean``, ``Deg_median``, ``Deg_weighted_avg``

    A stored value is replaced only when ``np.isclose(stored, recomputed,
    rtol=rtol, atol=atol)`` is false; values within tolerance are left as-is.

    The weighted averages use the readings themselves as weights, i.e.
    ``sum(v**2) / sum(v)``. When a row's readings sum to zero that quantity is
    undefined, so the stored value for that row is left untouched instead of
    aborting the whole pass.
    NOTE(review): weighting values by themselves is unusual -- confirm this is
    the weighting the stored ``*_weighted_avg`` columns are meant to use.

    Args:
        data: DataFrame containing the raw reading columns and the statistic
            columns listed above. It is modified in place.
        rtol: Relative tolerance passed to ``np.isclose``.
        atol: Absolute tolerance passed to ``np.isclose``.

    Returns:
        The same DataFrame object, with out-of-tolerance statistics replaced.
        A statistic column that needed at least one correction is stored as
        float.

    Raises:
        KeyError: If a required column is missing from a non-empty frame.
    """
    if len(data) == 0:
        # No rows to validate; mirrors a loop over zero rows.
        return data

    # Work on plain float matrices (one row per record, one column per reading)
    # so each statistic is a single vectorized reduction along axis 1.
    z = np.ascontiguousarray(data[['Z', 'Z.1', 'Z.2']].to_numpy(dtype=float))
    deg = np.ascontiguousarray(
        data[['Deg', 'Deg.1', 'Deg.2']].to_numpy(dtype=float)
    )

    def self_weighted_average(values):
        # Row-wise equivalent of np.average(v, weights=v): sum(v*v) / sum(v).
        # Rows whose weights sum to zero have no defined result; they are
        # flagged so the caller can skip them.
        totals = values.sum(axis=1)
        defined = totals != 0
        result = np.full(len(values), np.nan)
        np.divide((values * values).sum(axis=1), totals, out=result, where=defined)
        return result, defined

    always = np.ones(len(data), dtype=bool)
    recalculated = {
        'Z_mean': (z.mean(axis=1), always),
        'Z_median': (np.median(z, axis=1), always),
        'Z_weighted_avg': self_weighted_average(z),
        'Z_geometric_mean': (np.exp(np.log(z).mean(axis=1)), always),
        'Z_harmonic_mean': (z.shape[1] / (1.0 / z).sum(axis=1), always),
        'Deg_mean': (deg.mean(axis=1), always),
        'Deg_median': (np.median(deg, axis=1), always),
        'Deg_weighted_avg': self_weighted_average(deg),
    }

    for column, (fresh, defined) in recalculated.items():
        stored = data[column].to_numpy(dtype=float)
        stale = defined & ~np.isclose(stored, fresh, rtol=rtol, atol=atol)
        if stale.any():
            # Assign the whole column positionally: a label-based write would
            # touch every row sharing a label when the index is not unique.
            data[column] = np.where(stale, fresh, stored)

    return data

# Run the correction pass over the loaded table
data_corrected = recalculate_and_correct_statistics(data)

# Preview the leading rows after correction
print(data_corrected.head(n=5))

# Write the result to a new CSV (row index omitted) and confirm on stdout
data_corrected.to_csv(path_or_buf='validated.csv', index=False)
print("Corrected data saved successfully.")

       Frequency         Z     Deg       Z.1   Deg.1       Z.2   Deg.2  \
0      10.000000   56499.0 -79.136   77663.0 -80.485   78808.0 -79.947   
1  253122.636153  137077.0 -15.505  136277.0 -15.574  135589.0 -15.638   
2  253122.636153  140698.0 -15.244  139310.0 -15.338  138191.0 -15.417   
3  253122.636153  150360.0 -14.671  145337.0 -14.957  142687.0 -15.116   
4  245417.877184  137459.0 -15.663  136649.0 -15.735  135963.0 -15.798   

          Z_mean  Z_median  Z_weighted_avg  Z_geometric_mean  Z_harmonic_mean  \
0   70990.000000   77663.0    70990.000000      70189.973204     69340.690586   
1  136314.333333  136277.0   136314.333333     136312.977557    136311.622151   
2  139399.666667  139310.0   139399.666667     139395.897347    139392.130442   
3  146128.000000  145337.0   146128.000000     146093.526271    146059.232314   
4  136690.333333  136649.0   136690.333333     136688.966200    136687.599478   

    Deg_mean  Deg_median  Deg_weighted_avg    label  
0 -79.856000  