In [1]:
import pandas as pd
import os
from sklearn.impute import KNNImputer

In [2]:
def impute_csv_files(folder_path, n_neighbors=10, weights='uniform', sep=";"):
    """Impute missing numeric values in every CSV file of a folder using KNN.

    Each ``.csv`` file found directly in ``folder_path`` is read into a
    DataFrame. If its numeric columns contain missing values, they are filled
    with ``KNNImputer`` and the result is written next to the input file as
    ``<name>_imputed.csv``. Files without imputable missing values are
    reported and skipped.

    Parameters
    ----------
    folder_path : str
        Folder that is scanned (non-recursively) for ``.csv`` files.
    n_neighbors : int, default 10
        Forwarded to ``KNNImputer(n_neighbors=...)``.
    weights : str or callable, default 'uniform'
        Forwarded to ``KNNImputer(weights=...)``.
    sep : str, default ';'
        Field separator used when reading the input files.

    Returns
    -------
    None
        Results are written to disk; progress is printed.

    Notes
    -----
    * Files whose name already ends in ``_imputed.csv`` are ignored, so
      re-running the function does not try to process its own outputs.
    * Only numeric columns that contain at least one observed value are passed
      to the imputer. Non-numeric columns and completely empty columns are
      copied through unchanged, in their original position.
    * The output is written with pandas' default separator (a comma), which
      differs from the ``;`` used for reading.
    """
    output_suffix = '_imputed.csv'

    # Collect the input files, leaving out anything this function produced
    # on an earlier run. Sorted so the processing order is deterministic.
    file_list = sorted(
        f for f in os.listdir(folder_path)
        if f.endswith('.csv') and not f.endswith(output_suffix)
    )

    for file_name in file_list:
        file_path = os.path.join(folder_path, file_name)

        # Read the CSV file into a pandas DataFrame
        df = pd.read_csv(file_path, sep=sep)

        # KNNImputer only accepts numeric data, and by default it drops
        # columns that have no observed value at all, which would break the
        # column labelling below. Restrict the imputation accordingly.
        numeric_columns = df.select_dtypes(include='number').columns
        imputable_columns = [col for col in numeric_columns if df[col].notna().any()]

        if not imputable_columns or not df[imputable_columns].isnull().values.any():
            print(f"No missing values found in {file_name}. Skipping.")
            continue

        # Create a KNNImputer and impute missing values
        imputer_knn = KNNImputer(n_neighbors=n_neighbors, weights=weights)
        imputed_part = pd.DataFrame(
            imputer_knn.fit_transform(df[imputable_columns]),
            columns=imputable_columns,
            index=df.index,
        )

        # Re-attach the columns that were not imputed and restore the
        # original column order.
        untouched_part = df.drop(columns=imputable_columns)
        df_imputed = pd.concat([untouched_part, imputed_part], axis=1)[list(df.columns)]

        # Save the imputed DataFrame to a new CSV file. splitext touches only
        # the extension, unlike str.replace which rewrites every '.csv'.
        stem, extension = os.path.splitext(file_name)
        new_file_path = os.path.join(folder_path, f"{stem}_imputed{extension}")
        df_imputed.to_csv(new_file_path, index=False)

        print(f"Imputed file saved: {new_file_path}")

In [13]:
# Top tracks

# Folder containing the CSV files to process
folder_path = r'C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Top\Calibrated'

# KNNImputer settings: neighbour count and neighbour weighting scheme
n_neighbors, weights = 10, 'uniform'

# Run the imputation over every CSV file in that folder
impute_csv_files(folder_path, n_neighbors=n_neighbors, weights=weights)

Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Top\Calibrated\LD03_all_top_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Top\Calibrated\LD04_all_top_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Top\Calibrated\LD13_all_top_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Top\Calibrated\LD14_all_top_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Top\Calibrated\LD23_all_top_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Top\Calibrated\LD24_all_top_imputed.csv


In [3]:
def impute_csv_files(folder_path, n_neighbors=10, weights='uniform', sep=";"):
    """Impute missing numeric values in every CSV file of a folder using KNN.

    Each ``.csv`` file found directly in ``folder_path`` is read into a
    DataFrame. If its ``float64``/``int64`` columns contain missing values,
    they are filled with ``KNNImputer`` and the result is written next to the
    input file as ``<name>_imputed.csv``. Files without imputable missing
    values are reported and skipped.

    Parameters
    ----------
    folder_path : str
        Folder that is scanned (non-recursively) for ``.csv`` files.
    n_neighbors : int, default 10
        Forwarded to ``KNNImputer(n_neighbors=...)``.
    weights : str or callable, default 'uniform'
        Forwarded to ``KNNImputer(weights=...)``.
    sep : str, default ';'
        Field separator used when reading the input files.

    Returns
    -------
    None
        Results are written to disk; progress is printed.

    Notes
    -----
    * Files whose name already ends in ``_imputed.csv`` are ignored, so
      re-running the function does not try to process its own outputs.
    * Numeric columns with no observed value at all are left untouched instead
      of being handed to the imputer.
    * In the output, non-numerical columns come first, followed by the
      numerical columns; within each group the original order is kept.
    * The output is written with pandas' default separator (a comma), which
      differs from the ``;`` used for reading.
    """
    output_suffix = '_imputed.csv'

    # Collect the input files, leaving out anything this function produced
    # on an earlier run. Sorted so the processing order is deterministic.
    file_list = sorted(
        f for f in os.listdir(folder_path)
        if f.endswith('.csv') and not f.endswith(output_suffix)
    )

    for file_name in file_list:
        file_path = os.path.join(folder_path, file_name)

        # Read the CSV file into a pandas DataFrame
        df = pd.read_csv(file_path, sep=sep)

        # Identify numerical columns for imputation
        numerical_columns = df.select_dtypes(include=['float64', 'int64']).columns

        # By default KNNImputer drops columns that have no observed value,
        # which would make its output narrower than the column labels used
        # below, so those columns are excluded from the imputation.
        imputable_columns = [col for col in numerical_columns if df[col].notna().any()]

        if not imputable_columns or not df[imputable_columns].isnull().values.any():
            print(f"No missing values found in {file_name}. Skipping.")
            continue

        # Create a KNNImputer and impute missing values for numerical columns
        imputer_knn = KNNImputer(n_neighbors=n_neighbors, weights=weights)
        df_imputed_numerical = pd.DataFrame(
            imputer_knn.fit_transform(df[imputable_columns]),
            columns=imputable_columns,
            index=df.index,  # keep rows aligned with the untouched columns
        )

        # Output layout: non-numerical columns first, then numerical ones.
        non_numerical_columns = df.drop(columns=numerical_columns).columns
        column_order = list(non_numerical_columns) + list(numerical_columns)

        # Combine the columns that were not imputed with the imputed ones
        df_untouched = df.drop(columns=imputable_columns)
        df_imputed = pd.concat([df_untouched, df_imputed_numerical], axis=1)[column_order]

        # Save the imputed DataFrame to a new CSV file. splitext touches only
        # the extension, unlike str.replace which rewrites every '.csv'.
        stem, extension = os.path.splitext(file_name)
        new_file_path = os.path.join(folder_path, f"{stem}_imputed{extension}")
        df_imputed.to_csv(new_file_path, index=False)

        print(f"Imputed file saved: {new_file_path}")

In [4]:
# Front tracks

# Folder containing the CSV files to process
folder_path = r'C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Front\Calibrated'

# KNNImputer settings: neighbour count and neighbour weighting scheme
n_neighbors, weights = 10, 'uniform'

# Run the imputation over every CSV file in that folder
impute_csv_files(folder_path, n_neighbors=n_neighbors, weights=weights)

Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Front\Calibrated\LD03_label_and_track_front_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Front\Calibrated\LD04_label_and_track_front_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Front\Calibrated\LD13_label_and_track_front_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Front\Calibrated\LD14_label_and_track_front_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Front\Calibrated\LD23_label_and_track_front_imputed.csv
Imputed file saved: C:\Users\raulo\Desktop\ze_vids\phase_3\Tracks_Front\Calibrated\LD24_label_and_track_front_imputed.csv
