In [None]:
import pandas as pd
import os
import numpy as np
from google.colab import drive
import shutil  # To move files

In [None]:
def clean_bicing_csv(file_path):
    """
    Load a Bicing CSV file and return a cleaned DataFrame.

    Cleaning steps, in order:
      1. Strip leading/trailing single and double quotes from column names.
      2. Drop the 'traffic', 'V1' and 'last_reported' columns when present.
      3. Drop rows whose 'ttl' is not finite, then cast 'ttl' to int.
      4. Keep only rows where 'num_bikes_available' >= 0.
      5. Strip surrounding whitespace, then surrounding quotes, from 'status'.

    Args:
        file_path (str): The path to the CSV file.

    Returns:
        pandas.DataFrame: The cleaned DataFrame, or None if there was an error
        (the error is printed, not raised).
    """
    try:
        # Load data; malformed lines are skipped instead of aborting the read
        df = pd.read_csv(file_path, on_bad_lines='skip')

        # Remove any quotes left at the start or end of each column name
        df.columns = [col.strip('"\'') for col in df.columns]

        # --- Data Cleaning ---
        # Drop every unwanted column in a single call. errors='ignore' skips
        # the ones a given file does not have, and doing it in one step keeps
        # each drop from being undone by the next one.
        df_cleaned = df.drop(
            columns=['traffic', 'V1', 'last_reported'], errors='ignore'
        )

        # Drop rows with non-finite values in 'ttl'; copy so the assignments
        # below operate on an independent frame rather than a slice of df
        df_cleaned = df_cleaned[df_cleaned['ttl'].apply(np.isfinite)].copy()

        # Now convert to int (safe: no NaN/inf remain)
        df_cleaned['ttl'] = df_cleaned['ttl'].astype(int)

        # Keep rows with a non-negative "num_bikes_available"; rows where the
        # comparison is False (negative or missing values) are removed
        df_cleaned = df_cleaned[df_cleaned['num_bikes_available'] >= 0].copy()

        # Remove surrounding white space, then surrounding quotes, in 'status'.
        # Both steps are applied to the frame that is actually returned.
        df_cleaned['status'] = (
            df_cleaned['status'].str.strip().str.strip('"\'')
        )

        return df_cleaned

    except Exception as e:
        # Best-effort contract: report the failure and let the caller skip
        # this file instead of aborting the whole batch
        print(f"Error processing {file_path}: {e}")
        return None

def process_and_replace_csv_files(folder_path):
    """
    Clean every ".csv" file found directly in a folder, overwriting each one.

    Each file is passed to clean_bicing_csv. When cleaning succeeds, the
    result is written back to the same path; when it returns None the file is
    left as it was. A status line is printed per file either way.

    Args:
        folder_path (str): The path to the folder containing CSV files in Google Drive.
    """
    csv_names = [name for name in os.listdir(folder_path) if name.endswith(".csv")]

    for csv_name in csv_names:
        target_path = os.path.join(folder_path, csv_name)
        result = clean_bicing_csv(target_path)

        # Cleaning failed: report it and move on without touching the file
        if result is None:
            print(f"Skipped {csv_name} due to errors during cleaning.")
            continue

        # Overwrite the original; index=False keeps the DataFrame index out of the CSV
        result.to_csv(target_path, index=False)
        print(f"Successfully cleaned and replaced {csv_name}")


# --- Main Script ---

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

# ID of the Drive folder.
# NOTE: this value is not used below; the folder path is hard-coded instead.
folder_id = "1Xn9PMarYchGvNeIQcchRMrg4Af3hmgzK"

# Path of the folder (inside MyDrive) that holds the CSV files to clean
folder_path = '/content/drive/MyDrive/data'

# Clean every CSV file in the folder, overwriting the originals in place
process_and_replace_csv_files(folder_path)

print("Script completed.")

Mounted at /content/drive
Successfully cleaned and replaced 2023_08_Agost_BicingNou_ESTACIONS.csv
Successfully cleaned and replaced 2019_03_Març_BICING2_STAT.csv
Successfully cleaned and replaced 2019_04_Abril_BICING2_STAT.csv
Successfully cleaned and replaced 2021_08_Agost_BicingNou_ESTACIONS.csv
Successfully cleaned and replaced 2019_05_Maig_BICING2_STAT.csv
Successfully cleaned and replaced 2019_06_Juny_NouBicing_ESTACIONS.csv
Successfully cleaned and replaced 2019_07_Juliol_BicingNou_ESTACIONS.csv
Successfully cleaned and replaced 2019_08_Agost_BicingNou_ESTACIONS.csv
Successfully cleaned and replaced 2019_09_Setembre_BicingNou_ESTACIONS.csv
Successfully cleaned and replaced 2019_10_Octubre_BicingNou_ESTACIONS.csv
Successfully cleaned and replaced 2019_11_Novembre_BicingNou_ESTACIONS.csv
Successfully cleaned and replaced 2019_12_Desembre_BicingNou_ESTACIONS.csv
Successfully cleaned and replaced 2020_01_Gener_BicingNou_ESTACIONS.csv
Successfully cleaned and replaced 2020_02_Febrer_