In [4]:
import dask.dataframe as dd

df = dd.read_csv('/home/edocame/Desktop/data_python/03_BID_ASK_DATA/EURCHF_oanda_alltime.csv')



In [6]:
# Display current column names to inspect structure
print('Current columns:', df.columns)

Current columns: Index(['2003.08.04 03:00:03.299', '1.53834', '1.53844'], dtype='object')


In [8]:
def rename_columns(df: dd.DataFrame) -> dd.DataFrame:
    """
    Rename columns of the DataFrame to 'datetime', 'bid', and 'ask'.
    Assumes the DataFrame has exactly three columns in the correct order.
    
    Parameters:
    df (dd.DataFrame): The input DataFrame with original column names.
    
    Returns:
    dd.DataFrame: DataFrame with renamed columns.
    """
    # Edge case: Check if there are exactly three columns
    if len(df.columns) != 3:
        raise ValueError(f"Expected 3 columns, got {len(df.columns)}: {df.columns}")
    
    # Rename columns for clarity
    df = df.rename(columns={df.columns[0]: 'datetime',
                            df.columns[1]: 'bid',
                            df.columns[2]: 'ask'})
    return df

# Apply the renaming function
try:
    df = rename_columns(df)
    print('Renamed columns:', df.columns)
except Exception as e:
    print('Error:', e)

Renamed columns: Index(['datetime', 'bid', 'ask'], dtype='object')


In [10]:
# Display the first few rows to confirm column renaming
df.head()

Unnamed: 0,datetime,bid,ask
0,2003.08.04 03:00:03.491,1.53844,1.53854
1,2003.08.04 03:00:36.109,1.5382,1.53845
2,2003.08.04 03:00:36.301,1.53834,1.53859
3,2003.08.04 03:00:46.305,1.53822,1.53832
4,2003.08.04 03:00:46.497,1.53838,1.53848


In [11]:
def convert_and_export(df: dd.DataFrame, parquet_path: str) -> None:
    """
    Converte la colonna 'datetime' in tipo datetime e esporta il DataFrame in un unico file Parquet.
    
    Parameters:
    df (dd.DataFrame): DataFrame con colonne 'datetime', 'bid', 'ask'.
    parquet_path (str): Percorso del file Parquet di destinazione.
    """
    # Conversione della colonna 'datetime' in tipo datetime
    df['datetime'] = dd.to_datetime(df['datetime'], errors='coerce')
    
    # Edge case: verifica conversione
    if df['datetime'].isnull().any().compute():
        print('Attenzione: alcune date non sono state convertite correttamente.')
    
    # Esporta in un unico file Parquet
    df.to_parquet(parquet_path, write_index=False, engine='pyarrow', compression='snappy', write_metadata_file=True, overwrite=True)
    print(f"DataFrame esportato in {parquet_path}")

# Esegui la conversione e l'esportazione
convert_and_export(df, '/home/edocame/Desktop/data_python/03_BID_ASK_DATA/EURCHF_oanda_alltime.parquet')

DataFrame esportato in /home/edocame/Desktop/data_python/03_BID_ASK_DATA/EURCHF_oanda_alltime.parquet


In [1]:
# Leggi e mostra le prime righe del file Parquet esportato per conferma
import pandas as pd
parquet_preview = pd.read_parquet('/home/edocame/Desktop/data_python/03_BID_ASK_DATA/EURCHF_oanda_alltime.parquet')
parquet_preview.head()

Unnamed: 0,datetime,bid,ask
0,2003-08-04 03:00:03.491,1.53844,1.53854
1,2003-08-04 03:00:36.109,1.5382,1.53845
2,2003-08-04 03:00:36.301,1.53834,1.53859
3,2003-08-04 03:00:46.305,1.53822,1.53832
4,2003-08-04 03:00:46.497,1.53838,1.53848
