In [3]:
import numpy as np
import pandas as pd

# Dataset specific download routines.
def download_volatility(csv_path: str = 'ohlc2.csv',
                        output_file: str = 'formatted_ohlc.csv') -> None:
    """Formats a local OHLC CSV and saves the result as a new CSV.

    Despite the name, nothing is downloaded: the ';'-delimited file at
    `csv_path` is read from disk, augmented with derived columns and written
    to `output_file`.

    Processing steps:
      * `categorical_id` is added as a copy of `Symbol`.
      * `log_vol` is added as the natural log of `volume`, with zero volumes
        treated as missing.
      * `Region` is set to the constant "GER".
      * Per symbol, rows are sorted by `hours_from_start`, missing `log_vol`
        values are forward-filled, and rows that still have no `log_vol`
        (nothing earlier to fill from) are dropped.

    Args:
      csv_path: Path of the input CSV. Its first column is used as the index
        and it must contain `Symbol`, `volume` and `hours_from_start`.
      output_file: Path the formatted CSV is written to.

    Returns:
      None. The result is only written to `output_file`.
    """

    # The first CSV column becomes the index (index_col=0).
    df = pd.read_csv(csv_path, index_col=0, delimiter=';')

    df['categorical_id'] = df['Symbol'].copy()

    # Zero volume has no finite logarithm, so it is masked to NaN before the
    # log is taken. mask() returns a new (upcast) series instead of assigning
    # NaN into a possibly integer-typed column in place.
    volume = df['volume']
    df['log_vol'] = np.log(volume.mask(volume == 0.))

    df['Region'] = "GER"

    # Performs final processing, one symbol at a time.
    output_df_list = []
    for _, group in df.groupby('Symbol'):
        sliced = group.copy()
        sliced.sort_values('hours_from_start', inplace=True)
        # Impute log volatility values. The result is assigned back because an
        # in-place fillna on a column selection does not reliably update the
        # parent frame.
        sliced['log_vol'] = sliced['log_vol'].ffill()
        # Drop rows that could not be imputed (no earlier value to carry
        # forward).
        # NOTE(review): the original called `sliced.dropna()` and discarded
        # the result, so nothing was ever dropped. The drop is limited to
        # `log_vol` here so rows are not lost because of unrelated columns;
        # widen it if full-row completeness is required.
        sliced = sliced.dropna(subset=['log_vol'])
        if not sliced.empty:
            output_df_list.append(sliced)

    # pd.concat raises on an empty list, so fall back to an empty frame that
    # keeps the column layout when no rows survive.
    if output_df_list:
        df = pd.concat(output_df_list, axis=0)
    else:
        df = df.iloc[0:0]

    print('Completed formatting, saving to {}'.format(output_file))
    df.to_csv(output_file)

    print('Done.')

# Run the formatting step immediately when this cell executes; the function
# reads 'ohlc2.csv' and writes 'formatted_ohlc.csv' in the working directory.
download_volatility()

Completed formatting, saving to formatted_ohlc.csv
Done.
