### CRU: Transform data to time series

In [1]:
import os
import pandas as pd

#### Set input and output folders

In [2]:
# Input folder that is read from and output folder that is written to.
data_in  = '../data/CRU_0'
data_out = '../data/CRU_1_time_series'

# makedirs (unlike mkdir) also creates missing parent folders, and
# exist_ok=True makes re-running this cell a harmless no-op.
os.makedirs(data_out, exist_ok=True)

# One output sub-folder per temporal resolution.
periods = ['annually', 'monthly', 'quarterly']

for period in periods:
    os.makedirs(f'{data_out}/{period}', exist_ok=True)

#### Load Data-Vectorization Snippet

In [3]:
# %load '../snippets/vectorize_cru_data.py'
def vectorize_cru_data(data, periods):
    """Flatten the selected columns of ``data`` into a single labelled series.

    Rows of ``data`` are visited in index order and, within each row, the
    columns named in ``periods`` are visited in the order given. Every value
    is labelled ``'<year>-<period>'``; the ``'ANN'`` column is the exception
    and is labelled with the year alone.

    Returns a ``pd.Series`` holding the values, indexed by those labels.
    """
    def label_for(year, period):
        # The annual column carries no period suffix.
        if period == 'ANN':
            return str(year)
        return str(year) + '-{}'.format(period)

    # Year-major ordering: all requested periods of one year, then the next.
    cells = [(year, period) for year in data.index for period in periods]
    labels = [label_for(year, period) for year, period in cells]
    values = [data[period][year] for year, period in cells]
    return pd.Series(data=values, index=labels)

#### Read input data, vectorize data, and output data to csv format

In [4]:
def _assemble_frame(series_by_country):
    """Combine per-country series into one frame with sorted country columns.

    The rows follow the index of the first series that was collected; every
    other series is aligned to it. Building the frame in a single step avoids
    the fragmentation (and PerformanceWarning) caused by inserting a large
    number of columns one at a time.
    """
    first = next(iter(series_by_country.values()), None)
    frame = pd.DataFrame(
        {name: series_by_country[name] for name in sorted(series_by_country)},
        index=None if first is None else first.index,
    )
    frame.index.name = 'Time'
    return frame


# Each sub-folder of data_in is one attribute; each file inside it is one
# country. Per attribute, one CSV is written for every temporal resolution.
for attr in sorted(os.listdir(data_in)):
    attr_dir = f'{data_in}/{attr}'
    if not os.path.isdir(attr_dir):
        # Skip stray files sitting next to the attribute folders.
        continue

    annual_series = {}
    monthly_series = {}
    quarterly_series = {}

    # Sorted so that the file which determines the row index is the same on
    # every run, independent of the order the OS lists the folder in.
    for country_file in sorted(os.listdir(attr_dir)):
        # derive country name: the sixth dot-separated part of the file name
        country = country_file.split('.')[5]

        # read input data: whitespace-separated table, first 3 lines skipped
        # (raw string so that '\s' is not parsed as a string escape)
        data = pd.read_csv(f'{attr_dir}/{country_file}',
                           delimiter=r'\s+', skiprows=3)
        data = data.set_index('YEAR')

        # define months, quarters, annual by column position (after YEAR
        # became the index): 0-11 monthly, 12-15 quarterly, 16 annual
        anno     = [data.columns[16]]
        months   = data.columns[:12]
        quartals = data.columns[12:16]

        # vectorize data and collect it under the country name
        annual_series[country]    = vectorize_cru_data(data, anno)
        monthly_series[country]   = vectorize_cru_data(data, months)
        quarterly_series[country] = vectorize_cru_data(data, quartals)

    # build the frames: columns sorted by country, index named 'Time'
    df_annually  = _assemble_frame(annual_series)
    df_monthly   = _assemble_frame(monthly_series)
    df_quarterly = _assemble_frame(quarterly_series)

    # output data to csv format
    df_annually.to_csv(f'{data_out}/annually/{attr}.csv')
    df_monthly.to_csv(f'{data_out}/monthly/{attr}.csv')
    df_quarterly.to_csv(f'{data_out}/quarterly/{attr}.csv')