In [5]:
import sys
import os
# Local imports
# Resolve the parent of the current working directory, switch into it, and add
# it to sys.path so the `src` package imported below can be found.
src_dir = os.path.abspath('../')
# NOTE(review): '../' is resolved against the current cwd, so re-running this
# cell without restarting the kernel moves one more directory up each time.
os.chdir(src_dir)
sys.path.append(src_dir)
# NOTE(review): the star import hides which names come from src.config;
# `DATA_DIR` (used as a default by DataLoader) is presumably one of them — confirm.
from src.config import *

import pandas as pd

In [6]:
class DataLoader:
    """Read CSV files from a base directory and keep track of what was loaded."""

    def __init__(self, data_dir=DATA_DIR):
        # Directory that file names passed to ``load_data`` are joined onto.
        self.data_dir = data_dir
        # Most recently loaded frame; stays ``None`` until ``load_data`` runs.
        self.data = None
        # Every frame loaded so far, keyed by the file name it was requested under.
        self.loaded_data_dct = {}

    def load_data(self, data_file, *args, **kwargs):
        """Load ``data_file`` from ``self.data_dir`` with ``pandas.read_csv``.

        Args:
            data_file: File name, joined onto ``self.data_dir``.
            *args, **kwargs: Forwarded unchanged to ``pandas.read_csv``.

        Returns:
            The loaded DataFrame. The same object is stored as ``self.data``
            and under ``self.loaded_data_dct[data_file]`` (replacing any frame
            previously stored under that name).
        """
        frame = pd.read_csv(
            os.path.join(self.data_dir, data_file), *args, **kwargs
        )
        self.data = frame
        self.loaded_data_dct[data_file] = frame
        return frame

In [120]:
class DataProcessor:
    """DataFrame clean-up steps built around an optional stored frame.

    Every step works on the explicitly passed ``data`` or, when that is
    ``None``, on ``self.data``. The step's result is always returned; with
    ``inplace=True`` it is additionally stored back on ``self.data``.
    """

    def __init__(self, data=None):
        # Frame the steps fall back to when no explicit ``data`` is passed.
        self.data = data

    def _resolve_data(self, data):
        """Return ``data`` if given, else ``self.data``; raise if both are None."""
        frame = self.data if data is None else data
        if frame is None:
            raise ValueError('No data provided!')
        return frame

    def _finalize(self, result, inplace):
        """Store ``result`` on ``self.data`` when ``inplace`` is set; return it."""
        if inplace:
            self.data = result
        return result

    def set_dt_index(self, dt_col, data=None, inplace=False):
        """Turn column ``dt_col`` into a sorted datetime index.

        Args:
            dt_col: Label of the column holding the timestamps.
            data: Frame to process; defaults to ``self.data``.
            inplace: When true, store the result on ``self.data``.

        Returns:
            A new DataFrame indexed by ``pd.to_datetime`` of ``dt_col`` (the
            column itself is removed), sorted by that index.

        Raises:
            ValueError: If neither ``data`` nor ``self.data`` is set.
            KeyError: If ``dt_col`` is not a column of the frame.
        """
        frame = self._resolve_data(data)
        # set_index returns a new frame, so the datetime conversion below is
        # applied to that new object only. The input frame is never written to
        # (previously the converted column leaked into the caller's frame when
        # inplace=True), and no defensive deep copy is needed.
        result = frame.set_index(dt_col)
        result.index = pd.to_datetime(result.index)
        result = result.sort_index()
        return self._finalize(result, inplace)

    def rename_dt_index(self, new_name, data=None, inplace=False):
        """Rename the (single-level) index of the frame to ``new_name``.

        Args:
            new_name: New name for the index.
            data: Frame to process; defaults to ``self.data``.
            inplace: When true, rename the index on the given frame itself and
                store that frame on ``self.data``. When false, the rename is
                applied to a copy and the input is left untouched.

        Returns:
            The frame carrying the renamed index.

        Raises:
            ValueError: If neither ``data`` nor ``self.data`` is set.
        """
        frame = self._resolve_data(data)
        if not inplace:
            # Assigning to ``index.names`` mutates the index object, so work
            # on a copy to keep the caller's frame unchanged.
            frame = frame.copy()
        frame.index.names = [new_name]
        return self._finalize(frame, inplace)

    def rename_df_columns(self, col_name_map, data=None, inplace=False):
        """Rename columns according to ``col_name_map``.

        Args:
            col_name_map: Passed to ``DataFrame.rename`` as ``columns``.
            data: Frame to process; defaults to ``self.data``.
            inplace: When true, store the result on ``self.data``.

        Returns:
            A new DataFrame with the columns renamed.

        Raises:
            ValueError: If neither ``data`` nor ``self.data`` is set.
        """
        frame = self._resolve_data(data)
        # DataFrame.rename already returns a new frame, so no extra copy is
        # needed to protect the input.
        result = frame.rename(columns=col_name_map)
        return self._finalize(result, inplace)

    def process_data(self):
        """Placeholder for a full processing pipeline; currently does nothing."""
        pass

In [122]:
# Load the hourly CSV and normalise it: datetime index named 'dt', value
# column renamed from 'AEP_MW' to 'MW'. Each step stores its result on dp.data.
dl = DataLoader()
df = dl.load_data('AEP_hourly.csv')
dp = DataProcessor(df)
dp.set_dt_index('Datetime', inplace=True)
dp.rename_dt_index('dt', inplace=True)
dp.rename_df_columns({'AEP_MW': 'MW'}, inplace=True)
dp.data

Unnamed: 0_level_0,MW
dt,Unnamed: 1_level_1
2004-10-01 01:00:00,12379.0
2004-10-01 02:00:00,11935.0
2004-10-01 03:00:00,11692.0
2004-10-01 04:00:00,11597.0
2004-10-01 05:00:00,11681.0
...,...
2018-08-02 20:00:00,17673.0
2018-08-02 21:00:00,17303.0
2018-08-02 22:00:00,17001.0
2018-08-02 23:00:00,15964.0


In [113]:
# Re-run the datetime indexing on the frame passed explicitly. Because
# inplace=True, the result replaces dp.data, so dp.data again has the original
# 'AEP_MW' column name and an index named 'Datetime' (see the output below).
dp.set_dt_index('Datetime', df, inplace=True)

Unnamed: 0_level_0,AEP_MW
Datetime,Unnamed: 1_level_1
2004-10-01 01:00:00,12379.0
2004-10-01 02:00:00,11935.0
2004-10-01 03:00:00,11692.0
2004-10-01 04:00:00,11597.0
2004-10-01 05:00:00,11681.0
...,...
2018-08-02 20:00:00,17673.0
2018-08-02 21:00:00,17303.0
2018-08-02 22:00:00,17001.0
2018-08-02 23:00:00,15964.0


In [43]:
# NOTE(review): leftover scratch cell. Assigning a one-element list only works
# if df has exactly one column at this point; df as returned by load_data
# presumably still has both 'Datetime' and 'AEP_MW' columns, in which case this
# raises a length-mismatch ValueError on a fresh Restart & Run All — confirm or remove.
df.columns = ['asd']

In [46]:
# NOTE(review): the saved output below still lists the 'AEP_MW' column although
# the previous cell assigns df.columns = ['asd'], so it seems to come from a
# different kernel state — re-run before relying on it.
df

Unnamed: 0_level_0,AEP_MW
Datetime,Unnamed: 1_level_1
2004-10-01 01:00:00,12379.0
2004-10-01 02:00:00,11935.0
2004-10-01 03:00:00,11692.0
2004-10-01 04:00:00,11597.0
2004-10-01 05:00:00,11681.0
...,...
2018-08-02 20:00:00,17673.0
2018-08-02 21:00:00,17303.0
2018-08-02 22:00:00,17001.0
2018-08-02 23:00:00,15964.0
