In [1]:
import numpy as np
import pandas as pd

import config
from powerutils import data_processing as dp

In [None]:
def read_power():
    '''Read and concatenate the power data.

    Every entry of ``config.dirpath_data / '功率数据'`` is loaded in sorted
    path order through ``dp.read_table(..., index_col=0, parse_dates=True)``.

    Units are stripped from the column names of each file *before* the
    frames are combined (the same approach ``read_measure`` takes), because
    ``pd.concat`` aligns columns by label: headers that differ only in their
    unit text would otherwise turn into separate, partly empty columns and
    change which columns ``.iloc[:, :3]`` selects.

    Returns
    -------
    pandas.DataFrame
        The first three columns of the combined table, passed through
        ``dp.sort_and_drop_duplicates`` and clipped to ``[0, config.cap]``.
    '''
    dirpath = config.dirpath_data / '功率数据'
    frames = [
        dp.remove_column_units(
            dp.read_table(filepath, index_col=0, parse_dates=True)
        )
        for filepath in sorted(dirpath.iterdir())
    ]

    df = (pd.concat(frames, axis=0)
        .iloc[:, :3]
        .pipe(dp.sort_and_drop_duplicates)
        .clip(0, config.cap)
    )

    return df

# Build the power table, then inspect it with the powerutils helpers.
# NOTE(review): dp.print_time_index and dp.describe are defined in powerutils;
# judging by their names they report the index's time coverage and summary
# statistics -- confirm against that module.
df_power = read_power()
dp.print_time_index(df_power.index)
dp.describe(df_power)

In [None]:
def read_measure():
    '''Read and concatenate the measured meteorological data.

    Every entry of ``config.dirpath_data / '气象实测'`` is loaded in sorted
    path order through ``dp.read_table(..., index_col=0, parse_dates=True)``.

    Returns
    -------
    pandas.DataFrame
        The combined table after ``dp.sort_and_drop_duplicates``, with every
        column name prefixed by ``'实测'``.
    '''
    dirpath = config.dirpath_data / '气象实测'
    # The units in the column names may differ between files, so strip them
    # from each file before the frames are combined.
    frames = [
        dp.remove_column_units(
            dp.read_table(filepath, index_col=0, parse_dates=True)
        )
        for filepath in sorted(dirpath.iterdir())
    ]

    combined = pd.concat(frames, axis=0)
    deduplicated = dp.sort_and_drop_duplicates(combined)

    return deduplicated.add_prefix('实测')

# Build the measured-weather table, then inspect it with the powerutils helpers.
# NOTE(review): dp.print_time_index and dp.describe are defined in powerutils;
# judging by their names they report the index's time coverage and summary
# statistics -- confirm against that module.
df_measure = read_measure()
dp.print_time_index(df_measure.index)
dp.describe(df_measure)

In [None]:
def read_nwp():
    '''Read and concatenate the numerical weather prediction (NWP) data.

    Every entry of ``config.dirpath_data / '数值天气预报'`` is loaded in sorted
    path order through ``dp.read_table(..., index_col=0, parse_dates=True)``.

    Units are stripped from the column names of each file *before* the
    frames are combined (the same approach ``read_measure`` takes), because
    ``pd.concat`` aligns columns by label: headers that differ only in their
    unit text would otherwise turn into separate, partly empty columns.

    Returns
    -------
    pandas.DataFrame
        The combined table after ``dp.sort_and_drop_duplicates``, with every
        column name prefixed by ``'预报'``.
    '''
    dirpath = config.dirpath_data / '数值天气预报'
    frames = [
        dp.remove_column_units(
            dp.read_table(filepath, index_col=0, parse_dates=True)
        )
        for filepath in sorted(dirpath.iterdir())
    ]

    df = (pd.concat(frames, axis=0)
        .pipe(dp.sort_and_drop_duplicates)
        .add_prefix('预报')
    )

    return df

# Build the NWP table, then inspect it with the powerutils helpers.
# NOTE(review): dp.print_time_index and dp.describe are defined in powerutils;
# judging by their names they report the index's time coverage and summary
# statistics -- confirm against that module.
df_nwp = read_nwp()
dp.print_time_index(df_nwp.index)
dp.describe(df_nwp)

In [5]:
# Concatenate the three datasets column-wise (aligned on the index) and save
# the result as a CSV file.
df_all = pd.concat([df_power, df_measure, df_nwp], axis=1)
filepath = config.dirpath_merge / 'train_v1.csv'
# to_csv does not create missing parent directories, so make sure the output
# directory exists before writing (a no-op when it is already there).
filepath.parent.mkdir(parents=True, exist_ok=True)
df_all.to_csv(str(filepath))