In [1]:
import pandas as pd
import numpy as np

In [2]:
# Four one-minute timestamps starting at 2000-01-01 00:00.
# 'min' is used instead of the deprecated 'T' alias for minute frequency.
dt_index = pd.date_range('2000-01-01', freq='min', periods=4)

# The second sample is deliberately missing so the fill examples have a gap.
ss = pd.Series([0.0, np.nan, 2.0, 3.0], index=dt_index)
df = pd.DataFrame({'s': ss})

df

Unnamed: 0,s
2000-01-01 00:00:00,0.0
2000-01-01 00:01:00,
2000-01-01 00:02:00,2.0
2000-01-01 00:03:00,3.0


# Upsample and then back fill

In [3]:
# Upsample the original one-sample-per-minute data to a 30-second frequency.
# The rows introduced by the finer grid have no source value and show up as NaN.
# The lowercase '30s' alias replaces the deprecated uppercase '30S' spelling.
df.asfreq(freq='30s')

Unnamed: 0,s
2000-01-01 00:00:00,0.0
2000-01-01 00:00:30,
2000-01-01 00:01:00,
2000-01-01 00:01:30,
2000-01-01 00:02:00,2.0
2000-01-01 00:02:30,
2000-01-01 00:03:00,3.0


In [5]:
# Upsample to 30 seconds, then back fill: each missing value takes the next
# valid observation, but at most one consecutive missing value is filled
# (limit=1), so longer gaps stay partly NaN.
# The lowercase '30s' alias replaces the deprecated uppercase '30S' spelling.
(
    df
    .asfreq(freq='30s')
    .bfill(limit=1)
)

Unnamed: 0,s
2000-01-01 00:00:00,0.0
2000-01-01 00:00:30,
2000-01-01 00:01:00,
2000-01-01 00:01:30,2.0
2000-01-01 00:02:00,2.0
2000-01-01 00:02:30,3.0
2000-01-01 00:03:00,3.0


# Upsample and then forward fill

In [8]:
# Two weekly timestamps; freq='W-WED' means weekly, anchored on Wednesdays.
weekly_index = pd.date_range('1/1/2000', periods=2, freq='W-WED')
state_columns = ['Colorado', 'Texas', 'New York', 'Ohio']

# Random normal values (unseeded, so the numbers differ on every run).
frame = pd.DataFrame(
    np.random.randn(2, 4),
    index=weekly_index,
    columns=state_columns,
)

frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,1.49229,-1.140624,-0.0528,-0.661562
2000-01-12,-0.05534,1.091444,-0.42962,-1.365219


In [10]:
# Upsample the weekly frame to daily frequency. The days between the two
# weekly observations have no source row, so they come out as NaN.
df_daily = frame.asfreq(freq='D')

df_daily

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,1.49229,-1.140624,-0.0528,-0.661562
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-0.05534,1.091444,-0.42962,-1.365219


In [12]:
# Use forward fill: after upsampling to daily frequency, carry each observation
# forward into at most one following missing day (limit=1).
(
    frame
    .asfreq('D')
    .ffill(limit=1)
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,1.49229,-1.140624,-0.0528,-0.661562
2000-01-06,1.49229,-1.140624,-0.0528,-0.661562
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,-0.05534,1.091444,-0.42962,-1.365219


# Upsample by reindex

In [35]:
date = '2021-07-21'

# Four samples spaced two minutes apart, starting at 15:30 on `date`.
# '2min' is used instead of the deprecated '2T' alias.
dt_index = pd.date_range(f'{date} 15:30', freq='2min', periods=4)
ss = pd.Series([0.0, 1.0, 2.0, 3.0], index=dt_index)
df = pd.DataFrame({'value': ss})

df

Unnamed: 0,value
2021-07-21 15:30:00,0.0
2021-07-21 15:32:00,1.0
2021-07-21 15:34:00,2.0
2021-07-21 15:36:00,3.0


In [46]:
# Target index: one timestamp every two minutes from 15:00 to 16:00 on `date`.
# '2min' is used instead of the deprecated '2T' alias.
dt_index = pd.date_range(start=f'{date} 15:00', end=f'{date} 16:00', freq='2min')

# Align df to the wider index; timestamps that df does not contain become NaN.
df_reindex = df.reindex(dt_index)

df_reindex

Unnamed: 0,value
2021-07-21 15:00:00,
2021-07-21 15:02:00,
2021-07-21 15:04:00,
2021-07-21 15:06:00,
2021-07-21 15:08:00,
2021-07-21 15:10:00,
2021-07-21 15:12:00,
2021-07-21 15:14:00,
2021-07-21 15:16:00,
2021-07-21 15:18:00,


In [52]:
# Work on a copy so df_reindex keeps its NaN rows for comparison.
df_reindex_copy = df_reindex.copy()

# Back fill first (leading gaps take the first observed value), then forward
# fill (trailing gaps take the last observed value).
# The result is assigned back to the column instead of calling
# fillna(method=..., inplace=True) on df_reindex_copy['value']: an in-place call
# on a selected column is chained assignment, which does not update the frame
# under pandas Copy-on-Write, and the `method` argument of fillna is deprecated.
df_reindex_copy['value'] = df_reindex_copy['value'].bfill().ffill()

df_reindex_copy

Unnamed: 0,value
2021-07-21 15:00:00,0.0
2021-07-21 15:02:00,0.0
2021-07-21 15:04:00,0.0
2021-07-21 15:06:00,0.0
2021-07-21 15:08:00,0.0
2021-07-21 15:10:00,0.0
2021-07-21 15:12:00,0.0
2021-07-21 15:14:00,0.0
2021-07-21 15:16:00,0.0
2021-07-21 15:18:00,0.0
