In [11]:
import numpy as np
import pandas as pd
from openbb import obb
# Set OpenBB's output preference to "dataframe"; the cells below call
# DataFrame methods (.index, .reindex) directly on what obb returns.
obb.user.preferences.output_type = "dataframe"

In [13]:
# Download the AAPL price history through the yfinance provider.
# NOTE(review): the last row shown in the outputs further down is 2023-07-05,
# one day before end_date — check whether this provider treats end_date as
# exclusive before relying on the final day being present.
history_query = {
    "start_date": "2020-07-01",
    "end_date": "2023-07-06",
    "provider": "yfinance",
}
df = obb.equity.price.historical("AAPL", **history_query)

In [14]:
# Build a daily calendar index that spans every day from the first to the
# last date present in the time series, so that days absent from df show up
# as gaps once the frame is reindexed onto it.
first_day, last_day = df.index.min(), df.index.max()
calendar_dates = pd.date_range(first_day, last_day, freq="D")

In [15]:
# Align the price history to the daily calendar. Dates that are not in df's
# index come back as all-NaN rows, which the following cells fill in.
calendar_prices = df.reindex(index=calendar_dates)

In [25]:
# Filling the missing data in different ways.
# First: backward fill, which replaces each NaN value with the next valid
# observation further down the same column.
calendar_prices.bfill(axis="index")

Unnamed: 0,open,high,low,close,volume,split_ratio,dividend
2020-07-01,91.279999,91.839996,90.977501,91.027496,110737200.0,0.0,0.0
2020-07-02,91.962502,92.617500,90.910004,91.027496,114041600.0,0.0,0.0
2020-07-03,92.500000,93.945000,92.467499,93.462502,118655600.0,0.0,0.0
2020-07-04,92.500000,93.945000,92.467499,93.462502,118655600.0,0.0,0.0
2020-07-05,92.500000,93.945000,92.467499,93.462502,118655600.0,0.0,0.0
...,...,...,...,...,...,...,...
2023-07-02,193.779999,193.880005,191.759995,192.460007,31458200.0,0.0,0.0
2023-07-03,193.779999,193.880005,191.759995,192.460007,31458200.0,0.0,0.0
2023-07-04,191.570007,192.979996,190.619995,191.330002,46920300.0,0.0,0.0
2023-07-05,191.570007,192.979996,190.619995,191.330002,46920300.0,0.0,0.0


In [27]:
# Second: forward fill, which carries the last valid observation forward
# into the NaN rows that follow it in the same column.
calendar_prices.ffill(axis="index")

Unnamed: 0,open,high,low,close,volume,split_ratio,dividend
2020-07-01,91.279999,91.839996,90.977501,91.027496,110737200.0,0.0,0.0
2020-07-02,91.962502,92.617500,90.910004,91.027496,114041600.0,0.0,0.0
2020-07-03,91.962502,92.617500,90.910004,91.027496,114041600.0,0.0,0.0
2020-07-04,91.962502,92.617500,90.910004,91.027496,114041600.0,0.0,0.0
2020-07-05,91.962502,92.617500,90.910004,91.027496,114041600.0,0.0,0.0
...,...,...,...,...,...,...,...
2023-07-02,191.630005,194.479996,191.259995,193.970001,85069600.0,0.0,0.0
2023-07-03,193.779999,193.880005,191.759995,192.460007,31458200.0,0.0,0.0
2023-07-04,193.779999,193.880005,191.759995,192.460007,31458200.0,0.0,0.0
2023-07-05,191.570007,192.979996,190.619995,191.330002,46920300.0,0.0,0.0


In [29]:
# Third: linear interpolation, which fills each gap with evenly spaced values
# between the valid observations on either side. It is applied to every
# column, so volume is interpolated as well (see the output below).
calendar_prices.interpolate(method="linear", axis="index")

Unnamed: 0,open,high,low,close,volume,split_ratio,dividend
2020-07-01,91.279999,91.839996,90.977501,91.027496,1.107372e+08,0.0,0.0
2020-07-02,91.962502,92.617500,90.910004,91.027496,1.140416e+08,0.0,0.0
2020-07-03,92.096876,92.949375,91.299377,91.636248,1.151951e+08,0.0,0.0
2020-07-04,92.231251,93.281250,91.688751,92.244999,1.163486e+08,0.0,0.0
2020-07-05,92.365625,93.613125,92.078125,92.853750,1.175021e+08,0.0,0.0
...,...,...,...,...,...,...,...
2023-07-02,193.063334,194.080002,191.593328,192.963338,4.932867e+07,0.0,0.0
2023-07-03,193.779999,193.880005,191.759995,192.460007,3.145820e+07,0.0,0.0
2023-07-04,192.675003,193.430000,191.189995,191.895004,3.918925e+07,0.0,0.0
2023-07-05,191.570007,192.979996,190.619995,191.330002,4.692030e+07,0.0,0.0


In [31]:
# Fourth: cubic spline interpolation of the missing values.
# The spline is fitted to every column, not just prices: in the output below
# split_ratio and dividend pick up tiny non-zero (even negative) values such
# as 3.592598e-24 on filled days, so treat the non-price columns with care.
calendar_prices.interpolate(method="cubicspline", axis="index")

Unnamed: 0,open,high,low,close,volume,split_ratio,dividend
2020-07-01,91.279999,91.839996,90.977501,91.027496,1.107372e+08,0.000000e+00,0.000000e+00
2020-07-02,91.962502,92.617500,90.910004,91.027496,1.140416e+08,0.000000e+00,0.000000e+00
2020-07-03,91.905178,93.018261,91.109728,91.804272,1.181507e+08,3.592598e-24,1.363686e-15
2020-07-04,91.628604,93.240770,91.492200,92.824548,1.213994e+08,7.185197e-24,2.727373e-15
2020-07-05,91.653353,93.483519,91.972948,93.555050,1.221227e+08,7.185197e-24,2.727373e-15
...,...,...,...,...,...,...,...
2023-07-02,194.071253,195.513330,192.326285,194.488423,5.806444e+07,0.000000e+00,-2.115795e-21
2023-07-03,193.779999,193.880005,191.759995,192.460007,3.145820e+07,0.000000e+00,0.000000e+00
2023-07-04,192.906895,193.224617,191.295466,191.403875,3.331034e+07,0.000000e+00,4.882604e-22
2023-07-05,191.570007,192.979996,190.619995,191.330002,4.692030e+07,0.000000e+00,0.000000e+00
