In [1]:
from icecap import Project
import icecap as ice
import polars as pl 

In [2]:
# Create the icecap project on which the named frames below are registered.
proj = Project()

# Scan the World Bank GDP CSV and register the untouched result as 'raw'.
raw_input = proj.scan_csv("world_bank_gdp_data.csv")
proj['raw'] = raw_input

# The working frame is called `gdp` (not `input`) so that the Python builtin
# `input()` is not shadowed for every later cell of the kernel session.
#
# Keep the country code (renamed to "Country") plus every column whose name
# starts with a digit. Those columns are cast to Float32 with strict=False, so
# values that cannot be converted become null instead of raising, and each
# column name is truncated to its first four characters.
# NOTE(review): Float32 keeps only about 7 significant digits, which is visible
# as rounding in the differences computed later; consider Float64 if that
# precision matters - confirm downstream consumers do not rely on the f32 dtype.
gdp = proj['raw'].select(
    pl.col("Country Code").alias("Country"),
    pl.col("^[0-9]+.*$").cast(pl.Float32, strict=False).map_alias(lambda name: name[:4]),
)

# Pivot tall: one row per (Country, Date) pair with the value in "GDP".
gdp = gdp.melt(id_vars=["Country"], variable_name="Date", value_name="GDP")

# Cast dates to numbers: "Date" holds the truncated column names and becomes
# Int32; every other column is passed through unchanged.
gdp = gdp.select(
    pl.col("Country"),
    pl.col("Date").cast(pl.Int32),
    pl.col("*").exclude("Country", "Date"),
)

# Drop rows containing nulls and sort, so that consecutive rows of one country
# are in ascending Date order (the shift/rolling expressions applied to this
# frame later depend on that order).
gdp = gdp.drop_nulls().sort(by=["Country", "Date"])

# Register the cleaned frame; the project key stays 'input' for later cells.
proj['input'] = gdp


In [3]:
# Build the inspection widget for the frame registered as 'input'. Leaving
# `widget` as the last expression of the cell renders it as the cell output.
widget = proj['input'].inspect()
widget

HBox(children=(VBox(children=(IntSlider(value=5, max=5), Output()), layout=Layout(min_width='320px')), Output(…

In [10]:
# Work on a copy of the registered 'input' frame.
# NOTE(review): presumably the copy keeps proj['input'] itself unchanged by the
# column assignments below - confirm against icecap's `copy` semantics.
calc = proj['input'].copy()

# Rejected design, kept for reference: combining the `over` context with an
# explicit `with_columns` call will not be supported. Anyone who wants
# `with_columns` should write the `.over(...)` expression directly, e.g.:
# with calc.over("Country"):
#     calc = calc.with_columns(diff = (pl.col("GDP") - pl.col("GDP").shift(1)).over("Country"))

# Supported form: assign derived columns by name inside the `over` context.
# NOTE(review): presumably each assigned expression is evaluated per "Country"
# group - confirm against icecap's `over` documentation.
# Order matters: 'diff' must be assigned before the two columns derived from it.
with calc.over("Country"):
    # Change in GDP relative to the previous row (rows are sorted by Date).
    calc['diff']     = pl.col("GDP") - pl.col("GDP").shift(1)
    # Rolling mean and standard deviation of that change over a 5-row window.
    calc['diff_avg'] = pl.col('diff').rolling_mean(5)  
    calc['diff_std'] = pl.col('diff').rolling_std(5)

# Register the result so later cells can fetch it as proj['calc'].
proj['calc'] = calc 

# Render the inspection widget for 'calc'.
# NOTE(review): `until='input'` presumably limits the displayed steps to those
# after the frame named 'input' - confirm.
proj['calc'].inspect(until='input')



HBox(children=(VBox(children=(IntSlider(value=9, max=9), Output()), layout=Layout(min_width='320px')), Output(…

In [5]:
# Wide display table built from the 'calc' frame in a single chain:
#   1. keep the rows selected by Date.is_between(2015, 2020),
#   2. melt to long form with "Country" and "Date" as id columns and the
#      variable column named "Field",
#   3. collect, then pivot "value" so each Date becomes a column, indexed by
#      (Country, Field),
#   4. sort by Country and Field.
disp = (
    proj['calc']
    .filter(pl.col("Date").is_between(2015, 2020))
    .melt(["Country", "Date"], [], "Field")
    .collect()
    .pivot("value", ["Country", "Field"], "Date")
    .sort(by=["Country", "Field"])
)

# Render the inspection widget for the pivoted table.
# NOTE(review): the predicate presumably stops the displayed history at the
# frame whose `parent_attr` is "copy" - confirm against icecap's `inspect`.
disp.inspect(until=lambda frame: frame.parent_attr == "copy")

HBox(children=(VBox(children=(IntSlider(value=7, max=7), Output()), layout=Layout(min_width='320px')), Output(…

In [6]:
# Alternative quick looks, kept for reference:
# display(proj['calc'].groupby("Country", maintain_order=True).last().head(5).collect())
# display(proj['calc'].filter(pl.col("Date") == 2010).head(5).collect())

# Show GDP, diff and diff_avg for 2010 <= Date < 2015, one row per Country and
# one column per (value, Date) combination.
display(
    proj['calc']
    .filter((pl.col("Date") >= 2010) & (pl.col("Date") < 2015))
    .collect()
    .pivot(["GDP", "diff", "diff_avg"], "Country", "Date")
)

Country,GDP_Date_2010,GDP_Date_2011,GDP_Date_2012,GDP_Date_2013,GDP_Date_2014,diff_Date_2010,diff_Date_2011,diff_Date_2012,diff_Date_2013,diff_Date_2014,diff_avg_Date_2010,diff_avg_Date_2011,diff_avg_Date_2012,diff_avg_Date_2013,diff_avg_Date_2014
str,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
"""ABW""",2.4536e9,2.6380e9,2.6151e9,2.7279e9,2.7911e9,-1.0000e8,1.84357632e8,-2.2905088e7,1.12849152e8,6.3128576e7,1.8770944e7,3.3631284e7,-1.2513946e7,-2.3016756e7,4.7486056e7
"""AFE""",8.6352e11,9.6782e11,9.7535e11,9.8599e11,1.0065e12,1.4162e11,1.0431e11,7.5302e9,1.0632e10,2.0539e10,6.9916e10,7.8048e10,6.2497e10,5.5090e10,5.6925e10
"""AFG""",1.5634e10,1.8190e10,2.0204e10,2.0564e10,2.0551e10,3.4790e9,2.5566e9,2.0132e9,3.60912896e8,-1.3903872e7,1.8815e9,2.2438e9,2.0976e9,2.0629e9,1.6791e9
"""AFW""",5.9852e11,6.8202e11,7.3759e11,8.3395e11,8.9432e11,9.0159e10,8.3494e10,5.5574e10,9.6359e10,6.0374e10,5.7526e10,5.7019e10,5.4421e10,5.3231e10,7.7192e10
"""AGO""",8.1700e10,1.0944e11,1.2500e11,1.3340e11,1.3724e11,1.1392e10,2.7737e10,1.5562e10,8.4034e9,3.8429e9,8.9457e9,1.1411e10,1.1946e10,8.9726e9,1.3387e10
"""ALB""",1.1927e10,1.2891e10,1.2320e10,1.2776e10,1.3228e10,-1.1728e8,9.63833856e8,-5.7093e8,4.56390656e8,4.51922944e8,7.74969856e8,7.98937088e8,3.28501856e8,-2.1025588e7,2.3678832e8
"""AND""",3.4499e9,3.6291e9,3.1887e9,3.1935e9,3.2717e9,-2.2426e8,1.79207936e8,-4.4048e8,4.86016e6,7.8172672e7,5.8019712e7,3.4573696e7,-1.5275e8,-1.7845e8,-8.050056e7
"""ARB""",2.3346e12,2.5560e12,2.8017e12,2.8597e12,2.9057e12,3.4758e11,2.2134e11,2.4572e11,5.7972e10,4.6064e10,2.0605e11,2.0209e11,2.0075e11,1.1933e11,1.8374e11
"""ARE""",2.8979e11,3.5067e11,3.8461e11,4.0022e11,4.1411e11,3.6240e10,6.0879e10,3.3944e10,1.5608e10,1.3887e10,2.1834e10,2.5710e10,2.5339e10,1.6949e10,3.2112e10
"""ARG""",4.2363e11,5.3016e11,5.4598e11,5.5203e11,5.2632e11,9.0651e10,1.0653e11,1.5824e10,6.0427e9,-2.5705e10,4.4978e10,5.9520e10,5.1690e10,3.8093e10,3.8669e10


In [7]:
# Disabled debugging helpers, kept for reference.
# NOTE(review): presumably these show the frames registered on the project and
# the recorded history of `calc` - confirm before re-enabling.
# display(proj.named)
# display(ice.traverse(calc))

In [8]:
# Disabled sketch, kept for reference. It is not executed.
# NOTE(review): this looks like an earlier draft of the grouped-assignment API
# (`calc.group(...) as grp`) plus an `inspect_df` helper that takes a callback
# to trim each frame before display - confirm whether either still exists
# before re-enabling.
# with calc.group("Country") as grp:
#     grp['diff']     = pl.col("GDP") - pl.col("GDP").shift(1)
#     grp['diff_avg'] = pl.col('diff').rolling_mean(5)

# def adjust(df):
#     if "Country" in df.columns:
#         return df.filter(pl.col("Country")=="USA").head(10).collect()
#     else:
#         return df.head(10).collect()

# inspect_df(calc, adjust)

TODOs

1. 'until' on traverse
2. track graphs (joins)
3. Diagnostics for Period Series like tables. 
4. Table formats