Perform the following actions on the input data:

- Normalize $D_{\text{local}}$ values
- Take the arithmetic average of the x and y directions for the normalized $D_{\text{local}}$
- Cap the values of the normalized average
- Shift PMF values
- Substitute infinite PMF values with finite value
- **Scale the PMF values**
- Create periodic duplicates of the resulting data
- Output locations for data profiles

Continuing from `p20201204a_processing.ipynb`.


## Setup

In [1]:
import os
import os.path as osp

In [2]:
import operator
import itertools

In [3]:
import numpy as np

In [4]:
import pandas as pd

In [5]:
import simproc

In [6]:
import simproc.requesthandler.yaml_manager as yaml_manager
import simproc.requesthandler.locators as locators

## Constants

In [7]:
taglist=['12A','16A','20A','27pct','50pct','na_27pct']

In [8]:
dlocal_comp_list=["xx","yy","zz","avg"]

In [9]:
meshmeta_infname_tmpl="box{mtag}.yaml"

In [10]:
dlocal_infname_tmpl="Dlocal_collected_{tag}.csv"

In [11]:
pmf_infname_tmpl="pmf2d_{tag}.csv"

In [12]:
dlocal_outfname_tmpl="D{comp}_{tag}.xz"

In [13]:
dlocal_outfname_tmpl_nodupe="D{comp}_{tag}_nd.csv"

In [14]:
pmf_outfname_tmpl="pmf_{tag}.xz"

In [15]:
pmf_outfname_tmpl_nodupe="pmf_{tag}_nd.csv"

In [16]:
bbox_outfname_tmpl="bbox_{tag}.csv"

In [17]:
profile_data_outfname="profile_data.yaml"

In [18]:
minmax=["min","max"]

In [19]:
minmaxspan=minmax+["span"]

In [20]:
bbox_idx=["mesh","limits","Dxx","Dyy","Dzz","Davg","pmf"]

In [21]:
profile_allpoints="ABCD"

## Data folder

In [22]:
datadir=osp.abspath("../silicate")
assert osp.isdir(datadir)

In [23]:
o=locators.SetDataFolder(datafolder=datadir)

## Locators

In [24]:
#locators.folder_structure.keys()

In [25]:
locators.folder_structure.update(Unprocessed=['inputs','unprocessed'])

In [26]:
locators.folder_structure.update(Processed=['inputs','processed'])

In [27]:
locators.folder_structure.update(Manual=['inputs','manual'])

In [28]:
locators.folder_structure.update(meshmetafile=["mesh","output",0,"metadata"])

In [29]:
def Unprocessed(fname):
  """Return the rendered string for the ``locators.Unprocessed`` locator of ``fname``.

  ``self`` is a module-level name that is looked up when this function is
  called, not when it is defined, so it only has to exist before the first call.
  """
  return self.renderstr(locators.Unprocessed(fname))

In [30]:
def Processed(fname):
  """Return the rendered string for the ``locators.Processed`` locator of ``fname``.

  ``self`` is a module-level name that is looked up when this function is
  called, not when it is defined, so it only has to exist before the first call.
  """
  return self.renderstr(locators.Processed(fname))

In [31]:
def Manual(fname):
  """Return the rendered string for the ``locators.Manual`` locator of ``fname``.

  ``self`` is a module-level name that is looked up when this function is
  called, not when it is defined, so it only has to exist before the first call.
  """
  return self.renderstr(locators.Manual(fname))

In [32]:
def MeshMeta(fname):
  """Return the rendered string for the ``locators.meshmetafile`` locator of ``fname``.

  ``self`` is a module-level name that is looked up when this function is
  called, not when it is defined, so it only has to exist before the first call.
  """
  return self.renderstr(locators.meshmetafile(fname))

## Dummy request for loading data

In [33]:
self=simproc.requesthandler.request.Request(name="ypress.processing")

## Data file names and paths

Manual data

In [34]:
manualdata_fpath=Manual('other_input_values.yaml')
assert osp.isfile(manualdata_fpath)

Mesh metadata

Paths require manual data; see below.

Data limits

In [35]:
datalimits_fpath=Unprocessed('data_limits.yaml')
assert osp.isfile(datalimits_fpath)

$D_{\text{local}}$ input

In [36]:
dloc_inpaths={tag:Unprocessed(dlocal_infname_tmpl.format(tag=tag)) for tag in taglist}

In [37]:
dloc_existence_list=[osp.isfile(fpath) for fpath in dloc_inpaths.values()]
assert all(dloc_existence_list)

PMF input

In [38]:
pmf_inpaths={tag:Unprocessed(pmf_infname_tmpl.format(tag=tag)) for tag in taglist}

In [39]:
pmf_existence_list=[osp.isfile(fpath) for fpath in pmf_inpaths.values()]
assert all(pmf_existence_list)

$D_{\text{local}}$ output

In [40]:
#Output path of every D component of every tag, named after dlocal_outfname_tmpl: {tag: {comp: path}}
dlocal_outpaths={
  tag:{comp:Processed(dlocal_outfname_tmpl.format(comp=comp,tag=tag)) for comp in dlocal_comp_list}
  for tag in taglist
}

In [41]:
#Same layout as above, {tag: {comp: path}}, but named after dlocal_outfname_tmpl_nodupe
dlocal_nodupe_outpaths={
  tag:{comp:Processed(dlocal_outfname_tmpl_nodupe.format(comp=comp,tag=tag)) for comp in dlocal_comp_list}
  for tag in taglist
}

PMF output

In [42]:
pmf_outpaths={tag:Processed(pmf_outfname_tmpl.format(tag=tag)) for tag in taglist}

In [43]:
pmf_nodupe_outpaths={tag:Processed(pmf_outfname_tmpl_nodupe.format(tag=tag)) for tag in taglist}

Bounding Box data

In [44]:
bbox_outpaths={tag:Processed(bbox_outfname_tmpl.format(tag=tag)) for tag in taglist}

Profile data

In [45]:
profile_data_outpath=Processed(profile_data_outfname)

## Read the input data

Manual

In [46]:
other_input_values=yaml_manager.readfile(manualdata_fpath)

In [47]:
D_bulk_value=other_input_values['bulk_D']

In [48]:
limiting_D=other_input_values['limiting_D']

In [49]:
limiting_PMF=other_input_values['limiting_pmf']

In [50]:
pmf_shift_values=other_input_values['pmf_shift_values']

In [51]:
pmf_scaling_values=other_input_values['pmf_scaling_values']

In [52]:
meshes_by_tag=other_input_values["meshes_by_tag"]

Data limits

In [53]:
data_limits=yaml_manager.readfile(datalimits_fpath)

In [54]:
data_limits

{'12A': {'x': [5.3, 5.3], 'y': [5.75, 6.35], 'z': [4.75, 6.05]},
 '16A': {'x': [5.75, 5.75], 'y': [4.9, 5.5], 'z': [4.7, 6.4]},
 '20A': {'x': [5.9, 5.9], 'y': [5.4, 6.0], 'z': [4.65, 6.75]},
 '20A_alt': {'x': [5.8, 5.8], 'y': [5.4, 6.0], 'z': [4.65, 6.75]},
 '27pct': {'x': [5.627, 5.627], 'y': [4.6, 5.2], 'z': [4.55, 6.65]},
 '50pct': {'x': [5.7, 5.7], 'y': [4.4, 5.0], 'z': [4.55, 6.65]},
 'na_27pct': {'x': [5.627, 5.627], 'y': [4.6, 5.2], 'z': [4.55, 6.65]}}

Mesh metadata

In [55]:
meshmeta_fpaths={tag:MeshMeta(meshmeta_infname_tmpl.format(mtag=meshes_by_tag[tag])) for tag in taglist}

In [56]:
meshmeta_existence_list=[osp.isfile(fpath) for fpath in meshmeta_fpaths.values()]
assert all(meshmeta_existence_list)

In [57]:
meshmeta={tag:yaml_manager.readfile(fpath) for tag,fpath in meshmeta_fpaths.items()}

$D_{\text{local}}$

In [58]:
dlocal_in={tag:pd.read_csv(fpath) for tag,fpath in dloc_inpaths.items()}

PMF

In [59]:
pmf_in={tag:pd.read_csv(fpath) for tag,fpath in pmf_inpaths.items()}

## Remove failed fits from the D local data, and normalize by bulk value

In [60]:
def do_trap_err(val):
  """Convert a fit result to float, mapping the "fitting_error" marker to NaN."""
  return np.nan if val=="fitting_error" else float(val)

In [61]:
#Build one table per tag and diagonal component (xx, yy, zz) holding the point
#coordinates and the fitted D divided by the bulk value. A row is kept only when
#its fit is flagged ok and the fitted value is an actual number.
out_dlocal={}
for tag in taglist:
  out_dlocal[tag]={}
  df=dlocal_in[tag]
  for coord in "xyz":
    comp=coord+coord
    newrows=[]
    valcol="D_fit_%s"%coord
    okcol="ok_%s"%coord
    for oldrow in df.itertuples(index=False,name="full_D_results"):
      if not getattr(oldrow,okcol):
        continue
      Dval=do_trap_err(getattr(oldrow,valcol))
      #Test the value, not the object: float() can hand back NaN objects that are
      #not the np.nan singleton, and an identity check would let those through.
      if np.isnan(Dval):
        continue
      addrow=[getattr(oldrow,attr) for attr in "XYZ"]
      addrow.append(Dval/D_bulk_value)
      newrows.append(addrow)
    outdf=pd.DataFrame(newrows,columns=["x","y","z","D%s"%(comp)])
    out_dlocal[tag][comp]=outdf

## Get arithmetic average of $D_{xx}$ and $D_{yy}$ where applicable, and normalize

In [62]:
#Average the normalized x and y fit results at every point where both fits are
#flagged ok, and store the result as the "avg" table of each tag.
for tag in taglist:
  df=dlocal_in[tag]
  newrows=[]
  for oldrow in df.itertuples(index=False,name="full_D_results"):
    if not all(getattr(oldrow,"ok_%s"%coord) for coord in "xy"):
      continue
    Dval_list=[do_trap_err(getattr(oldrow,"D_fit_%s"%coord))/D_bulk_value for coord in "xy"]
    #A failed fit in either direction would make the mean NaN; drop the point,
    #as is done for the individual components.
    if np.isnan(Dval_list).any():
      continue
    addrow=[getattr(oldrow,attr) for attr in "XYZ"]
    addrow.append(np.mean(Dval_list))
    newrows.append(addrow)
  outdf=pd.DataFrame(newrows,columns=["x","y","z","Davg_uncapped"])
  out_dlocal[tag]["avg"]=outdf

## Limit the averaged $D$ values

In [63]:
def dolimit(row):
  """Return the row's 'Davg_uncapped' value, or limiting_D when that value exceeds it."""
  uncapped=row['Davg_uncapped']
  #Same comparison order as min(uncapped,limiting_D), so NaN passes through unchanged
  return limiting_D if limiting_D<uncapped else uncapped

In [64]:
#Add the capped column "Davg": the uncapped average limited from above by limiting_D.
#The clip is done on the whole column at once, which gives the same values as
#capping row by row and also works when a table has no rows.
for tag in taglist:
  df=out_dlocal[tag]["avg"]
  df["Davg"]=df["Davg_uncapped"].clip(upper=limiting_D)

## Apply shift to PMF values

In [65]:
#Add the per-tag shift to every PMF value, leaving the input tables untouched.
shifted_pmf={}
for tag,df in pmf_in.items():
  outdf=df.copy()
  pmf_shift=pmf_shift_values[tag]
  #Whole-column addition; infinite entries stay infinite
  outdf["PMF"]=df["PMF"]+pmf_shift
  outdf.columns=["x","y","z","PMF"] #replace uppercase coordinate columns with lowercase
  shifted_pmf[tag]=outdf

## Replace infinite values with finite value for PMF

In [66]:
#Swap every +inf PMF entry for the finite stand-in limiting_PMF; other values are unchanged
capped_pmf={}
for tag,shifted in shifted_pmf.items():
  is_infinite=shifted["PMF"] == np.inf
  capped=shifted.copy()
  capped.loc[is_infinite,"PMF"]=limiting_PMF
  capped_pmf[tag]=capped

## Scale the PMF values

In [67]:
#Multiply the PMF column of each tag by that tag's scaling value; the capped tables are not modified
out_pmf={}
for tag,capped in capped_pmf.items():
  factor=pmf_scaling_values[tag]
  scaled=capped.copy()
  scaled["PMF"]=scaled["PMF"]*factor
  out_pmf[tag]=scaled

## Store non-duplicated data to file

In [68]:
#Write the final PMF table of each tag, without periodic copies and without the index column
for tag in out_pmf:
  out_pmf[tag].to_csv(pmf_nodupe_outpaths[tag],index=False)

In [69]:
#Write every D table of every tag, without periodic copies and without the index column
for tag in out_dlocal:
  for comp,frame in out_dlocal[tag].items():
    frame.to_csv(dlocal_nodupe_outpaths[tag][comp],index=False)

## Summarize coordinate ranges from various data sources

In [70]:
bbox_columns=[l+m for l in "XYZ" for m in minmaxspan]

In [71]:
#For every tag, tabulate the min, max and span of each coordinate as given by the mesh
#metadata, the data limits file, each D table and the PMF table (one row per source).
bboxes={}
for tag in taglist:
  bbox_df=pd.DataFrame(index=bbox_idx,columns=bbox_columns)
  for coord in "XYZ":
    lcoord=coord.lower()
    for sidx,side in enumerate(minmax):
      col=coord+side
      bbox_df.loc["mesh",col]=meshmeta[tag][col]
      bbox_df.loc["limits",col]=data_limits[tag][lcoord][sidx]
      #side ("min"/"max") doubles as the name of the column method to call
      for comp in dlocal_comp_list:
        bbox_df.loc["D"+comp,col]=getattr(out_dlocal[tag][comp][lcoord],side)()
      bbox_df.loc["pmf",col]=getattr(out_pmf[tag][lcoord],side)()
    #span = max - min for each data source
    spancol=coord+"span"
    maxcol=coord+"max"
    mincol=coord+"min"
    for row in bbox_idx:
      bbox_df.loc[row,spancol]=bbox_df.loc[row,maxcol]-bbox_df.loc[row,mincol]
  bboxes[tag]=bbox_df

In [72]:
#Save the bounding-box table of each tag; the row labels are written as the first column.
#Note that bbox_df stays bound to the last tag's table once this loop finishes.
for tag,bbox_df in bboxes.items():
  bbox_df.to_csv(bbox_outpaths[tag])

## Compute X and Y shifts for periodic copies

In [73]:
Xshift={tag:(bbox_df.loc["mesh","Xmax"]-bbox_df.loc["mesh","Xmin"]) for tag,bbox_df in bboxes.items()}

In [74]:
Yshift={tag:(bbox_df.loc["mesh","Ymax"]-bbox_df.loc["mesh","Ymin"]) for tag,bbox_df in bboxes.items()}

In [75]:
#Offsets of the 3x3 block of periodic images for each tag: every combination of
#-1, 0 and +1 times the mesh extent in x and in y. The (0,0) entry is the unshifted data.
XYshifts={
  tag:[(xm*Xshift[tag],ym*Yshift[tag]) for xm,ym in itertools.product([-1,0,1],repeat=2)]
  for tag in taglist
}

## Create dataframes with periodic duplicates

In [76]:
def gen_duped_df(df,XYshifts):
  """Return a new DataFrame made of one shifted copy of ``df`` per (x,y) offset.

  Arguments:
    df = DataFrame with columns named "x" and "y"; all other columns are
      carried over unchanged. The input frame is not modified.
    XYshifts = iterable of (x shift, y shift) pairs.

  The copies are stacked in the order of ``XYshifts``, each keeping the row
  order of ``df``, and the result gets a fresh 0..n-1 index. The columns are
  addressed by name, so "x" and "y" may sit anywhere in the frame.
  With no shifts at all, an empty frame with the same columns is returned.
  """
  copies=[]
  for xs,ys in XYshifts:
    shifted=df.copy()
    shifted["x"]=df["x"]+xs
    shifted["y"]=df["y"]+ys
    copies.append(shifted)
  if not copies:
    #pd.concat refuses an empty list
    return pd.DataFrame(columns=df.columns)
  return pd.concat(copies,ignore_index=True)

In [77]:
#We only need the "avg" component duplicated for D local
duped_dlocal={tag:gen_duped_df(ddict["avg"],XYshifts[tag]) for tag,ddict in out_dlocal.items()}

In [78]:
#Periodic copies of the final PMF table of each tag
duped_pmf={tag:gen_duped_df(pmf_table,XYshifts[tag]) for tag,pmf_table in out_pmf.items()}

## Output data files for homogenization

In [79]:
#Write the periodically duplicated "avg" D table of each tag, without the index column
for tag in duped_dlocal:
  duped_dlocal[tag].to_csv(dlocal_outpaths[tag]["avg"],index=False)

In [80]:
#Write the periodically duplicated PMF table of each tag, without the index column
for tag in duped_pmf:
  duped_pmf[tag].to_csv(pmf_outpaths[tag],index=False)

## Get unique values of all coordinates

In [81]:
#Sorted distinct x, y and z values found in the PMF table and in the "avg" D table of each tag
uniques={}
for tag in taglist:
  uniques[tag]={}
  for dfname,frame in (("pmf",out_pmf[tag]),("dloc",out_dlocal[tag]["avg"])):
    by_coord={}
    for coord in "xyz":
      distinct=frame[coord].unique()
      distinct.sort()
      by_coord[coord]=distinct
    uniques[tag][dfname]=by_coord

## Select output profile locations

In [82]:
coord_num_values={'x':1,'y':4}

In [83]:
assert np.prod([v for v in coord_num_values.values()])==4, "Sorry, I was expecting exactly 4 profiles."

In [84]:
#Select the requested number of x and y data points, equally spaced within the input data coordinate values
#Result layout: outcoords[tag][var][coordname] = list of selected coordinate values,
#where var is "pmf" or "dloc" and coordname is a key of coord_num_values.
outcoords={}
for tag,udict in uniques.items():
  outcoords[tag]={}
  #Note: bbox_df stays bound to the last tag's table once this loop finishes
  bbox_df=bboxes[tag]
  for var,uqv in udict.items():
    outcoords[tag][var]={}
    for coordname,numvals in coord_num_values.items():
      meshmin=bbox_df.loc["mesh",coordname.upper()+"min"]
      meshmax=bbox_df.loc["mesh",coordname.upper()+"max"]
      vals=[v for v in uqv[coordname] if v >= meshmin and v <= meshmax] #Confirm that point is within mesh
      num_items=len(vals)
      #Split the in-mesh values into numvals equal chunks and take the last value of each chunk,
      #so the final pick is always the largest in-mesh value.
      step=num_items/numvals
      #NOTE(review): with fewer in-mesh values than numvals, int(i*step)-1 can be -1, which
      #indexes from the end of the list; with no in-mesh values the lookup below raises IndexError.
      positions=[int(i*step)-1 for i in range(1,numvals+1)]
      outcoords[tag][var][coordname]=[vals[p] for p in positions]

In [85]:
outcoords[taglist[0]]

{'pmf': {'x': [5.3], 'y': [5.875, 6.025, 6.175, 6.325]},
 'dloc': {'x': [5.3], 'y': [5.85, 6.0, 6.15, 6.35]}}

In [86]:
#Combine the x and y values to generate points in the xy plane for profiles
xy_points={}
for tag,ocd in outcoords.items():
  xy_points[tag]={}
  for var,pointsets in ocd.items():
    flatcoords=[pointsets[coordname] for coordname in 'xy']
    itr=itertools.product(*flatcoords)
    xy_points[tag][var]=[p for p in itr]

In [87]:
xy_points[taglist[0]]

{'pmf': [(5.3, 5.875), (5.3, 6.025), (5.3, 6.175), (5.3, 6.325)],
 'dloc': [(5.3, 5.85), (5.3, 6.0), (5.3, 6.15), (5.3, 6.35)]}

In [88]:
#For every profile of every tag, record:
# - the start location: the selected xy point at the mesh Zmin
# - the end location: the same xy point at the mesh Zmax
# - a query string that brackets the xy point within +/-0.001 in each coordinate
#Profiles are named <letter>_<variable>, with the letter taken from profile_allpoints.
profile_startlocs={}
profile_endlocs={}
profile_queries={}
for tag,xy_allvars in xy_points.items():
  profile_startlocs[tag]={}
  profile_endlocs[tag]={}
  profile_queries[tag]={}
  #Use the bounding box of this tag. Without this lookup the Z limits would come from
  #whatever table an earlier loop left bound to bbox_df, i.e. the same tag every time.
  bbox_df=bboxes[tag]
  zmin=bbox_df.loc['mesh','Zmin']
  zmax=bbox_df.loc['mesh','Zmax']
  for varname, xylist in xy_allvars.items():
    for idx, xyt in enumerate(xylist):
      profname=profile_allpoints[idx]+"_"+varname
      xy=list(xyt)
      profile_startlocs[tag][profname]=xy+[zmin]
      profile_endlocs[tag][profname]=xy+[zmax]
      query_items=[]
      for cdx,coordname in enumerate('xy'):
        coordval=xyt[cdx]
        #Lower bound first, then upper bound
        for op,delta in (('>',-0.001),('<',0.001)):
          query_items.append('%s %s %0.3f'%(coordname,op,coordval+delta))
      profile_queries[tag][profname]=' and '.join(query_items)

## Output profile location data to file

In [89]:
outer_indent=""
mid_indent=outer_indent+"  "
inner_indent=mid_indent+"  "

In [90]:
#Write the profile data as three top-level sections, each holding one mapping per tag
with open(profile_data_outpath,"w") as fp:
  #Profile start and end locations share one layout: a coordinate list per profile name
  location_sections=(("profile_startlocs",profile_startlocs),("profile_endlocs",profile_endlocs))
  for heading,locs_by_tag in location_sections:
    fp.write(outer_indent+heading+":\n")
    for tag in taglist:
      fp.write(mid_indent+tag+":\n")
      for profname,data in locs_by_tag[tag].items():
        datastr=", ".join(["%0.3f"%v for v in data])
        fp.write(inner_indent+profname+": ["+datastr+"]\n")
  #Profile queries, written as double-quoted strings
  fp.write(outer_indent+"profile_queries:\n")
  for tag in taglist:
    fp.write(mid_indent+tag+":\n")
    for profname,data in profile_queries[tag].items():
      fp.write(inner_indent+profname+': "'+data+'"\n')