Perform the following actions on the input data, for the `chan6` analysis set:

- Normalize $D_{\text{local}}$ values
- Take the arithmetic average of the x and y directions for the normalized $D_{\text{local}}$
- Cap the values of the normalized average
- Shift PMF values (new data is based on the gauge condition)
- Take the spatial average of the PMF values in the $y$-direction **but keep the unaveraged version as well**
- Substitute infinite PMF values with finite value
- Scale the PMF values (factor is currently 1.0)
- Reflect values across right AND top boundaries to create periodic unit cell in both directions
- Output locations for data profiles - not completed

Continuing from `p2021-0507b_processing_chan6.ipynb`.



## Setup

In [1]:
import os
import os.path as osp

In [2]:
import operator
import itertools

In [3]:
import numpy as np

In [4]:
import pandas as pd

In [5]:
import simproc

In [6]:
import simproc.requesthandler.yaml_manager as yaml_manager
import simproc.requesthandler.locators as locators

## Constants

In [7]:
#Tolerance for matching coordinate values; default `tol` of select_by_value and average_across
SELTOL=1e-6

In [8]:
#Half-width of the z window used by do_averaging to look up a row's averaged PMF value
STEPOFF=1e-4

In [9]:
taglist=['12A','16A','20A','27pct','50pct','na_27pct'] #All model tags (as opposed to mesh tags)

In [10]:
dlocal_comp_list=["xx","yy","zz","avg"] #Components of Dlocal

In [11]:
dlocal_sel_comp="avg" #component selected for use in subsequent analyses

In [12]:
meshmeta_infname_tmpl="{mtag}.yaml" #Filename template for mesh metadata files

In [13]:
dlocal_infname_tmpl="Dlocal_collected_{tag}.csv" #Filename template for input Dlocal files

In [14]:
pmf_infname_tmpl="pmf2d_{tag}.csv" #Filename template for input PMF files

In [15]:
dlocal_outfname_tmpl="D{comp}_{tag}.xz" #Filename template for output Dlocal files

In [16]:
dlocal_outfname_tmpl_nodupe="D{comp}_{tag}_nd.csv" #Filename template for output Dlocal files without coordinate alterations

In [17]:
pmf_outfname_tmpl="pmf_{tag}.xz" #Filename template for output PMF files

In [18]:
pmf_noavg_outfname_tmpl="pmf_noavg_{tag}.xz" #Filename template for un-averaged output PMF files

In [19]:
pmf_outfname_tmpl_nodupe="pmf_{tag}_nd.csv" #Filename template for output PMF files without coordinate alterations

In [20]:
pmf_average_outfname="pmf_average_values.yaml" #Filename for output of PMF average values

In [21]:
bbox_outfname_tmpl="bbox_{tag}.csv" #Filename template for output bounding box files

In [22]:
profile_data_outfname="profile_data.yaml" #Filename for output of profile locations data

In [23]:
minmax=["min","max"] #For bounding boxes

In [24]:
minmaxspan=minmax+["span"] #For bounding box and span

In [25]:
bbox_idx=["mesh","limits","Dxx","Dyy","Dzz","Davg","pmf"] #Different data sources for coordinate bounding boxes

In [26]:
profile_allpoints="ABCD" #Profile point names

## Data folder

In [27]:
datadir=osp.abspath("../chan6")
assert osp.isdir(datadir)

In [28]:
o=locators.SetDataFolder(datafolder=datadir)

## Locators

In [29]:
#locators.folder_structure.keys()

In [30]:
locators.folder_structure.update(Unprocessed=['inputs','unprocessed'])

In [31]:
locators.folder_structure.update(Processed=['inputs','processed'])

In [32]:
locators.folder_structure.update(Manual=['inputs','manual'])

In [33]:
locators.folder_structure.update(meshmetafile=["mesh","output",0,"metadata"])

In [34]:
def Unprocessed(fname):
  """Render the Unprocessed locator for `fname` through the request's renderstr.

  `self` is looked up when the function is called, not when it is defined."""
  return self.renderstr(locators.Unprocessed(fname))

In [35]:
def Processed(fname):
  """Render the Processed locator for `fname` through the request's renderstr.

  `self` is looked up when the function is called, not when it is defined."""
  return self.renderstr(locators.Processed(fname))

In [36]:
def Manual(fname):
  """Render the Manual locator for `fname` through the request's renderstr.

  `self` is looked up when the function is called, not when it is defined."""
  return self.renderstr(locators.Manual(fname))

In [37]:
def MeshMeta(fname):
  """Render the meshmetafile locator for `fname` through the request's renderstr.

  `self` is looked up when the function is called, not when it is defined."""
  return self.renderstr(locators.meshmetafile(fname))

## Dummy request for loading data

In [38]:
#Named `self` because the path helpers (Unprocessed, Processed, Manual, MeshMeta) call self.renderstr(...);
#it must be bound before any of those helpers is called
self=simproc.requesthandler.request.Request(name="chan6.processing")

## Data file names and paths

Manual data

In [39]:
manualdata_fpath=Manual('other_input_values.yaml')
assert osp.isfile(manualdata_fpath)

Mesh metadata

Paths require manual data; see below.

Data limits

In [40]:
datalimits_fpath=Unprocessed('data_limits.yaml')
assert osp.isfile(datalimits_fpath)

$D_{\text{local}}$ input

In [41]:
dloc_inpaths={tag:Unprocessed(dlocal_infname_tmpl.format(tag=tag)) for tag in taglist}

In [42]:
dloc_existence_list=[osp.isfile(fpath) for fpath in dloc_inpaths.values()]
assert all(dloc_existence_list)

PMF input

In [43]:
pmf_inpaths={tag:Unprocessed(pmf_infname_tmpl.format(tag=tag)) for tag in taglist}

In [44]:
pmf_existence_list=[osp.isfile(fpath) for fpath in pmf_inpaths.values()]
assert all(pmf_existence_list)

$D_{\text{local}}$ output

In [45]:
#Output paths for the Dlocal files, keyed by model tag and then by component
dlocal_outpaths={
  tag:{comp:Processed(dlocal_outfname_tmpl.format(tag=tag,comp=comp)) for comp in dlocal_comp_list}
  for tag in taglist
}

In [46]:
#Output paths for the Dlocal files without coordinate alterations, keyed by model tag and then by component
dlocal_nodupe_outpaths={
  tag:{comp:Processed(dlocal_outfname_tmpl_nodupe.format(tag=tag,comp=comp)) for comp in dlocal_comp_list}
  for tag in taglist
}

PMF output

In [47]:
pmf_outpaths={tag:Processed(pmf_outfname_tmpl.format(tag=tag)) for tag in taglist}

In [48]:
pmf_noavg_outpaths={tag:Processed(pmf_noavg_outfname_tmpl.format(tag=tag)) for tag in taglist}

In [49]:
pmf_nodupe_outpaths={tag:Processed(pmf_outfname_tmpl_nodupe.format(tag=tag)) for tag in taglist}

In [50]:
pmf_average_outpath=Processed(pmf_average_outfname)

Bounding Box data

In [51]:
bbox_outpaths={tag:Processed(bbox_outfname_tmpl.format(tag=tag)) for tag in taglist}

Profile data

In [52]:
profile_data_outpath=Processed(profile_data_outfname)

## Read the input data

Manual

In [53]:
other_input_values=yaml_manager.readfile(manualdata_fpath)

In [54]:
D_bulk_value=other_input_values['bulk_D']

In [55]:
limiting_D=other_input_values['limiting_D']

In [56]:
limiting_PMF=other_input_values['limiting_pmf']

In [57]:
pmf_shift_values=other_input_values['pmf_shift_values']

In [58]:
pmf_scaling_values=other_input_values['pmf_scaling_values']

In [59]:
meshes_by_tag=other_input_values["meshes_by_tag"]

In [60]:
reflection_lines=other_input_values["reflection_lines"]

Data limits

In [61]:
data_limits=yaml_manager.readfile(datalimits_fpath)

In [62]:
data_limits

{'12A': {'x': [5.3, 5.3], 'y': [5.75, 6.35], 'z': [4.75, 6.05]},
 '16A': {'x': [5.75, 5.75], 'y': [4.9, 5.5], 'z': [4.7, 6.4]},
 '20A': {'x': [5.9, 5.9], 'y': [5.4, 6.0], 'z': [4.65, 6.75]},
 '20A_alt': {'x': [5.8, 5.8], 'y': [5.4, 6.0], 'z': [4.65, 6.75]},
 '27pct': {'x': [5.627, 5.627], 'y': [4.6, 5.2], 'z': [4.55, 6.65]},
 '50pct': {'x': [5.7, 5.7], 'y': [4.4, 5.0], 'z': [4.55, 6.65]},
 'na_27pct': {'x': [5.627, 5.627], 'y': [4.6, 5.2], 'z': [4.55, 6.65]}}

Mesh metadata

In [63]:
meshmeta_fpaths={tag:MeshMeta(meshmeta_infname_tmpl.format(mtag=meshes_by_tag[tag])) for tag in taglist}

In [64]:
meshmeta_existence_list=[osp.isfile(fpath) for fpath in meshmeta_fpaths.values()]
assert all(meshmeta_existence_list)

In [65]:
meshmeta={tag:yaml_manager.readfile(fpath) for tag,fpath in meshmeta_fpaths.items()}

$D_\text{local}$

In [66]:
dlocal_in={tag:pd.read_csv(fpath) for tag,fpath in dloc_inpaths.items()}

PMF

In [67]:
pmf_in={tag:pd.read_csv(fpath) for tag,fpath in pmf_inpaths.items()}

## Remove failed fits from the D local data, and normalize by bulk value

In [68]:
def do_trap_err(val):
  """Convert a raw fit result to a float, mapping the failure marker to NaN.

  val = raw value; the string "fitting_error" marks a failed fit

  Returns np.nan for the failure marker, otherwise float(val)."""
  fit_failed=(val=="fitting_error")
  return np.nan if fit_failed else float(val)

In [69]:
#Per-direction Dlocal normalized by the bulk value, keyed by model tag and then by component ("xx","yy","zz").
#Only points whose fit is flagged ok and whose fitted value is a real number are kept.
out_dlocal={}
for tag in taglist:
  out_dlocal[tag]={}
  df=dlocal_in[tag]
  for coord in "xyz":
    comp=coord+coord
    okcol="ok_%s"%coord
    valcol="D_fit_%s"%coord
    newrows=[]
    for oldrow in df.itertuples(index=False,name="full_D_results"):
      #Skip points whose fit in this direction was not flagged ok
      if not getattr(oldrow,okcol):
        continue
      Dval=do_trap_err(getattr(oldrow,valcol))
      #NaN has to be detected by value: an identity test against np.nan only recognizes
      #that one object, so a NaN arriving as any other float object would be kept
      if np.isnan(Dval):
        continue
      newrows.append([oldrow.X,oldrow.Y,oldrow.Z,Dval/D_bulk_value])
    out_dlocal[tag][comp]=pd.DataFrame(newrows,columns=["x","y","z","D%s"%(comp)])

## Get arithmetic average of $D_{xx}$ and $D_{yy}$ where applicable, and normalize

In [70]:
#Arithmetic mean of the normalized x and y values, for points where both fits are flagged ok.
#Stored under the "avg" component with the value column "Davg_uncapped".
for tag in taglist:
  df=dlocal_in[tag]
  newrows=[]
  for oldrow in df.itertuples(index=False,name="full_D_results"):
    if not all([getattr(oldrow,"ok_%s"%coord) for coord in "xy"]):
      continue
    Dval_list=[do_trap_err(getattr(oldrow,"D_fit_%s"%coord))/D_bulk_value for coord in "xy"]
    Dmean=np.mean(Dval_list)
    #A failed fit in either direction makes the mean NaN; drop such points, as is done for
    #the per-direction tables, so that no NaN reaches the output files
    if np.isnan(Dmean):
      continue
    newrows.append([oldrow.X,oldrow.Y,oldrow.Z,Dmean])
  outdf=pd.DataFrame(newrows,columns=["x","y","z","Davg_uncapped"])
  out_dlocal[tag]["avg"]=outdf

## Limit the averaged $D$ values

In [71]:
def dolimit(row):
  """Return the row's uncapped average D, limited from above by limiting_D.

  row = mapping (e.g. dataframe row) with a 'Davg_uncapped' entry"""
  uncapped=row['Davg_uncapped']
  return limiting_D if limiting_D<uncapped else uncapped

In [72]:
#Add the capped column "Davg" next to "Davg_uncapped".
#Series.clip caps the whole column at once (NaN entries stay NaN) and, unlike a row-wise
#apply, does not depend on what apply returns for a frame with no rows.
for tag in taglist:
  df=out_dlocal[tag]["avg"]
  df["Davg"]=df["Davg_uncapped"].clip(upper=limiting_D)

## Apply shift to PMF values

In [73]:
#Shifted copy of each input PMF table, keyed by model tag
shifted_pmf={}
for tag,df in pmf_in.items():
  outdf=df.copy()
  #Add the per-tag shift to the whole column at once instead of applying a closure row by row
  outdf["PMF"]=df["PMF"]+pmf_shift_values[tag]
  #NOTE(review): the rename below is positional, so it assumes the input columns are X, Y, Z, PMF in that order
  outdf.columns=["x","y","z","PMF"] #replace uppercase coordinate columns with lowercase
  shifted_pmf[tag]=outdf

## Average the PMF in the $y$-direction

In [74]:
def select_by_value(inframe,col,val,tol=SELTOL):
  """Return the rows of a dataframe whose entry in one column lies strictly within a tolerance of a target.
  
  inframe = dataframe to filter
  col = name of the column to compare
  val = target value
  tol = half-width of the open window (val-tol, val+tol)"""
  column=inframe[col]
  lower=val-tol
  upper=val+tol
  in_window=(column>lower) & (column<upper)
  return inframe[in_window]

In [75]:
def average_across(inframe,selcol,avgcol,tol=SELTOL):
  """Average one column separately for each unique value of another column.
  
  inframe = input dataframe
  selcol = column whose sorted unique values define the groups
  avgcol = column to average within each group
  tol = tolerance used when matching rows to each unique value

  Returns a dataframe with one row per unique selcol value, holding that value and the mean of avgcol."""
  unique_vals=sorted(inframe[selcol].unique())
  #Rows are matched to each unique value through select_by_value, i.e. within the tolerance
  rows=[(uv,select_by_value(inframe,selcol,uv,tol)[avgcol].mean()) for uv in unique_vals]
  return pd.DataFrame(rows,columns=[selcol,avgcol])

In [76]:
#Table of averaged PMF (one row per unique z) for each model tag
pmf_avg_dict={tag:average_across(shifted_pmf[tag],'z','PMF') for tag in taglist}

In [77]:
#Copy of each shifted PMF table in which every row's PMF is replaced by the average for its z value
averaged_pmf={}
for tag in taglist:
  pmf_avg=average_across(shifted_pmf[tag],'z','PMF')
  def do_averaging(row):
    #Find the single averaged entry whose z lies within STEPOFF of this row's z
    zval=row['z']
    lower=zval-STEPOFF
    upper=zval+STEPOFF
    hits=pmf_avg.query("z > {} and z < {}".format(lower,upper))
    assert len(hits)==1
    return hits["PMF"].iloc[0]
  replaced=shifted_pmf[tag].copy()
  replaced["PMF"]=replaced.apply(do_averaging,axis=1)
  averaged_pmf[tag]=replaced

## Replace infinite values with finite value for PMF

In [78]:
#Copies of the averaged PMF tables with +inf entries replaced by limiting_PMF
capped_pmf={tag:frame.copy() for tag,frame in averaged_pmf.items()}
for capped in capped_pmf.values():
  is_inf=capped["PMF"] == np.inf
  capped.loc[is_inf,"PMF"]=limiting_PMF

In [79]:
#Copies of the shifted (un-averaged) PMF tables with +inf entries replaced by limiting_PMF
capped_pmf_noavg={tag:frame.copy() for tag,frame in shifted_pmf.items()}
for capped in capped_pmf_noavg.values():
  is_inf=capped["PMF"] == np.inf
  capped.loc[is_inf,"PMF"]=limiting_PMF

## Scale the PMF values

In [80]:
#Capped, averaged PMF tables with the PMF column multiplied by the per-tag scaling value
out_pmf={
  tag:frame.assign(PMF=frame["PMF"]*pmf_scaling_values[tag])
  for tag,frame in capped_pmf.items()
}

In [81]:
#Capped, un-averaged PMF tables with the PMF column multiplied by the per-tag scaling value
out_pmf_noavg={
  tag:frame.assign(PMF=frame["PMF"]*pmf_scaling_values[tag])
  for tag,frame in capped_pmf_noavg.items()
}

## Get the average PMF values

In [82]:
#Single mean PMF per model tag: the mean over z of the per-z averages
pmf_averages={
  tag:average_across(frame,'z',"PMF")["PMF"].mean()
  for tag,frame in out_pmf.items()
}
pmf_averages

{'12A': 3.486943556062595,
 '16A': 2.3847612643778646,
 '20A': 3.6218699936266954,
 '27pct': 5.374087503834025,
 '50pct': 7.17903571906422,
 'na_27pct': -2.406500096028664}

In [83]:
#One "tag: value" line per model tag
average_lines=["%s: %f\n"%(tag,val) for tag,val in pmf_averages.items()]
with open(pmf_average_outpath,'w') as fp:
  fp.writelines(average_lines)

## Store non-duplicated data to file

In [84]:
#Write the averaged, capped and scaled PMF tables (before mirroring), without the index column
for tag,df in out_pmf.items():
  df.to_csv(pmf_nodupe_outpaths[tag],index=False)

In [85]:
#Write every Dlocal component table of every tag (before mirroring), without the index column
for tag,ddict in out_dlocal.items():
  for comp,df in ddict.items():
    df.to_csv(dlocal_nodupe_outpaths[tag][comp],index=False)

## Store non-averaged PMF data to file

In [86]:
#Write the capped and scaled PMF tables that were not averaged, without the index column
for tag,df in out_pmf_noavg.items():
  df.to_csv(pmf_noavg_outpaths[tag],index=False)

## Reflect data around both right and top boundaries, and switch from $y$-$z$ to XY plane.

In [87]:
def generate_mirrored_df(df,yref,zref,valcol):
  """Build a four-fold mirrored copy of y-z data, relabelled as X-Y.

  Every input row with z <= zref yields four consecutive output rows carrying the same
  data value: the original point, its reflection about y=yref, its reflection about
  z=zref, and its reflection about both. Rows with z above zref are left out.

  df = input dataframe with columns "y", "z" and valcol
  yref = y location of the reflection line (mirrors the output X coordinate)
  zref = z location of the reflection line (mirrors the output Y coordinate)
  valcol = name of the data column to carry over

  Returns a new dataframe with columns "X", "Y" and valcol."""
  mirrored_rows=[]
  for yval,zval,dataval in zip(df["y"],df["z"],df[valcol]):
    #Exclude points above the reflection line
    if not (zval <= zref):
      continue
    x_mirror=2*yref-yval
    y_mirror=2*zref-zval
    mirrored_rows.extend([
      [yval,zval,dataval],         #original point
      [x_mirror,zval,dataval],     #reflected in y (new X)
      [yval,y_mirror,dataval],     #reflected in z (new Y)
      [x_mirror,y_mirror,dataval], #reflected in both
    ])
  return pd.DataFrame(mirrored_rows,columns=["X","Y",valcol])

In [88]:
#Mirror the PMF table of each model tag about the reflection lines of its mesh
mirrored_pmf={}
for tag,pmf_frame in out_pmf.items():
  ref_y,ref_z=reflection_lines[meshes_by_tag[tag]]
  mirrored_pmf[tag]=generate_mirrored_df(pmf_frame,ref_y,ref_z,"PMF")

In [89]:
#Mirror the selected Dlocal component of each model tag about the reflection lines of its mesh.
#The value column is derived from dlocal_sel_comp ("avg" -> "Davg", "xx" -> "Dxx", ...) so that
#changing the selected component does not leave a stale column name behind.
mirrored_dlocal={}
sel_valcol="D"+dlocal_sel_comp
for tag,ddict in out_dlocal.items():
  mtag=meshes_by_tag[tag]
  yref,zref=reflection_lines[mtag]
  mirrored_dlocal[tag]=generate_mirrored_df(ddict[dlocal_sel_comp],yref,zref,sel_valcol)

## Output data files for homogenization

In [90]:
#Write the mirrored table of the selected Dlocal component for each tag, without the index column
for tag,df in mirrored_dlocal.items():
  df.to_csv(dlocal_outpaths[tag][dlocal_sel_comp],index=False)

In [91]:
#Write the mirrored PMF table for each tag, without the index column
for tag,df in mirrored_pmf.items():
  df.to_csv(pmf_outpaths[tag],index=False)

## Summarize coordinate ranges from various data sources

In [92]:
#Bounding-box column names: each coordinate letter joined with min, max and span, in that order
bbox_columns=["".join(pair) for pair in itertools.product("XYZ",minmaxspan)]

In [93]:
#One table per model tag: rows are the data sources in bbox_idx, columns are min/max/span per coordinate
bboxes={}
for tag in taglist:
  bbox_df=pd.DataFrame(index=bbox_idx,columns=bbox_columns)
  mesh_info=meshmeta[tag]
  tag_limits=data_limits[tag]
  for axis in "XYZ":
    lower_axis=axis.lower()
    for position,side in enumerate(minmax):
      key=axis+side
      #The mesh row is only filled for X and Y
      if axis != "Z":
        bbox_df.loc["mesh",key]=mesh_info[key]
      bbox_df.loc["limits",key]=tag_limits[lower_axis][position]
      #side is "min" or "max", which is also the name of the Series method to call
      for comp in dlocal_comp_list:
        coord_values=out_dlocal[tag][comp][lower_axis]
        bbox_df.loc["D"+comp,key]=getattr(coord_values,side)()
      bbox_df.loc["pmf",key]=getattr(out_pmf[tag][lower_axis],side)()
    #With min and max in place, fill in the span for every data source
    for source in bbox_idx:
      bbox_df.loc[source,axis+"span"]=bbox_df.loc[source,axis+"max"]-bbox_df.loc[source,axis+"min"]
  bboxes[tag]=bbox_df

In [94]:
#Write each bounding-box table, keeping the index (the data source names) as the first column.
#After the loop, bbox_df stays bound to the table of the last tag.
for tag,bbox_df in bboxes.items():
  bbox_df.to_csv(bbox_outpaths[tag])

In [95]:
bbox_df

Unnamed: 0,Xmin,Xmax,Xspan,Ymin,Ymax,Yspan,Zmin,Zmax,Zspan
mesh,4.61,5.79,1.18,4.6,8.6,4.0,,,
limits,5.627,5.627,0.0,4.6,5.2,0.6,4.55,6.65,2.1
Dxx,5.627,5.627,0.0,4.6,5.2,0.6,4.55,6.65,2.1
Dyy,5.627,5.627,0.0,4.6,5.2,0.6,4.55,6.65,2.1
Dzz,5.627,5.627,0.0,4.6,5.2,0.6,4.55,6.65,2.1
Davg,5.627,5.627,0.0,4.6,5.2,0.6,4.55,6.65,2.1
pmf,5.627,5.627,0.0,4.575,5.225,0.65,4.525,6.675,2.15


## <span style="color:red">STOP HERE for now</span>

In [None]:
assert False

## Get unique values of all coordinates

In [None]:
#Sorted unique x, y and z values of the PMF and averaged Dlocal tables, keyed by tag and then by source
uniques={}
for tag in taglist:
  sources={'pmf':out_pmf[tag],"dloc":out_dlocal[tag]["avg"]}
  uniques[tag]={
    srcname:{coord:np.sort(frame[coord].unique()) for coord in "xyz"}
    for srcname,frame in sources.items()
  }

## Select output profile locations

In [None]:
coord_num_values={'x':1,'y':4}

In [None]:
#The number of x choices times the number of y choices must give exactly four profiles
assert np.prod(list(coord_num_values.values()))==4, "Sorry, I was expecting exactly 4 profiles."

In [None]:
#Select the requested number of x and y data points, equally spaced within the input data coordinate values
#Result: outcoords[tag][data source][coordinate name] = list of selected coordinate values
outcoords={}
for tag,udict in uniques.items():
  outcoords[tag]={}
  bbox_df=bboxes[tag] #bounding-box table of this tag; its "mesh" row supplies the limits below
  for var,uqv in udict.items():
    outcoords[tag][var]={}
    for coordname,numvals in coord_num_values.items():
      meshmin=bbox_df.loc["mesh",coordname.upper()+"min"]
      meshmax=bbox_df.loc["mesh",coordname.upper()+"max"]
      vals=[v for v in uqv[coordname] if v >= meshmin and v <= meshmax] #Confirm that point is within mesh
      num_items=len(vals)
      step=num_items/numvals #float stride through the in-mesh values
      #Take the last index of each of numvals equal-length runs, so the final pick is the last value.
      #NOTE(review): if num_items < numvals an index of -1 can appear early and wrap around to the
      #last value, and an empty vals raises IndexError - confirm there are always enough in-mesh points
      positions=[int(i*step)-1 for i in range(1,numvals+1)]
      outcoords[tag][var][coordname]=[vals[p] for p in positions]

In [None]:
outcoords[taglist[0]]

In [None]:
#Combine the x and y values to generate points in the xy plane for profiles
xy_points={}
for tag,ocd in outcoords.items():
  xy_points[tag]={}
  for var,pointsets in ocd.items():
    flatcoords=[pointsets[coordname] for coordname in 'xy']
    itr=itertools.product(*flatcoords)
    xy_points[tag][var]=[p for p in itr]

In [None]:
xy_points[taglist[0]]

In [None]:
#For every tag and profile: start point, end point, and a query string selecting the profile's data rows.
#Profiles are named with a letter from profile_allpoints plus the data source name.
profile_startlocs={}
profile_endlocs={}
profile_queries={}
for tag,xy_allvars in xy_points.items():
  profile_startlocs[tag]={}
  profile_endlocs[tag]={}
  profile_queries[tag]={}
  #Look up this tag's own bounding-box table rather than relying on a leftover global bbox_df,
  #which would hold only the table of whichever tag an earlier loop processed last
  tag_bbox=bboxes[tag]
  zstart=tag_bbox.loc['mesh','Zmin']
  zend=tag_bbox.loc['mesh','Zmax']
  #NOTE(review): the bounding-box cell never fills the Z columns of the "mesh" row, so these are
  #expected to be NaN as things stand - confirm which row should supply the z extent
  for varname, xylist in xy_allvars.items():
    for idx, xyt in enumerate(xylist):
      profname=profile_allpoints[idx]+"_"+varname
      xy=list(xyt)
      profile_startlocs[tag][profname]=xy+[zstart]
      profile_endlocs[tag][profname]=xy+[zend]
      #Open window of +/-0.001 around each of the x and y coordinates
      query_items=[]
      for coordname,coordval in zip('xy',xyt):
        for op,delta in (('>',-0.001),('<',0.001)):
          query_items.append('%s %s %0.3f'%(coordname,op,coordval+delta))
      profile_queries[tag][profname]=' and '.join(query_items)

## Output profile location data to file

In [None]:
outer_indent=""
mid_indent=outer_indent+"  "
inner_indent=mid_indent+"  "

In [None]:
def _as_point_list(data):
  #Coordinates as a bracketed, comma-separated list with three decimals each
  return "["+", ".join(["%0.3f"%v for v in data])+"]"

def _as_quoted(data):
  #Query string wrapped in double quotes
  return '"'+data+'"'

#Each section: (top-level key line, per-tag data, formatter for one entry's value)
profile_sections=[
  ("profile_startlocs:\n",profile_startlocs,_as_point_list), #Profile start locations
  ("profile_endlocs:\n",profile_endlocs,_as_point_list),     #Profile end locations
  ("profile_queries:\n",profile_queries,_as_quoted),         #Profile queries
]
with open(profile_data_outpath,"w") as fp:
  for header,per_tag,render in profile_sections:
    fp.write(outer_indent+header)
    for tag in taglist:
      fp.write(mid_indent+tag+":\n")
      for profname,data in per_tag[tag].items():
        fp.write(inner_indent+profname+": "+render(data)+"\n")