In [18]:
import os
import glob
import numpy as np
import pandas as pd
import xarray as xr

# Per-dataset metadata table, indexed by the "dataset" column so a row can be
# looked up by dataset name with ``smvdatasets.loc[name]``.
smvdatasets = pd.read_csv(
    "docs/smvdatasets.csv",
    index_col="dataset",
    header=0)

# Sample files are named "<sample>_<lat>_<lon>.txt". Sort them numerically by
# the leading sample number (a plain string sort would put "10" before "2").
# os.path.basename is used instead of splitting on "\\" so the key works with
# both Windows and POSIX path separators.
files = sorted(
    glob.glob("../tmp_data/*.txt"),
    key=lambda x: int(os.path.basename(x).split("_")[0]))
files

['../tmp_data\\0_36.945279521752646_-114.04045643153523.txt',
 '../tmp_data\\1_36.945279521752646_-113.94709543568463.txt',
 '../tmp_data\\2_36.945279521752646_-113.85373443983401.txt',
 '../tmp_data\\3_36.945279521752646_-113.76037344398338.txt',
 '../tmp_data\\4_36.945279521752646_-113.66701244813275.txt',
 '../tmp_data\\5_36.945279521752646_-113.57365145228215.txt',
 '../tmp_data\\6_36.945279521752646_-113.48029045643152.txt',
 '../tmp_data\\7_36.945279521752646_-113.38692946058089.txt',
 '../tmp_data\\8_36.945279521752646_-113.29356846473028.txt',
 '../tmp_data\\9_36.945279521752646_-113.20020746887967.txt',
 '../tmp_data\\10_36.945279521752646_-113.10684647302904.txt',
 '../tmp_data\\11_36.945279521752646_-113.0134854771784.txt',
 '../tmp_data\\12_36.945279521752646_-112.92012448132779.txt',
 '../tmp_data\\13_36.945279521752646_-112.82676348547717.txt',
 '../tmp_data\\14_36.945279521752646_-112.73340248962653.txt',
 '../tmp_data\\15_36.945279521752646_-112.64004149377591.txt',
 '.

In [53]:
def numvalid(v):
    """Return how many entries of ``v.data`` are not NaN."""
    return np.count_nonzero(np.logical_not(np.isnan(v.data)))


def allnan(v):
    """Return True when ``v.data`` holds no non-NaN entries."""
    return numvalid(v) == 0

# Attributes attached to the latitude coordinate of every sample.
latatts = {
    "standard_name": "latitude",
    "long_name": "sample latitude",
    "units": "degrees_north",
}

# Attributes attached to the longitude coordinate of every sample.
# These previously duplicated the latitude attributes (copy-paste error),
# which mislabelled the lon coordinate; longitude uses units of degrees_east.
lonatts = dict(
    standard_name="longitude",
    long_name="sample longitude",
    units="degrees_east")

def txt_to_pd(f):
    """Read a delimited text file into a DataFrame indexed by datetime.

    The column names are taken from row 4 (``header=4``); rows before it are
    skipped. The "time" column becomes the index and is converted to datetimes.

    Parameters
    ----------
    f : path or file-like object accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame with a datetime index named "time".
    """
    table = pd.read_csv(f, index_col="time", header=4)
    # The index is converted after reading rather than inside read_csv.
    table.index = pd.to_datetime(table.index)
    return table

def split_pd(col):
    """Split a ';'-delimited string column and return its middle field as floats.

    Each value is split into three parts on ';' (at most two splits). Empty
    parts become NaN, every part is cast to float, and the parts are labelled
    "Max", "Mean", "Min" in that order. Only the "Mean" series is returned.
    """
    parts = (
        col.str.split(";", n=2, expand=True)   # one column per field
        .replace('', np.nan)                   # empty field -> NaN
        .astype(float)                         # all fields numeric
    )
    parts.columns = ["Max", "Mean", "Min"]
    return parts["Mean"]

def pd_to_xr(dataset, series, id, lat, lon):
    """Wrap one pandas series in an xr.Dataset carrying sample/lat/lon coords.

    Parameters
    ----------
    dataset : name of the variable; also the row label looked up in the
        module-level ``smvdatasets`` table to obtain the variable's attributes.
    series : pandas series holding the variable's values.
    id : sample identifier, stored as the single value of the "sample" coord.
    lat, lon : sample position, each stored as a length-1 coord on "sample"
        with ``latatts`` / ``lonatts`` as attributes.

    Returns
    -------
    xr.Dataset with one data variable named ``dataset``.
    """
    sample_dim = ["sample"]

    # Length-1 coordinate arrays, all defined along the "sample" dimension.
    sample = xr.DataArray(data=[id], dims=sample_dim)
    lat_coord = xr.DataArray(
        data=[lat], name="lat", coords=[sample], dims=sample_dim, attrs=latatts)
    lon_coord = xr.DataArray(
        data=[lon], name="lon", coords=[sample], dims=sample_dim, attrs=lonatts)

    # The data variable itself, with attributes taken from the metadata row.
    var_attrs = smvdatasets.loc[dataset].to_dict()
    variable = xr.DataArray(series, name=dataset, attrs=var_attrs)

    return variable.to_dataset().assign_coords(
        sample=sample, lat=lat_coord, lon=lon_coord)

In [None]:
# One xr.Dataset per sample file, collected separately for each SMAP variable.
SMAP_rootzone = []
SMAP_surface = []
#AirMOSS_rootzone = []
#AirMOSS_surface = []

for f in files:
    # File names look like "<sample>_<lat>_<lon>.txt". Use os.path to drop the
    # directory and the extension so parsing does not depend on the Windows
    # "\\" separator or on the extension being exactly four characters long.
    stem = os.path.splitext(os.path.basename(f))[0]
    i, lat, lon = stem.split("_")[0:3]

    df = txt_to_pd(f)

    # Keep only the "Mean" component of each ';'-delimited column.
    rzmean = split_pd(df.SMAP_rootzone)
    sumean = split_pd(df.SMAP_surface)

    rzxr = pd_to_xr("SMAP_rootzone", rzmean, int(i), float(lat), float(lon))
    rzsu = pd_to_xr("SMAP_surface", sumean, int(i), float(lat), float(lon))

    SMAP_rootzone.append(rzxr)
    SMAP_surface.append(rzsu)

In [67]:
# Display the first element of SMAP_surface to inspect its dims, coords and data.
SMAP_surface[0]

<xarray.Dataset>
Dimensions:       (sample: 1, time: 6175)
Coordinates:
  * time          (time) datetime64[ns] 2002-04-01 2002-04-02 ... 2019-02-25
  * sample        (sample) int64 0
    lat           (sample) float64 36.95
    lon           (sample) float64 -114.0
Data variables:
    SMAP_surface  (time) float64 nan nan nan nan nan ... 22.17 20.9 19.2 17.27