In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from osgeo import gdal
import multiprocessing
from functools import partial
from scipy.stats import pearsonr

# daily

In [87]:
# Raw inputs for the daily comparison: SMAP and Noah site tables plus the
# station file (its first line is skipped on read).
smap, noah, meso = (
    pd.read_csv('SMAP_site.csv'),
    pd.read_csv('Noah_site.csv'),
    pd.read_csv('zhendata.csv', skiprows=1),
)

In [88]:
def clean(df, type='smap'):
    """Index a raw site table by time and replace negative values with NaN.

    The input frame is left untouched; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table as read from CSV.
    type : str
        Layout of the table:

        * ``'smap'`` - timestamps live in column ``'Unnamed: 0'`` and are
          parsed with the format ``'%Y%m%d'``.
        * ``'noah'`` - timestamps live in column ``'Unnamed: 0'``, are parsed
          with ``'%Y-%m-%d %H:%M:%S'``, localized as UTC, converted to
          US/Central and then made tz-naive.
        * ``'meso'`` - the row labelled ``0`` is dropped (presumably a
          units/sub-header row - confirm against the file), timestamps live
          in column ``'stnm'`` with the format inferred, and get the same
          UTC -> US/Central conversion as ``'noah'``.

        Any other value skips the time handling; only the negative-value
        masking is applied.

    Returns
    -------
    pandas.DataFrame
        For the known layouts: rows sorted by time, indexed by ``'time'``.
        In every case entries that were ``< 0`` are NaN.
    """
    # layout -> (timestamp column, strptime format or None to infer,
    #            whether the stamps are localized as UTC and shifted to US/Central)
    layouts = {
        'smap': ('Unnamed: 0', '%Y%m%d', False),
        'noah': ('Unnamed: 0', '%Y-%m-%d %H:%M:%S', True),
        'meso': ('stnm', None, True),
    }

    if type in layouts:
        time_column, time_format, stored_as_utc = layouts[type]

        if type == 'meso':
            df = df.drop(0, axis=0)

        # rename/sort_values/set_index each return a new frame, so none of the
        # steps below can leak back into the caller's DataFrame.
        df = df.rename(columns={time_column: 'time'})
        df['time'] = pd.to_datetime(df['time'], format=time_format)
        df = df.sort_values(by='time').set_index('time')

        if stored_as_utc:
            # Convert to local wall-clock time, then drop the tz so the index
            # lines up with the tz-naive tables.
            df.index = df.index.tz_localize('UTC').tz_convert('US/Central').tz_localize(None)
    else:
        # No layout-specific work; copy so the masking below still does not
        # modify the caller's frame.
        df = df.copy()

    df[df < 0] = np.nan

    return df

def RMSD(x,y):
    """Root-mean-square difference between x and y.

    Only positions where both x and y are >= 0 take part; NaN fails that
    comparison, so pairs with a missing value are excluded as well.
    """
    keep = (x >= 0) & (y >= 0)
    kept_x = x[keep]
    kept_y = y[keep]

    squared_error = (kept_x - kept_y) ** 2
    mean_squared_error = squared_error.sum() / len(kept_x)
    return mean_squared_error ** .5

def r(x,y):
    """Pearson correlation coefficient between x and y.

    Only positions where both x and y are >= 0 take part; NaN fails that
    comparison, so pairs with a missing value are excluded as well.

    Returns NaN when fewer than two valid pairs remain, because the
    correlation is undefined there and `pearsonr` rejects such input.
    """
    common = (x >= 0) & (y >= 0)
    x = x[common]
    y = y[common]

    # Too few points for a correlation: report "no value" instead of raising,
    # which is also what RMSD yields (NaN) when nothing is left to compare.
    if len(x) < 2:
        return np.nan

    return pearsonr(x, y)[0]

def metrics(df):
    """Pairwise RMSD and correlation between the smap, noah and meso columns.

    Returns a dict ``{'RMSD': {...}, 'CC': {...}}`` whose inner keys are
    'meso/noah', 'smap/noah' and 'smap/meso'. When `df` has two or fewer rows
    without any NaN, nothing is computed and None is returned.
    """
    smap_values = df.smap.values
    noah_values = df.noah.values
    meso_values = df.meso.values

    # Not enough complete rows to score this table.
    if len(df.dropna(axis=0)) <= 2:
        return None

    # Inner-key order is kept as-is because it determines the order in which
    # the result columns get created downstream.
    pairs = (
        ('meso/noah', meso_values, noah_values),
        ('smap/noah', smap_values, noah_values),
        ('smap/meso', smap_values, meso_values),
    )

    return {
        'RMSD': {label: RMSD(first, second) for label, first, second in pairs},
        'CC': {label: r(first, second) for label, first, second in pairs},
    }
    
    

In [89]:
# Time-index every table. Noah is additionally divided by 100
# (presumably percent -> fraction, to match the other two - confirm).
smap, noah, meso = (
    clean(smap, type='smap'),
    clean(noah, type='noah') / 100.,
    clean(meso, type='meso'),
)

In [90]:
# Aggregate Noah and the station data to daily means.
# `resample(..., how='mean')` is the deprecated spelling (and is rejected by
# later pandas releases); the aggregation is now a method on the resampler.
noah_daily = noah.resample('D').mean()
meso_daily = meso.resample('D').mean()

the new syntax is .resample(...).mean()
  """Entry point for launching an IPython kernel.
the new syntax is .resample(...).mean()
  


In [91]:
# Score every station column: line the three daily series up on their time
# index and hand the table to metrics().
intercomp_daily = {}
for col in meso.columns:
    # `keys=` names the resulting columns directly. The earlier
    # Series.rename(columns=...) calls are not a valid way to name a Series
    # (current pandas rejects the keyword), so the follow-up 0/1/2 -> name
    # rename only worked by accident on old versions.
    df = pd.concat(
        [noah_daily[col], smap[col], meso_daily[col]],
        axis=1,
        keys=['noah', 'smap', 'meso'],
    )
    _results = metrics(df)
    intercomp_daily[col] = _results



In [92]:
# Station metadata; station numbers are turned into strings so they can be
# matched against string labels.
siteDF = (
    pd.read_excel('/home/ZhiLi/soilmoisture/Mesonet_Stations_info.xlsx')
    .assign(stnm=lambda table: table.stnm.astype(str))
)

In [93]:
# Index the station table by station number.
siteDF = siteDF.set_index('stnm')

In [104]:
# Copy every metric into siteDF as a '<metric>_<pair>' column, one row per site.
for site, site_results in intercomp_daily.items():
    # metrics() returns None for sites without enough complete rows; skip
    # those explicitly rather than hiding every possible error behind a
    # bare `except: pass`.
    if site_results is None:
        continue
    for key, pair_values in site_results.items():
        for item, value in pair_values.items():
            siteDF.loc[site, key + '_' + item] = value
        

In [106]:
# Persist the station table together with the daily metrics.
siteDF.to_csv(path_or_buf='daily_tranditional.csv')

# 6 AM

In [135]:
# Reload the inputs (this time the AM SMAP file) and clean them in one step each.
# Noah is divided by 100 after cleaning (presumably percent -> fraction - confirm).
smap = clean(pd.read_csv('SMAP_site_AM.csv'), type='smap')
noah = clean(pd.read_csv('Noah_site.csv'), type='noah') / 100.
meso = clean(pd.read_csv('zhendata.csv', skiprows=1), type='meso')

In [136]:
# One sampling instant per day at 06:00.
slt = pd.date_range(start='2015-04-01 06:00:00', end="2019-07-02 06:00:00", freq='D')

# Station data: keep the rows stamped exactly at a sampling instant, then
# reindex so that instants without a record appear as all-NaN rows.
# `.loc[slt, :]` raises KeyError on current pandas as soon as one label is missing.
meso_AM = meso.loc[meso.index.isin(slt)].reindex(slt)

# Noah: keep only the sampling instants that actually exist in the table.
# This selects the same rows as copying them one at a time inside a
# try/except, but keeps the numeric dtypes and hides no unrelated errors.
noah_AM = noah.loc[noah.index.isin(slt)]
    

In [137]:
# Move the once-a-day samples onto a plain daily index.
# * `how='sum'` is the deprecated spelling; the aggregation is now a method.
# * min_count=1 keeps days without any valid sample as NaN. A plain sum()
#   turns them into 0.0, which RMSD()/r() would then treat as a real value.
# * astype(float) guards against object-dtype columns left behind when the
#   frame was filled row by row.
noah_AM = noah_AM.astype(float).resample('D').sum(min_count=1)
meso_AM = meso_AM.resample('D').sum(min_count=1)
# reindex (instead of .loc) tolerates days that are absent from the SMAP
# table and leaves them as NaN rows.
smap_AM = smap.reindex(meso_AM.index)

the new syntax is .resample(...).sum()
  """Entry point for launching an IPython kernel.
the new syntax is .resample(...).sum()
  


In [154]:
# Score every station column for the AM samples: line the three series up on
# their time index and hand the table to metrics().
intercomp_AM = {}
for col in meso.columns:
    # `keys=` names the resulting columns directly. The earlier
    # Series.rename(columns=...) calls are not a valid way to name a Series
    # (current pandas rejects the keyword), so the follow-up 0/1/2 -> name
    # rename only worked by accident on old versions.
    df = pd.concat(
        [noah_AM[col], smap_AM[col], meso_AM[col]],
        axis=1,
        keys=['noah', 'smap', 'meso'],
    )
    _results = metrics(df)
    intercomp_AM[col] = _results



In [155]:
# Fresh copy of the station metadata, keyed by station number as a string.
siteDF = (
    pd.read_excel('/home/ZhiLi/soilmoisture/Mesonet_Stations_info.xlsx')
    .assign(stnm=lambda table: table.stnm.astype(str))
    .set_index('stnm')
)

In [158]:
# try:
    for site in intercomp_AM.keys():
        try:
            for key in intercomp_AM[site].keys():
                for item in intercomp_AM[site][key].keys():
                    siteDF.loc[site, key+'_'+item]= intercomp_AM[site][key][item]
        except: pass
        

In [159]:
# Display the station table with the RMSD_* / CC_* columns appended for the AM run.
siteDF

Unnamed: 0_level_0,stid,name,city,rang,cdir,cnty,nlat,elon,elev,cdiv,...,CLAY75,TEXT75,datc,datd,RMSD_meso/noah,RMSD_smap/noah,RMSD_smap/meso,CC_meso/noah,CC_smap/noah,CC_smap/meso
stnm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
110,ACME,Acme,Rush Springs,4.0,WNW,Grady,34.80833,-98.02325,397,Central,...,22.8,Sandy clay loam,19940101,20991231,0.064406,0.075125,0.088384,0.666324,0.325615,0.361226
1,ADAX,Ada,Ada,2.0,NNE,Pontotoc,34.79851,-96.66909,295,South Central,...,29.8,Clay loam,19940101,20991231,0.091888,0.130188,0.147090,0.602252,0.196339,0.182794
2,ALTU,Altus,Altus,3.0,S,Jackson,34.58722,-99.33808,416,Southwest,...,44.1,Silty clay,19940101,20991231,0.078828,0.104956,0.095169,0.807311,0.411327,0.449191
116,ALV2,Alva,Alva,7.2,SSW,Woods,36.70823,-98.70974,439,North Central,...,-999.0,-999,19981217,20991231,0.135272,0.110011,0.142131,0.515212,0.504501,0.270041
135,ANT2,Antlers,Antlers,3.0,WNW,Pushmataha,34.24967,-95.66844,172,Southeast,...,26.1,Sandy clay loam,20110415,20991231,0.078232,0.176882,0.174389,0.744979,0.323915,0.307469
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,WEST,Westville,Westville,5.0,WNW,Adair,36.01100,-94.64496,348,East Central,...,-999.0,-999,19940101,20991231,0.121264,0.174060,0.229048,0.578089,0.131519,0.104296
105,WILB,Wilburton,Wilburton,2.0,SW,Latimer,34.90092,-95.34805,199,Southeast,...,63.9,Clay,19940101,20991231,0.108871,0.283851,0.282317,0.589034,0.174631,0.111394
106,WIST,Wister,Wister,3.0,ENE,LeFlore,34.98426,-94.68778,143,Southeast,...,70.8,Clay,19940101,20991231,0.104756,0.144144,0.169771,0.712091,0.234786,0.258139
107,WOOD,Woodward,Woodward,2.0,WSW,Woodward,36.42329,-99.41682,625,North Central,...,25.5,Loam,19940101,20991231,0.089539,0.084681,0.120534,0.614237,0.318648,0.293763


In [160]:
# Persist the station table together with the AM metrics.
siteDF.to_csv(path_or_buf='AM_tranditional.csv')

# PM

In [143]:
# Reload the inputs (this time the PM SMAP file) and clean them in one step each.
# Noah is divided by 100 after cleaning (presumably percent -> fraction - confirm).
smap = clean(pd.read_csv('SMAP_site_PM.csv'), type='smap')
noah = clean(pd.read_csv('Noah_site.csv'), type='noah') / 100.
meso = clean(pd.read_csv('zhendata.csv', skiprows=1), type='meso')

In [144]:
# One sampling instant per day at 18:00.
slt = pd.date_range(start='2015-04-01 18:00:00', end="2019-07-02 18:00:00", freq='D')

# Keep the rows stamped exactly at a sampling instant and reindex so instants
# without a record become all-NaN rows; `.loc[slt, :]` raises KeyError on
# current pandas as soon as one label is missing.
# The resample then moves the samples onto a plain daily index:
# * `how='sum'` is the deprecated spelling; the aggregation is now a method.
# * min_count=1 keeps days without a valid sample as NaN. A plain sum() turns
#   them into 0.0, which RMSD()/r() would then treat as a real value.
meso_PM = meso.loc[meso.index.isin(slt)].reindex(slt).resample('D').sum(min_count=1)
noah_PM = noah.loc[noah.index.isin(slt)].reindex(slt).resample('D').sum(min_count=1)
# reindex (instead of .loc) tolerates days that are absent from the SMAP table.
smap_PM = smap.reindex(meso_PM.index)

the new syntax is .resample(...).sum()
  
the new syntax is .resample(...).sum()
  This is separate from the ipykernel package so we can avoid doing imports until


In [161]:
# Score every station column for the PM samples: line the three series up on
# their time index and hand the table to metrics().
intercomp_PM = {}
for col in meso.columns:
    # `keys=` names the resulting columns directly. The earlier
    # Series.rename(columns=...) calls are not a valid way to name a Series
    # (current pandas rejects the keyword), so the follow-up 0/1/2 -> name
    # rename only worked by accident on old versions.
    df = pd.concat(
        [noah_PM[col], smap_PM[col], meso_PM[col]],
        axis=1,
        keys=['noah', 'smap', 'meso'],
    )
    _results = metrics(df)
    intercomp_PM[col] = _results



In [162]:
# Fresh copy of the station metadata, keyed by station number as a string.
siteDF = (
    pd.read_excel('/home/ZhiLi/soilmoisture/Mesonet_Stations_info.xlsx')
    .assign(stnm=lambda table: table.stnm.astype(str))
    .set_index('stnm')
)

In [163]:
# try:
    for site in intercomp_PM.keys():
        try:
            for key in intercomp_PM[site].keys():
                for item in intercomp_PM[site][key].keys():
                    siteDF.loc[site, key+'_'+item]= intercomp_PM[site][key][item]
        except: pass
        

In [164]:
# Display the station table with the RMSD_* / CC_* columns appended for the PM run.
siteDF

Unnamed: 0_level_0,stid,name,city,rang,cdir,cnty,nlat,elon,elev,cdiv,...,CLAY75,TEXT75,datc,datd,RMSD_meso/noah,RMSD_smap/noah,RMSD_smap/meso,CC_meso/noah,CC_smap/noah,CC_smap/meso
stnm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
110,ACME,Acme,Rush Springs,4.0,WNW,Grady,34.80833,-98.02325,397,Central,...,22.8,Sandy clay loam,19940101,20991231,0.062632,0.068861,0.080414,0.680784,0.428844,0.478132
1,ADAX,Ada,Ada,2.0,NNE,Pontotoc,34.79851,-96.66909,295,South Central,...,29.8,Clay loam,19940101,20991231,0.092146,0.077606,0.096693,0.637849,0.474365,0.510571
2,ALTU,Altus,Altus,3.0,S,Jackson,34.58722,-99.33808,416,Southwest,...,44.1,Silty clay,19940101,20991231,0.072347,0.113740,0.104644,0.828343,0.436332,0.428355
116,ALV2,Alva,Alva,7.2,SSW,Woods,36.70823,-98.70974,439,North Central,...,-999.0,-999,19981217,20991231,0.132310,0.112112,0.133510,0.497799,0.584726,0.422670
135,ANT2,Antlers,Antlers,3.0,WNW,Pushmataha,34.24967,-95.66844,172,Southeast,...,26.1,Sandy clay loam,20110415,20991231,0.078174,0.105502,0.096575,0.763625,0.461234,0.495540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,WEST,Westville,Westville,5.0,WNW,Adair,36.01100,-94.64496,348,East Central,...,-999.0,-999,19940101,20991231,0.117833,0.065739,0.134927,0.590833,0.431181,0.308607
105,WILB,Wilburton,Wilburton,2.0,SW,Latimer,34.90092,-95.34805,199,Southeast,...,63.9,Clay,19940101,20991231,0.107424,0.098969,0.112399,0.629408,0.362737,0.401506
106,WIST,Wister,Wister,3.0,ENE,LeFlore,34.98426,-94.68778,143,Southeast,...,70.8,Clay,19940101,20991231,0.103120,0.086518,0.120135,0.727640,0.455225,0.441490
107,WOOD,Woodward,Woodward,2.0,WSW,Woodward,36.42329,-99.41682,625,North Central,...,25.5,Loam,19940101,20991231,0.083729,0.084562,0.115097,0.700384,0.370123,0.461028


In [165]:
# Persist the station table together with the PM metrics.
siteDF.to_csv(path_or_buf='PM_tranditional.csv')