In [1]:
# Clear every user-defined name from the kernel namespace without asking for
# confirmation (-f), so the cells below do not depend on leftover state.
%reset -f


In [2]:
# Mount Google Drive at /content/drive so the data folder used below can be
# read through ordinary file paths. This only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np
import scipy
from scipy.spatial import distance
import matplotlib.pylab as pylab
import matplotlib.pyplot as plt
import os
from sklearn.metrics import mean_squared_error
from decimal import Decimal

# Root data folder on the mounted Drive. The analysis below expects one
# sub-folder per station here (named "<number>_<name>"), each containing the
# *_CLIGEN.cli, *_GHCND.cli, *_daymet.cli, *_gridmet.cli and *_prism.cli files.
# The resulting chi_dist_table.csv is written into this folder as well.
dataDIR = '/content/drive/My Drive/Colab Notebooks/TeamProjects/24_us_stations'

def chi2_distance(A, B):
  """Return the chi-square distance between two binned distributions.

  Computes 0.5 * sum((a - b)**2 / (a + b)) over element pairs of A and B.

  Args:
    A: iterable of numbers (e.g. histogram bin values).
    B: iterable of numbers, paired element-wise with A. Pairing uses zip(),
      so surplus elements of the longer input are ignored.

  Returns:
    The distance as a NumPy float; 0.0 when there are no pairs to compare.
  """
  # A bin that is zero in both inputs would evaluate 0/0 (ZeroDivisionError
  # for Python floats, nan for NumPy floats). Such a bin carries no
  # difference, so it is skipped and contributes nothing to the sum.
  terms = [((a - b)**2) / (a + b)
           for (a, b) in zip(A, B)
           if a != 0 or b != 0]
  chi = 0.5 * np.sum(terms)
  return chi

# File-name fragment identifying each dataset inside a station folder.
# Order matters only if a file name contains more than one fragment: the
# first matching entry wins, as in the original if/elif chain.
CLI_SUFFIXES = {
  'cligen': '_CLIGEN.cli',
  'ghcnd': '_GHCND.cli',
  'daymet': '_daymet.cli',
  'gridmet': '_gridmet.cli',
  'prism': '_prism.cli',
}
REFERENCE = 'ghcnd'      # dataset every other dataset is compared against
HEADER_LINES = 15        # leading lines of each .cli file skipped before parsing
VALUE_COLUMN = 3         # whitespace-separated field that is parsed
                         # (presumably daily precipitation -- confirm against the .cli layout)
N_BINS = 50              # number of histogram bins shared by all datasets of a station
EMPTY_BIN_COUNT = 1e-9   # stand-in count for empty bins so chi2_distance never
                         # sees a bin that is zero in both histograms


def _find_cli_files(folder_path):
  """Return {dataset key: file name} for the .cli files found in folder_path.

  Raises:
    FileNotFoundError: if any dataset listed in CLI_SUFFIXES has no file.
  """
  found = {}
  for fname in os.listdir(folder_path):
    for key, suffix in CLI_SUFFIXES.items():
      if suffix in fname:
        found[key] = fname
        break
  missing = [suffix for key, suffix in CLI_SUFFIXES.items() if key not in found]
  if missing:
    raise FileNotFoundError(
        f"{folder_path}: no file matching {', '.join(missing)}")
  return found


def _read_positive_values(path):
  """Return the strictly positive values of column VALUE_COLUMN in a .cli file.

  The first HEADER_LINES lines and the last line of the file are not parsed.
  """
  with open(path) as fh:
    lines = fh.readlines()
  parsed = (float(line.split()[VALUE_COLUMN]) for line in lines[HEADER_LINES:-1])
  return [value for value in parsed if value > 0.0]


def _binned_fractions(values, bin_edges):
  """Histogram values on bin_edges and divide each count by len(values)."""
  counts = np.histogram(values, bin_edges)[0].astype(float)
  counts[counts == 0] = EMPTY_BIN_COUNT
  return counts / len(values)


# Station folders: directory entries without a dot in their name.
lvlOneFolders = [f for f in os.listdir(dataDIR)
                 if '.' not in f and os.path.isdir(os.path.join(dataDIR, f))]
print(lvlOneFolders)

compared = [key for key in CLI_SUFFIXES if key != REFERENCE]
rows = []

for folder in lvlOneFolders:
  folder_path = os.path.join(dataDIR, folder)

  # Looked up afresh for every station so a missing file cannot silently
  # fall back to a name left over from the previous iteration.
  cli_files = _find_cli_files(folder_path)
  values = {key: _read_positive_values(os.path.join(folder_path, fname))
            for key, fname in cli_files.items()}

  empty = [key for key, vals in values.items() if not vals]
  if empty:
    raise ValueError(
        f"{folder}: no positive values found for {', '.join(empty)}")

  # All datasets of a station share the same bin edges so that their
  # histograms are comparable bin by bin.
  min_value = min(min(vals) for vals in values.values())
  max_value = max(max(vals) for vals in values.values())
  bins_pdf = np.linspace(min_value, max_value, N_BINS + 1)

  pdfs = {key: _binned_fractions(vals, bins_pdf) for key, vals in values.items()}

  rows.append({key: round(chi2_distance(pdfs[REFERENCE], pdfs[key]), 5)
               for key in compared})

# Build the table once from the collected rows (float columns) instead of
# enlarging an empty frame cell by cell.
df = pd.DataFrame(rows,
                  index=pd.Index(lvlOneFolders, name='station'),
                  columns=compared)

# Folder names look like "<number>_<name>"; order rows by the numeric prefix.
df = df.sort_index(key=lambda idx: idx.str.split('_').str[0].astype(int))

print(df)

df.to_csv(os.path.join(dataDIR, 'chi_dist_table.csv'))




['10_Illinois', '11_Arkansas', '12_Texas', '13_Kansas', '14_Colorado', '15_Montana', '16_Arizona', '17_California', '18_Idaho', '19_Washington', '1_NewYork', '20_Oregon', '21_Nevada', '22_Utah', '23_Arizona', '24_NewMexico', '2_NorthCarolina', '3_Florida', '4_Ohio', '5_Mississippi', '6_Iowa', '7_Wisconsin', '8_NorthDakota', '9_Nebraska']
                  cligen   daymet  gridmet    prism
station                                            
1_NewYork        0.00358  0.01731  0.01111  0.00495
2_NorthCarolina  0.00515  0.03812  0.00889  0.00363
3_Florida        0.00511  0.08333  0.01957  0.01118
4_Ohio            0.0041  0.05977  0.00372   0.0031
5_Mississippi    0.01227  0.03497  0.02286   0.0118
6_Iowa           0.00531  0.03454  0.01014  0.00242
7_Wisconsin      0.00474  0.04748  0.00413  0.00385
8_NorthDakota     0.0078  0.07631   0.0088  0.00571
9_Nebraska       0.01033  0.02726  0.02043  0.00416
10_Illinois      0.00509  0.03157  0.02522   0.0111
11_Arkansas      0.00344  0.08007  0