In [None]:
%reset -f

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [9]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime as dt
from datetime import timedelta
import shutil
import os
import scipy as scipy
import statsmodels.api as sm
from google.colab import data_table
data_table.enable_dataframe_formatter()


stationIDLINK = 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt'
metadataLINK = 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt'
dataLINK = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access'
outFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Annual_Trends_Results.csv'
dataFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Annual_Trends_Data.csv'
momeanFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_MEAN_Trends.csv'
mosdevFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_SDEV_Trends.csv'
moskewFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_SKEW_Trends.csv'
moslpmeanFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_MEAN_SLP_Trends.csv'
moslpsdevFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_SDEV_SLP_Trends.csv'
moslpskewFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_SKEW_SLP_Trends.csv'


url = stationIDLINK
req = requests.get(url)
text = req.text

keepers_step_one = []
lines = (line for line in text.splitlines())
for line in lines:
  row = line.split()
  stationID = row[0]
  if stationID[:2] == 'US':
    state = row[4]
    if state in ['AZ', 'NM', 'NV', 'UT']:
      keepers_step_one.append(stationID)

url = metadataLINK
req = requests.get(url)
text = req.text

keepers_step_two = []
lines = (line for line in text.splitlines())
for line in lines:
  row = line.split()
  if 'PRCP' in row and row[0] in keepers_step_one:
    if int(row[4]) <= 1974 and int(row[5]) >= 2013:
      keepers_step_two.append(row[0])

dates = []
start_date = dt(1974, 1, 1, 0, 0)
end_date = dt(2013, 12, 31, 0, 0)
date = start_date
while date < end_date:
  dates.append(date)
  date = date + timedelta(days=1)



#No html address
bad = []
for keeper in keepers_step_two:
  ct = 0
  url = dataLINK + '/' + keeper + '.csv'
  req = requests.get(url)
  text = req.text
  if not '404 Not Found' in text:
    ct = 0
    lines = [line for line in text.splitlines()]
    save_lines = []
    hdrs = lines[0].split(',')
    prcp_i = hdrs.index('"PRCP"')
    date_i = hdrs.index('"DATE"')
    for line in lines[1:]:
      row = line.split('","')
      name_no_comma = row[5].replace(',', '')
      line = line.replace(row[5], name_no_comma)
      line = line.replace('","', ',')
      row = line.split(',')
      date = dt.strptime(row[date_i].strip('"'), '%Y-%m-%d')
      prcp = row[prcp_i].strip('"')
      if date.year >= 1974 and date.year <= 2013:
        if prcp != '' and not any([s in prcp for s in ['P', 'T', 'H', '9999']]):
          prcp = float(prcp)
          ct += 1

    if float(ct)/float(len(dates))*100. < 98.:
      print(str(float(ct)/float(len(dates))*100))
      bad.append(keeper)

  else:
    bad.append(keeper)

keepers_step_three = [k for k in keepers_step_two if k not in bad]
keepers = keepers_step_three

#keepers = ['USC00020949']


#DATA ANALYSIS


def get_yr(d):
  yr = d.year
  return yr

def get_mo(d):
  mo = d.month
  return mo

data_df = pd.DataFrame()
month_mean_df = pd.DataFrame(list(range(1, 13)))
month_sdev_df = pd.DataFrame(list(range(1, 13)))
month_skew_df = pd.DataFrame(list(range(1, 13)))
month_slp_mean_df = pd.DataFrame(list(range(1, 13)))
month_slp_sdev_df = pd.DataFrame(list(range(1, 13)))
month_slp_skew_df = pd.DataFrame(list(range(1, 13)))

i = 0
done = False
save_data = []
while done == False:
  keeper = keepers[i]
  print(keeper)

  try:
    url = dataLINK + '/' + keeper + '.csv'
    req = requests.get(url)
    text = req.text

    data_dict = {'date':[], 'precip':[]}
    lines = [line for line in text.splitlines()]
    hdrs = lines[0].split(',')
    prcp_i = hdrs.index('"PRCP"')
    date_i = hdrs.index('"DATE"')
    for line in lines[1:]:
      row = line.split('","')
      name_no_comma = row[5].replace(',', '')
      line = line.replace(row[5], name_no_comma)
      line = line.replace('","', ',')
      row = line.split(',')
      lon = row[3]
      lat = row[2]
      date = dt.strptime(row[date_i].strip('"'), '%Y-%m-%d')
      prcp = row[prcp_i].strip('"')
      if date.year >= 1974 and date.year <= 2013:

        if prcp != '' and not any([s in prcp for s in ['P', 'T', 'H', '9999']]):
          prcp = float(prcp)/10.
        else:
          prcp = 0.

        data_dict['date'].append(date)
        data_dict['precip'].append(prcp)

    #pd.DataFrame(data_dict).to_csv('/content/drive/My Drive/GEE_Downloads/test.csv')

    df = pd.DataFrame(data=data_dict)
    df['year'] = df['date'].apply(get_yr)
    df['month'] = df['date'].apply(get_mo)

    mean = df.loc[df['precip'] > 0.].mean()
    sdev = df.loc[df['precip'] > 0.].std()
    try:
      skew = df.loc[df['precip'] > 0.].skew()
    except TypeError:
      skew = 0.

    means = df.loc[df['precip'] > 0.].groupby('year')['precip'].mean()
    sdevs = df.loc[df['precip'] > 0.].groupby('year')['precip'].std()
    skews = df.loc[df['precip'] > 0.].groupby('year')['precip'].skew()

    x = np.arange(0, 40)
    X = sm.add_constant(x)

    model = sm.OLS(means, X)
    results = model.fit()
    slope_means = results.params[1]

    model = sm.OLS(sdevs, X)
    results = model.fit()
    slope_sdevs = results.params[1]

    model = sm.OLS(skews, X)
    results = model.fit()
    slope_skews = results.params[1]

    save_data.append([keeper, lon, lat, str(slope_means), str(mean), str(slope_sdevs), str(sdev), str(slope_skews), str(skew)])

    i += 1

    data_df[keeper + '_means'] = means
    data_df[keeper + '_sdevs'] = sdevs
    data_df[keeper + '_skews'] = skews

    means = df.loc[df['precip'] > 0.].groupby('month')['precip'].mean()
    sdevs = df.loc[df['precip'] > 0.].groupby('month')['precip'].std()
    skews = df.loc[df['precip'] > 0.].groupby('month')['precip'].skew()

    month_mean_df[keeper] = means
    month_sdev_df[keeper] = sdev
    month_skew_df[keeper] = skew

    slope_means_tmp, slope_sdevs_tmp, slope_skews_tmp = [], [], []

    for month in range(1, 13):
      print(month)
      means = df.loc[df['month'] == month].replace(0., np.nan).groupby('year')['precip'].mean().replace(np.nan, 0.)
      sdevs = df.loc[df['month'] == month].replace(0., np.nan).groupby('year')['precip'].std().replace(np.nan, 0.)
      skews = df.loc[df['month'] == month].replace(0., np.nan).groupby('year')['precip'].skew().replace(np.nan, 0.)

      display(means)

      x = np.arange(0, 40)
      X = sm.add_constant(x)
      model = sm.OLS(means, X)
      results = model.fit()
      slope_means = results.params[1]

      model = sm.OLS(sdevs, X)
      results = model.fit()
      slope_sdevs = results.params[1]

      model = sm.OLS(skews, X)
      results = model.fit()
      slope_skews = results.params[1]

      slope_means_tmp.append(slope_means)
      slope_sdevs_tmp.append(slope_sdevs)
      slope_skews_tmp.append(slope_skews)

    month_slp_mean_df = slope_means_tmp
    month_slp_sdev_df = slope_sdevs_tmp
    month_slp_skew_df = slope_skews_tmp

  except requests.exceptions.Timeout:
    pass

  if len(save_data) == len(keepers):
    done = True

data_df.to_csv(dataFILE)
month_mean_df.to_csv(momeanFILE)
month_sdev_df.to_csv(mosdevFILE)
month_skew_df.to_csv(moskewFILE)
month_slp_mean_df.to_csv(moslpmeanFILE)
month_slp_sdev_df.to_csv(moslpsdevFILE)
month_slp_skew_df.to_csv(moslpskewFILE)

with open(outFILE, 'w') as fo:
  fo.write('stationID,x,y,slope_means,mean,slope_sdevs,sdev,slope_skews,skew\n')
  for elem in save_data:
    fo.write(','.join([e for e in elem]) + '\n')






USC00020949
1


year
1974     7.700000
1975     2.450000
1976     0.000000
1977     2.975000
1978     5.918182
1979     7.757143
1980     5.557143
1981     2.800000
1982     2.425000
1983     2.300000
1984     0.300000
1985     3.650000
1986     2.000000
1987     5.200000
1988     7.825000
1989     8.300000
1990     3.700000
1991     3.800000
1992     4.100000
1993     8.636364
1994     1.000000
1995     7.416667
1996     1.000000
1997     5.900000
1998     2.466667
1999     1.050000
2000     0.500000
2001     5.950000
2002     0.000000
2003     4.000000
2004     1.300000
2005     4.790000
2006     0.000000
2007     1.800000
2008     8.150000
2009     0.500000
2010     8.850000
2011     0.000000
2012     0.500000
2013    14.750000
Name: precip, dtype: float64

2


year
1974     0.000000
1975     0.750000
1976     4.980000
1977     0.000000
1978     4.210000
1979     3.450000
1980     7.085714
1981     4.000000
1982     5.350000
1983     2.900000
1984     0.000000
1985     1.740000
1986     6.400000
1987     3.900000
1988    10.150000
1989     0.000000
1990     3.133333
1991    25.700000
1992     7.014286
1993    11.483333
1994     3.600000
1995     4.960000
1996     1.900000
1997     3.050000
1998     7.080000
1999     8.000000
2000     0.900000
2001     3.920000
2002     0.000000
2003     4.887500
2004     1.980000
2005     6.960000
2006     1.300000
2007     0.550000
2008     3.700000
2009     4.960000
2010     3.557143
2011     6.500000
2012     1.150000
2013     2.966667
Name: precip, dtype: float64

3


year
1974     4.650000
1975     3.700000
1976     0.500000
1977     1.800000
1978     5.700000
1979     5.437500
1980     6.100000
1981     6.625000
1982     4.975000
1983     6.100000
1984     0.000000
1985     1.000000
1986     3.700000
1987     1.966667
1988     0.500000
1989     1.800000
1990     2.033333
1991     5.000000
1992     5.433333
1993     5.600000
1994    10.180000
1995     4.325000
1996     0.000000
1997     0.000000
1998     2.550000
1999     0.300000
2000     1.275000
2001     8.850000
2002     2.550000
2003     5.933333
2005     1.880000
2006     4.600000
2007     2.300000
2008     0.000000
2009     0.000000
2010     6.366667
2011     2.500000
2012     1.050000
2013     1.500000
Name: precip, dtype: float64

ValueError: endog and exog matrices are different sizes