In [1]:
%reset -f

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [8]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime as dt
from datetime import timedelta
import os
import scipy as scipy
import statsmodels.api as sm
from google.colab import data_table
data_table.enable_dataframe_formatter()


stationIDLINK = 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt'
metadataLINK = 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt'
dataLINK = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access'
outFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Annual_Results.csv'
dataFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Annual_Data.csv'
momeanFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_MEAN.csv'
mosdevFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_SDEV.csv'
moskewFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_SKEW.csv'
moslpmeanFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_MEAN_SLP_Trends.csv'
moslpsdevFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_SDEV_SLP_Trends.csv'
moslpskewFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Monthly_SKEW_SLP_Trends.csv'


dates = []
start_date = dt(1974, 1, 1, 0, 0)
end_date = dt(2013, 12, 31, 0, 0)
date = start_date
while date <= end_date:
  dates.append(date)
  date = date + timedelta(days=1)


url = stationIDLINK
req = requests.get(url)
text = req.text


keepers_step_one = []
lines = (line for line in text.splitlines())
for line in lines:
  row = line.split()
  stationID = row[0]
  if stationID[:2] == 'US':
    state = row[4]
    if state in ['AZ', 'NM', 'NV', 'UT']:
      keepers_step_one.append(stationID)

url = metadataLINK
req = requests.get(url)
text = req.text

keepers_step_two = []
lines = (line for line in text.splitlines())
for line in lines:
  row = line.split()
  if 'PRCP' in row and row[0] in keepers_step_one:
    if int(row[4]) <= 1974 and int(row[5]) >= 2013:
      keepers_step_two.append(row[0])

#No html address
bad = []
for keeper in keepers_step_two:

  try:
    ct = 0
    url = dataLINK + '/' + keeper + '.csv'
    req = requests.get(url)
    text = req.text
    if not '404 Not Found' in text:
      ct = 0
      lines = [line for line in text.splitlines()]
      save_lines = []
      hdrs = lines[0].split(',')
      prcp_i = hdrs.index('"PRCP"')
      date_i = hdrs.index('"DATE"')
      for line in lines[1:]:
        row = line.split('","')
        name_no_comma = row[5].replace(',', '')
        line = line.replace(row[5], name_no_comma)
        line = line.replace('","', ',')
        row = line.split(',')
        date = dt.strptime(row[date_i].strip('"'), '%Y-%m-%d')
        prcp = row[prcp_i].strip('"')
        if date.year >= 1974 and date.year <= 2013:
          if prcp != '' and not any([s in prcp for s in ['P', 'T', 'H', '9999']]):
            prcp = float(prcp)
            ct += 1

      if float(ct)/float(len(dates))*100. < 98.:
        print(str(float(ct)/float(len(dates))*100))
        bad.append(keeper)

    else:
      bad.append(keeper)

  except requests.exceptions.Timeout:
    print('hi')
    pass


keepers_step_three = [k for k in keepers_step_two if k not in bad]
keepers = keepers_step_three
#keepers = ['USC00020949', 'USC00021248','USC00025270']



#DATA ANALYSIS



def get_yr(d):
  yr = d.year
  return yr

def get_mo(d):
  mo = d.month
  return mo

data_df = pd.DataFrame()
month_mean_df = pd.DataFrame()
month_sdev_df = pd.DataFrame()
month_skew_df = pd.DataFrame()
month_slp_mean_df = pd.DataFrame()
month_slp_sdev_df = pd.DataFrame()
month_slp_skew_df = pd.DataFrame()

i = 0
done = False
save_data = []
while done == False:
  keeper = keepers[i]
  print(i)
  url = dataLINK + '/' + keeper + '.csv'
  req = requests.get(url)
  text = req.text

  data_dict = {'date':[], 'precip':[]}
  lines = [line for line in text.splitlines()]
  hdrs = lines[0].split(',')
  prcp_col_i = hdrs.index('"PRCP"')
  date_col_i = hdrs.index('"DATE"')
  date_i = 0
  for line in lines[1:]:
    row = line.split('","')
    name_no_comma = row[5].replace(',', '')
    line = line.replace(row[5], name_no_comma)
    line = line.replace('","', ',')
    row = line.split(',')
    lon = row[3]
    lat = row[2]
    date = dt.strptime(row[date_col_i].strip('"'), '%Y-%m-%d')
    prcp = row[prcp_col_i].strip('"')

    if date.year >= 1974 and date.year <= 2013:

      if date == dates[date_i]:

        if prcp != '' and not any([s in prcp for s in ['P', 'T', 'H', '9999']]):
          prcp = float(prcp)/10.
        else:
          prcp = 0.

        data_dict['date'].append(date)
        data_dict['precip'].append(prcp)
        date_i += 1

      else:

        while date >= dates[date_i]:
          data_dict['date'].append(dates[date_i])
          data_dict['precip'].append(0.)
          date_i += 1

  while date < dates[-1]:
    data_dict['date'].append(dates[date_i])
    data_dict['precip'].append(0.)
    date = dates[date_i]
    date_i += 1

  df = pd.DataFrame(data=data_dict)
  df['year'] = df['date'].apply(get_yr)
  df['month'] = df['date'].apply(get_mo)

  mean = df.loc[df['precip'] > 0.]['precip'].mean()
  sdev = df.loc[df['precip'] > 0.]['precip'].std()
  try:
    skew = df.loc[df['precip'] > 0.]['precip'].skew()
  except TypeError:
    skew = 0.

  means = df.loc[df['precip'] > 0.].groupby('year')['precip'].mean()
  sdevs = df.loc[df['precip'] > 0.].groupby('year')['precip'].std()
  skews = df.loc[df['precip'] > 0.].groupby('year')['precip'].skew()

  x = np.arange(0, 40)
  X = sm.add_constant(x)

  model = sm.OLS(means, X)
  results = model.fit()
  slope_means = results.params[1]

  model = sm.OLS(sdevs, X)
  results = model.fit()
  slope_sdevs = results.params[1]

  model = sm.OLS(skews, X)
  results = model.fit()
  slope_skews = results.params[1]

  save_data.append([keeper, lon, lat, str(slope_means), str(mean), str(slope_sdevs), str(sdev), str(slope_skews), str(skew)])

  i += 1

  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews

  means = df.loc[df['precip'] > 0.].groupby('month')['precip'].mean()
  sdevs = df.loc[df['precip'] > 0.].groupby('month')['precip'].std()
  skews = df.loc[df['precip'] > 0.].groupby('month')['precip'].skew()

  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews

  slope_means_tmp, slope_sdevs_tmp, slope_skews_tmp = [], [], []

  for month in range(1, 13):

    means = df.loc[df['month'] == month].replace(0., np.nan).groupby('year')['precip'].mean().replace(np.nan, 0.)
    sdevs = df.loc[df['month'] == month].replace(0., np.nan).groupby('year')['precip'].std().replace(np.nan, 0.)
    skews = df.loc[df['month'] == month].replace(0., np.nan).groupby('year')['precip'].skew().replace(np.nan, 0.)
    x = np.arange(0, 40)
    X = sm.add_constant(x)
    model = sm.OLS(means, X)
    results = model.fit()
    slope_means = results.params[1]

    model = sm.OLS(sdevs, X)
    results = model.fit()
    slope_sdevs = results.params[1]

    model = sm.OLS(skews, X)
    results = model.fit()
    slope_skews = results.params[1]

    slope_means_tmp.append(slope_means)
    slope_sdevs_tmp.append(slope_sdevs)
    slope_skews_tmp.append(slope_skews)

  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp

  if len(save_data) == len(keepers):
    done = True

data_df.to_csv(dataFILE)
month_mean_df.to_csv(momeanFILE)
month_sdev_df.to_csv(mosdevFILE)
month_skew_df.to_csv(moskewFILE)
month_slp_mean_df.to_csv(moslpmeanFILE)
month_slp_sdev_df.to_csv(moslpsdevFILE)
month_slp_skew_df.to_csv(moslpskewFILE)

with open(outFILE, 'w') as fo:
  fo.write('stationID,x,y,slope_means,mean,slope_sdevs,sdev,slope_skews,skew\n')
  for elem in save_data:
    fo.write(','.join([e for e in elem]) + '\n')

#display(month_mean_df)
#display(month_slp_mean_df)





85.37303216974675
93.48391512662559
95.98904859685146
96.65982203969884
87.11841204654345
92.14236824093088
77.02258726899383
89.96577686516085
90.10951403148528
97.96714579055441
95.19507186858316
93.55236139630391
92.80629705681041
79.69883641341546
81.73169062286105
95.34565366187543
95.34565366187543
90.69130732375086
92.75154004106776
97.97399041752224
97.44695414099932
74.39425051334703
23.032169746748803
96.0164271047228
90.47912388774812
92.10130047912389
92.58726899383983
96.00958247775496
95.0444900752909
77.82340862422998
96.47501711156742
95.29089664613278
86.1533196440794
97.97399041752224
9.028062970568104
96.29021218343601
97.7618069815195
86.52977412731006
9.17180013689254
21.711156741957563
97.14579055441477
95.19507186858316
26.016427104722794
14.455852156057494
93.22381930184805
15.154004106776181
79.48665297741273
96.22861054072554
95.4072553045859
92.56673511293634
70.6981519507187
91.22518822724162
97.97399041752224
92.02600958247776
95.91375770020534
84.845995893

  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


34


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


35


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


36


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


37


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


38


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


39


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


40


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


41


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


42


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


43


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


44


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


45


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


46


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


47


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


48


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


49


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


50


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


51


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


52


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


53


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


54


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


55


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


56


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


57


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


58


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


59


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


60


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


61


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


62


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


63


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


64


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


65


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


66


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


67


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


68


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


69


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


70


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


71


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


72


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


73


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


74


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


75


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


76


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


77


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


78


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


79


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


80


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


81


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


82


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


83


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


84


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


85


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


86


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


87


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


88


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


89


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


90


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


91


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


92


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


93


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


94


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


95


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


96


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


97


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


98


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


99


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


100


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews
  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp


101


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews
  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp


102


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews
  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp


103


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews
  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp


104


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews
  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp


105


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews
  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp


106


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews
  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp


107


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews
  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp


108


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
  month_mean_df[keeper] = means
  month_sdev_df[keeper] = sdevs
  month_skew_df[keeper] = skews
  month_slp_mean_df[keeper] = slope_means_tmp
  month_slp_sdev_df[keeper] = slope_sdevs_tmp
  month_slp_skew_df[keeper] = slope_skews_tmp
