In [1]:
%reset -f

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime as dt
from datetime import timedelta
import shutil
import os
import scipy as scipy
import statsmodels.api as sm

stationIDLINK = 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt'
metadataLINK = 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt'
dataLINK = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access'
outFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Annual_Mean_MEANP_Trends_Results.csv'
dataFILE = '/content/drive/My Drive/Colab Notebooks/GHCN_Historical_Annual_Mean_MEANP_Trends_Data.csv'

url = stationIDLINK
req = requests.get(url)
text = req.text

keepers_step_one = []
lines = (line for line in text.splitlines())
for line in lines:
  row = line.split()
  stationID = row[0]
  if stationID[:2] == 'US':
    state = row[4]
    if state in ['AZ', 'NM', 'NV', 'UT']:
      keepers_step_one.append(stationID)

url = metadataLINK
req = requests.get(url)
text = req.text

keepers_step_two = []
lines = (line for line in text.splitlines())
for line in lines:
  row = line.split()
  if 'PRCP' in row and row[0] in keepers_step_one:
    if int(row[4]) <= 1974 and int(row[5]) >= 2013:
      keepers_step_two.append(row[0])

dates = []
start_date = dt(1974, 1, 1, 0, 0)
end_date = dt(2013, 12, 31, 0, 0)
date = start_date
while date < end_date:
  dates.append(date)
  date = date + timedelta(days=1)

#No html address
bad = []
for keeper in keepers_step_two:
  ct = 0
  url = dataLINK + '/' + keeper + '.csv'
  req = requests.get(url)
  text = req.text
  if not '404 Not Found' in text:
    ct = 0
    lines = [line for line in text.splitlines()]
    save_lines = []
    hdrs = lines[0].split(',')
    prcp_i = hdrs.index('"PRCP"')
    date_i = hdrs.index('"DATE"')
    for line in lines[1:]:
      row = line.split('","')
      name_no_comma = row[5].replace(',', '')
      line = line.replace(row[5], name_no_comma)
      line = line.replace('","', ',')
      row = line.split(',')
      date = dt.strptime(row[date_i].strip('"'), '%Y-%m-%d')
      prcp = row[prcp_i].strip('"')
      if date.year >= 1974 and date.year <= 2013:
        if prcp != '' and not any([s in prcp for s in ['P', 'T', 'H', '9999']]):
          prcp = float(prcp)
          ct += 1

    if float(ct)/float(len(dates))*100. < 98.:
      print(str(float(ct)/float(len(dates))*100))
      bad.append(keeper)

  else:
    bad.append(keeper)

keepers_step_three = [k for k in keepers_step_two if k not in bad]
keepers = keepers_step_three



#DATA ANALYSIS



data_df = pd.DataFrame()
i = 0
done = False
save_data = []
while done == False:
  keeper = keepers[i]
  print(keeper)

  try:
    url = dataLINK + '/' + keeper + '.csv'
    req = requests.get(url)
    text = req.text

    data_dict = {'date':[], 'precip':[]}
    lines = [line for line in text.splitlines()]
    hdrs = lines[0].split(',')
    prcp_i = hdrs.index('"PRCP"')
    date_i = hdrs.index('"DATE"')
    for line in lines[1:]:
      row = line.split('","')
      name_no_comma = row[5].replace(',', '')
      line = line.replace(row[5], name_no_comma)
      line = line.replace('","', ',')
      row = line.split(',')
      lon = row[3]
      lat = row[2]
      date = dt.strptime(row[date_i].strip('"'), '%Y-%m-%d')
      prcp = row[prcp_i].strip('"')
      if date.year >= 1974 and date.year <= 2013:
        if prcp != '' and not any([s in prcp for s in ['P', 'T', 'H', '9999']]):
          prcp = float(prcp)/10.
        else:
          prcp = 0.

        if prcp > 0.:
          data_dict['date'].append(date)
          data_dict['precip'].append(prcp)

    df = pd.DataFrame(data=data_dict)

    def get_yr(d):
      yr = d.year
      return yr

    df['year'] = df['date'].apply(get_yr)

    def get_mo(d):
      mo = d.month
      return mo

    df['month'] = df['date'].apply(get_mo)
    means = df.groupby('year')['precip'].mean()
    sdevs = df.groupby('year')['precip'].std()
    skews = df.groupby('year')['precip'].skew()

    x = np.arange(0, 40)
    X = sm.add_constant(x)

    model = sm.OLS(means, X)
    results = model.fit()
    slope_means = results.params[1]

    model = sm.OLS(sdevs, X)
    results = model.fit()
    slope_sdevs = results.params[1]

    model = sm.OLS(skews, X)
    results = model.fit()
    slope_skews = results.params[1]

    mean = df['precip'].mean()
    sdev = df['precip'].std()
    skew = df['precip'].skew()

    save_data.append([keeper, lon, lat, str(slope_means), str(mean), str(slope_sdevs), str(sdev), str(slope_skews), str(skew)])

    i += 1

    data_df[keeper + '_means'] = means
    data_df[keeper + '_sdevs'] = sdevs
    data_df[keeper + '_skews'] = skews

  except requests.exceptions.Timeout:
    pass

  if len(save_data) == len(keepers):
    done = True

data_df.to_csv(dataFILE)

with open(outFILE, 'w') as fo:
  fo.write('stationID,x,y,slope_means,mean,slope_sdevs,sdev,slope_skews,skew\n')
  for elem in save_data:
    fo.write(','.join([e for e in elem]) + '\n')



85.3788760353207
93.49031418988295
95.995619138887
96.66643849681704
87.12437538503663
92.14867547402287
77.02785953864057
89.97193510849476
90.11568211376549
97.9738517352317
95.20158806215346
93.55876514477377
92.81264973646383
79.70429187487166
81.73728523512904
95.35218016291327
95.35218016291327
90.69751523033747
92.75788897255117
97.98069683072079
97.45362447806147
74.39934287083305
23.033746320761175
96.02299952084331
92.29242247929359
90.48531727017591
92.10760490108837
92.5936066808132
96.01615442535424
95.05099596139365
77.82873571086316
96.48162091861181
95.29741939900062
86.15921692107605
97.98069683072079
9.028680950099254
96.2968033404066
97.76849887055924
86.53569717297557
9.172427955369978
21.712642891368333
97.15244027654187
95.20158806215346
26.01820795400096
14.456841672941337
93.23020056129782
15.155041412827707
79.49209391471011
96.23519748100486
95.41378602231501
92.57307139434594
70.70299130672872
91.23143267848587
97.98069683072079
92.03230885070846
95.920323088

  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00292030


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00292510


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00292608


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00292785


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00292837


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00293296


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00293649


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00294009


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00294398


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00294742


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00295084


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00295150


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00295617


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00296275


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00296619


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00296687


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00297008


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00297226


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00297340


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00297867


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00298535


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00299156


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00299820


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00420086


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00420730


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00420738


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00420788


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00420819


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00420849


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00421144


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00421171


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00421214


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00421308


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00421446


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00422057


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00422101


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00422385


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00422578


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00422592


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00422798


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00424342


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00424508


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00424856


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00424968


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00425402


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00425582


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00425733


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00426135


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00426919


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00427271


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00428119


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00428771


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00429111


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00429165


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00429595


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USC00429717


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00003103


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023009


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023050


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023054


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023081


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023153


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023154


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023160


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023162


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023169


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023183


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023184


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023185


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00023194


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00024121


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00024127


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00024128


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00093026


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews


USW00093129


  data_df[keeper + '_means'] = means
  data_df[keeper + '_sdevs'] = sdevs
  data_df[keeper + '_skews'] = skews
