# Download data from any weather station in www.wunderground.com

In [3]:
# To do list:
# - Select only the necessary columns
# - Transform time to Unix time
# - Transform temperature from Fahrenheit to Celsius
# - Interpolate the missing data to 1 second resolution
# - Check if any day in the girasol machine is not downloaded yet
# - Either download it or remove it
# - Get starting and ending dates of the recording 2017_12_07-2019_01_18
# - Put the code in functions
# - Make it robust so it does not break in case of no existing data for a day
# - Launch processing for all data

import csv
import requests
import time
import datetime

import numpy as np
import matplotlib.pylab as plt

from scipy import interpolate

In [12]:
# Build the daily-history request url for one station and one date
def _get_url(weather_station_id, day, month, year):
    """Return the Weather Underground daily-history url for a station and day.

    Parameters
    ----------
    weather_station_id : value placed in the ``ID`` query field.
    day, month, year   : values placed in the matching query fields.

    Returns
    -------
    str : the url, with ``graphspan=day`` and ``format=1`` already set.
    """
    template = r'https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID={}&day={}&month={}&year={}&graphspan=day&format=1'
    query_values = (weather_station_id, day, month, year)
    return template.format(*query_values)

# Request content on the url
def _request_page_data(url, timeout=30.):
    """Download ``url`` with an HTTP GET and return the body as text.

    Parameters
    ----------
    url     : address to fetch.
    timeout : seconds to wait for the server before giving up. Without a
              timeout ``requests`` may wait forever on a stalled server.

    Returns
    -------
    str : the response body decoded as UTF-8.

    Raises
    ------
    requests.exceptions.RequestException : on connection problems, on a
        timeout, or when the server answers with a 4xx / 5xx status.
    UnicodeDecodeError : when the body is not valid UTF-8.
    """
    page = requests.get(url, timeout=timeout)
    # Fail here on an error status instead of handing an error page to the parser
    page.raise_for_status()
    return page.content.decode("utf-8")

# Split up data by rows
def _data_2_rows(data_):
    """Split the downloaded text into rows at every ``<br>`` marker.

    Parameters
    ----------
    data_ : str holding the whole page, with rows separated by ``<br>``.

    Returns
    -------
    list of str : the text found before each marker, in order, followed by
        one final element holding whatever comes after the last marker (an
        empty string when the text ends with a marker). The markers
        themselves are removed. The final element is a leftover rather than
        a terminated row, so consumers should not treat it as data.
    """
    # str.split ends cleanly when no marker is left; a manual ``find`` loop
    # has to special-case the -1 it returns there, otherwise the -1 is used
    # as a length and produces truncated or spurious empty rows.
    return data_.split(r'<br>')
# Temperature in F. to Celsius
def _celsius_2_fahrenheit(Temp):
    """Convert a temperature from degrees Fahrenheit to degrees Celsius.

    NOTE: the direction is the opposite of what the function name says;
    the name is kept because other code in this file calls it.

    Parameters
    ----------
    Temp : number, or numeric string, in degrees Fahrenheit.

    Returns
    -------
    float : the temperature in degrees Celsius.
    """
    above_freezing = float(Temp) - 32.
    celsius_per_fahrenheit = 5. / 9.
    return above_freezing * celsius_per_fahrenheit
# Pressure in In. to mm.
def _inches_2_milimeters(In):
    """Convert a length from inches to millimeters.

    Parameters
    ----------
    In : number, or numeric string, in inches.

    Returns
    -------
    float : the same length in millimeters (1 in = 25.4 mm).
    """
    millimeters_per_inch = 25.4
    inches = float(In)
    return inches * millimeters_per_inch
# Degrees to radians
def _degrees_2_radias(Degree):
    """Convert an angle from degrees to radians.

    Parameters
    ----------
    Degree : number, or numeric string, in degrees.

    Returns
    -------
    The angle in radians, as returned by ``np.radians``.
    """
    angle_in_degrees = float(Degree)
    return np.radians(angle_in_degrees)
# Wind velocity in MPH to meters per second
def _MPH_2_mps(MPH):
    """Convert a speed from miles per hour to meters per second.

    Parameters
    ----------
    MPH : number, or numeric string, in miles per hour.

    Returns
    -------
    float : the speed in meters per second (1 mile = 1.609344 km,
            1 hour = 3600 s).
    """
    meters_per_hour = 1000. * float(MPH) * 1.609344
    return meters_per_hour / 3600.
# Format columns to SI units and save it in a csv file
def _format_data(rows_, year, month, day, output_dir=None):
    """Convert the rows of one day and write them to ``YYYY_MM_DD.csv``.

    Parameters
    ----------
    rows_            : list of str. The first element is the header and the
                       last element is a splitter leftover; neither is
                       written. Every other non-blank element is one
                       comma-separated record.
    year, month, day : integers used to build the file name.
    output_dir       : folder to write into. When omitted, the original
                       hard-coded folder is used so existing calls behave
                       as before.

    Returns
    -------
    str : path of the csv file that was written. The file is created even
          when there is no record to write.

    Raises
    ------
    IndexError : when a record has fewer than nine comma-separated cells.
    ValueError : when a cell cannot be parsed as a time stamp or a number.
    OSError    : when the file cannot be opened for writing.
    """
    import os

    file_name = '{:04}_{:02}_{:02}.csv'.format(year, month, day)
    if output_dir is None:
        # Historical location, kept byte-for-byte for existing callers
        path = r'C:\Users\Guille\Desktop\weather_station\data\{}'.format(file_name)
    else:
        path = os.path.join(output_dir, file_name)
    with open(path, "w", newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        for row in rows_[1:-1]:
            # A blank row carries no record; skip it rather than fail on it
            if not row.strip():
                continue
            writer.writerow(_convert_row(row))
    return path


# Keep the needed cells of one raw record and convert them to SI units
def _convert_row(row):
    """Return the seven kept cells of ``row`` with units converted.

    Cells 0, 1, 2, 3, 5, 6 and 8 of the comma-separated record are kept,
    with leading whitespace (including the line break in front of the
    record) removed. The first six are converted; the seventh is left as
    text.
    """
    raw_cells = row.split(',')
    cells = [raw_cells[idx].lstrip() for idx in (0, 1, 2, 3, 5, 6, 8)]
    # Human time to Unix time. strptime gives a naive datetime, which
    # timestamp() interprets in the local time zone of this machine.
    cells[0] = datetime.datetime.strptime(cells[0], '%Y-%m-%d %H:%M:%S').timestamp()
    # Temperature in F. to Celsius (the helper is named the other way round)
    cells[1] = _celsius_2_fahrenheit(cells[1])
    # Dew point in F. to Celsius
    cells[2] = _celsius_2_fahrenheit(cells[2])
    # Pressure in In. to mm.
    cells[3] = _inches_2_milimeters(cells[3])
    # Degrees to radians
    cells[4] = _degrees_2_radias(cells[4])
    # Wind velocity in MPH to meters per second
    cells[5] = _MPH_2_mps(cells[5])
    return cells

# Station to download: the last uncommented assignment is the one used
weather_station_id = r'KNMALBUQ11'  # West UNM Central Campus
weather_station_id = r'KNMALBUQ473' # UNM Hospital Helipad
#weather_station_id = r'KNMALBUQ118' # Airport Weather Station

# Download, parse and save every day of the selected year / month window.
# Days always run from 1 to 31, so a shorter month would also request dates
# that do not exist.
for year in range(2017, 2018):
    for month in range(12, 13):
        for day in range(1, 32):
            try:
                url = _get_url(weather_station_id, day, month, year)
                print(url)
                data_ = _request_page_data(url)
                rows_ = _data_2_rows(data_)
                path = _format_data(rows_, year, month, day)
            except Exception as error:
                # Carry on with the next day, but report which day failed and
                # why. Catching Exception (not a bare except) still lets
                # Ctrl-C stop the download.
                print(weather_station_id, day, month, year, repr(error))

https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=KNMALBUQ473&day=1&month=12&year=2017&graphspan=day&format=1
https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=KNMALBUQ473&day=2&month=12&year=2017&graphspan=day&format=1
https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=KNMALBUQ473&day=3&month=12&year=2017&graphspan=day&format=1
https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=KNMALBUQ473&day=4&month=12&year=2017&graphspan=day&format=1
https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=KNMALBUQ473&day=5&month=12&year=2017&graphspan=day&format=1
https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=KNMALBUQ473&day=6&month=12&year=2017&graphspan=day&format=1
https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=KNMALBUQ473&day=7&month=12&year=2017&graphspan=day&format=1
https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=KNMALBUQ473&day=8&month=12&year=2017&graphspan=day&f

In [24]:
import os

# Interpolation
def _interpolate_csv_columns(X_, Y_):
    """Resample the columns of ``X_`` at the time stamps found in ``Y_``.

    Parameters
    ----------
    X_ : 2-D array; column 0 holds the sample positions and every other
         column holds values sampled at those positions.
    Y_ : 2-D array; only column 0 is read, as the positions to resample at.

    Returns
    -------
    2-D array with one row per row of ``Y_`` and one column per column of
    ``X_``: column 0 is a copy of ``Y_[:, 0]`` and each remaining column is
    the matching column of ``X_`` interpolated with ``interp1d`` at those
    positions.

    Raises
    ------
    ValueError : raised by ``interp1d`` (default settings) when a requested
        position lies outside the range covered by ``X_[:, 0]``.
    """
    source_times = X_[:, 0]
    target_times = Y_[:, 0]
    n_columns = X_.shape[1]
    resampled = np.zeros((target_times.shape[0], n_columns))
    resampled[:, 0] = target_times
    for column in range(1, n_columns):
        interpolator = interpolate.interp1d(source_times, X_[:, column])
        resampled[:, column] = interpolator(target_times)
    return resampled
# Load csv file
def _load_csv_file(path):
    """Read a comma-separated numeric file into a NumPy array.

    Parameters
    ----------
    path : location of the csv file.

    Returns
    -------
    Array produced by ``np.loadtxt`` with ``,`` as delimiter.

    Raises
    ------
    OSError    : when the file cannot be opened.
    ValueError : when a cell cannot be converted to a number.
    """
    # The with-block closes the handle as soon as loading finishes; passing a
    # bare open(...) to loadtxt would leave it open until garbage collection.
    with open(path, "rb") as csv_file:
        return np.loadtxt(csv_file, delimiter=",")
# Write an array to a csv file
def _save_csv_file(X_p_, path):
    """Write ``X_p_`` to ``path`` as comma-separated text.

    Parameters
    ----------
    X_p_ : array to write with ``np.savetxt`` (default number format).
    path : destination file; it is overwritten when it already exists.

    Returns
    -------
    Whatever ``np.savetxt`` returns, which is ``None``.
    """
    outcome = np.savetxt(path, X_p_, delimiter=",")
    return outcome


# Folders: weather-station input, pyranometer files, weather-station output
path_iws = r'E:\weather_station'
path_opy = r'E:\girasol_repository_files\pyranometer'
path_ows = r'E:\girasol_repository_files\weather_station'

# For every file in the pyranometer folder, load the weather-station file of
# the same name, resample it at the pyranometer time stamps (column 0) and
# save the result under the same name in the output folder.
# The folder listed is path_opy; the previous name path_py is not defined in
# this file and raised NameError in a fresh session.
for file in os.listdir(path_opy):
    # os.path.join replaces the '{}\{}' templates, whose '\{' is an invalid
    # escape sequence
    file_ws = os.path.join(path_iws, file)
    file_py = os.path.join(path_opy, file)
    X_ = _load_csv_file(file_ws)
    Y_ = _load_csv_file(file_py)
    X_p_ = _interpolate_csv_columns(X_, Y_)
    print(X_.shape, X_p_.shape, Y_.shape)
    path = os.path.join(path_ows, file)
    _save_csv_file(X_p_, path)
    print(path)

(229, 7) (86689, 7) (86689, 2)
E:\girasol_repository_files\weather_station\2017_12_07.csv
(217, 7) (86483, 7) (86483, 2)
E:\girasol_repository_files\weather_station\2017_12_08.csv
(213, 7) (86311, 7) (86311, 2)
E:\girasol_repository_files\weather_station\2017_12_09.csv
(206, 7) (86143, 7) (86143, 2)
E:\girasol_repository_files\weather_station\2017_12_10.csv
(224, 7) (85951, 7) (85951, 2)
E:\girasol_repository_files\weather_station\2017_12_11.csv
(218, 7) (85850, 7) (85850, 2)
E:\girasol_repository_files\weather_station\2017_12_12.csv
(222, 7) (85712, 7) (85712, 2)
E:\girasol_repository_files\weather_station\2017_12_13.csv
(294, 7) (85473, 7) (85473, 2)
E:\girasol_repository_files\weather_station\2017_12_15.csv
(333, 7) (85372, 7) (85372, 2)
E:\girasol_repository_files\weather_station\2017_12_16.csv
(331, 7) (85348, 7) (85348, 2)
E:\girasol_repository_files\weather_station\2017_12_17.csv
(325, 7) (85270, 7) (85270, 2)
E:\girasol_repository_files\weather_station\2017_12_18.csv
(327, 7) (