# Weather Data Export

Station metadata:

   - name, ID, stid: identifiers for the weather station
   - elevation, latitude, longitude: location of the weather station

Observations:

   - date_time
   - air_temp (Celsius)
   - precip_accum (millimeters)
   - relative_humidity (%)
   - wind_speed (m/s)
   - wind_gust (m/s)


In [1]:
import pandas as pd
import numpy as np
import json
import math
import datetime
from dateutil import rrule
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from urllib.request import urlopen
from pandas.io.json import json_normalize

In [2]:
# Directory that receives the per-station CSV files and the combined dump.
outputPath = "E:/GitHub/w210_Wildfire/data/"

# Time-series endpoint shared by every per-station request; the station id and
# the start/end window are appended to it later.
# NOTE(review): the API token is embedded in this URL in plain text -- consider
# loading it from configuration instead of committing it.
baseurl = (
    "https://api.synopticdata.com/v2/stations/timeseries"
    "?state=ca"
    "&vars=air_temp,wind_speed,wind_gust,pressure,relative_humidity,precip_accum"
    "&token=3126cda0bfe5490f91911a15826bbf3b"
)

# Column names of interest in a flattened station record: station metadata
# first, then the observation timestamp and the "_set_1" observation series.
obsvars = [
    'NAME',
    'ID',
    'STID',
    'ELEVATION',
    'LATITUDE',
    'LONGITUDE',
    'OBSERVATIONS.date_time',
    'OBSERVATIONS.air_temp_set_1',
    'OBSERVATIONS.precip_accum_set_1',
    'OBSERVATIONS.relative_humidity_set_1',
    'OBSERVATIONS.wind_speed_set_1',
    'OBSERVATIONS.wind_gust_set_1',
]

In [3]:
# Request the station metadata listing (query: state=ca, status=active,
# start=201601010000, end=201812310000) and keep the STID column, which the
# per-station download loop iterates over.
metadataurl = "https://api.synopticdata.com/v2/stations/metadata?state=ca&status=active&start=201601010000&end=201812310000&token=3126cda0bfe5490f91911a15826bbf3b"

# The context manager closes the HTTP response even if reading or decoding
# fails; the original left the connection open.
with urlopen(metadataurl) as response:
    # 'replace' substitutes undecodable bytes instead of raising.
    json_meta_data = response.read().decode('utf-8', 'replace')

metadata = json.loads(json_meta_data)
# One row per entry of the payload's 'STATION' list.
station_metadata = json_normalize(metadata['STATION'])
stationids = station_metadata['STID']

In [4]:
# Function to extract observations from single station JSON and return daily summary data

def extract_observations(obs):
    """Summarise one station's observation series into one row per date.

    Parameters
    ----------
    obs : pandas.DataFrame
        Flattened station record; only the first row is read. The row must
        provide ``NAME``, ``ID``, ``STID``, ``ELEVATION``, ``LATITUDE``,
        ``LONGITUDE`` and ``OBSERVATIONS.date_time``. Each
        ``OBSERVATIONS.<variable>_set_1`` column is optional; when one is
        absent the variable is filled with NaN for every timestamp.

    Returns
    -------
    pandas.DataFrame
        One row per distinct date with max/min/mean of ``air_temp``,
        ``relative_humidity`` and ``wind_speed``, the max of
        ``precip_accum`` and ``wind_gust``, followed by the station metadata
        columns ``name``, ``ID``, ``stid``, ``elevation``, ``latitude`` and
        ``longitude``.
    """
    variables = ('air_temp', 'precip_accum', 'relative_humidity',
                 'wind_speed', 'wind_gust')
    available = set(obs.columns)
    record = obs.iloc[0]
    date_time = record['OBSERVATIONS.date_time']

    # The calendar date is taken from each timestamp exactly as parsed; no
    # time-zone conversion is applied before grouping.
    columns = {'date': [pd.to_datetime(d).date() for d in date_time]}

    # np.nan / np.float64 replace the np.NaN / np.float_ aliases, which were
    # removed in NumPy 2.0; both spellings behave the same on older NumPy.
    for var in variables:
        source = 'OBSERVATIONS.' + var + '_set_1'
        if source in available:
            columns[var] = np.asarray(record[source], dtype=np.float64)
        else:
            columns[var] = np.full(len(date_time), np.nan)

    # Building from typed columns keeps the observations float64 without the
    # object-array round trip (column_stack followed by to_numeric).
    full_obs_df = pd.DataFrame(columns, columns=['date', *variables])

    # Group by date and create daily summary features
    obs_df = full_obs_df.groupby('date', as_index=False).agg({
        'air_temp': ['max', 'min', 'mean'],
        'precip_accum': 'max',
        'relative_humidity': ['max', 'min', 'mean'],
        'wind_speed': ['max', 'min', 'mean'],
        'wind_gust': 'max',
    })
    # Flatten the two-level labels into "<variable>_<statistic>".
    # NOTE(review): the grouping key presumably has an empty second level and
    # therefore ends up named "date_" -- confirm before renaming it, since
    # downstream CSV consumers may rely on that header.
    obs_df.columns = ["_".join(label) for label in obs_df.columns]

    # Add station metadata (repeated on every daily row)
    obs_df['name'] = record['NAME']
    obs_df['ID'] = record['ID']
    obs_df['stid'] = record['STID']
    obs_df['elevation'] = np.float64(record['ELEVATION'])
    obs_df['latitude'] = np.float64(record['LATITUDE'])
    obs_df['longitude'] = np.float64(record['LONGITUDE'])

    return obs_df
    

In [5]:
# Download window sent as the start/end query parameters.
# NOTE(review): the values look like YYYYMMDDhhmm, i.e. 2018-01-01 00:00
# through 2018-06-30 23:59 -- confirm against the API documentation.
strstart = "201801010000"
strend = "201806302359"

# Defined up front so the name exists even if the loop is interrupted.
weather = pd.DataFrame()
# Per-station frames are collected here and concatenated once after the loop;
# appending to a DataFrame on every iteration copies all accumulated rows each
# time, and DataFrame.append no longer exists in pandas 2.x.
station_frames = []

for stid in stationids:
    # API call for stationid using start and end day
    url = baseurl + "&stid=" + stid + "&start=" + strstart + "&end=" + strend
    # The context manager closes the HTTP response after reading it.
    with urlopen(url) as response:
        json_data = response.read().decode('utf-8', 'replace')
    data = json.loads(json_data)

    # Obtain station observations from the JSON payload. A payload that cannot
    # be flattened (e.g. no 'STATION' key) is reported and skipped. The handler
    # stays deliberately broad so a single malformed response does not abort a
    # long download, but unlike a bare `except:` it no longer swallows
    # KeyboardInterrupt or SystemExit.
    try:
        station = json_normalize(data['STATION'], errors='ignore')
    except Exception:
        print("No data for " + stid + " on " + strstart)
        continue

    if station.empty:
        continue

    observations = extract_observations(station)
    # dump station data to csv
    filename = strstart + "_" + stid + ".csv"
    observations.to_csv(outputPath + filename, header=True, index=False)
    station_frames.append(observations)

if station_frames:
    weather = pd.concat(station_frames, ignore_index=True)

# full dump of the weather data for the whole download window
filename = strstart + "_all" + ".csv"
weather.to_csv(outputPath + filename, header=True, index=False)

No data for PBUC1 on 201801010000
No data for PUR69 on 201801010000
No data for PUR91 on 201801010000
No data for PUR92 on 201801010000
No data for PG319 on 201801010000
No data for A3610 on 201801010000
No data for SE264 on 201801010000
No data for SE265 on 201801010000
No data for SE266 on 201801010000
No data for PG366 on 201801010000
No data for PG376 on 201801010000
No data for PG393 on 201801010000
No data for SE299 on 201801010000
No data for SE313 on 201801010000
No data for LIB04 on 201801010000
No data for LIB06 on 201801010000
No data for SE316 on 201801010000
No data for SE317 on 201801010000
No data for PG432 on 201801010000
No data for PG426 on 201801010000
No data for SE318 on 201801010000
No data for SE321 on 201801010000
No data for SE322 on 201801010000
No data for PG423 on 201801010000
No data for P042C on 201801010000
No data for PG425 on 201801010000
No data for PG428 on 201801010000
No data for SE323 on 201801010000
No data for SE324 on 201801010000
No data for SE