In [1]:
# Run pip to install prerequisites
!pip install pandas termcolor

import pandas as pd
from termcolor import colored
import zipfile
import os



In [2]:
# Look for this file in our cwd
dataname = 'data/LFPO.txt'
zipname = '../src/wx.zip'

# Extract zip if necessary
if not os.path.isfile(dataname):
    try:
        # The context manager closes the archive even when extraction fails,
        # so the file handle is never leaked.
        with zipfile.ZipFile(zipname, 'r') as zip_ref:
            zip_ref.extractall('data')
    except FileNotFoundError as e:
        # Show a readable hint, then re-raise so the notebook stops here
        # instead of failing later on a missing data file.
        print(colored("Source zipfile not found: \n\t{}".format(e), 'red'))
        raise
        
# Use the unix head command to show the source data file
! head data/LFPO.txt 

station,valid,tmpf, dwpf, relh, drct, sknt, p01i, alti, mslp, vsby, gust, skyc1, skyc2, skyc3, skyc4, skyl1, skyl2, skyl3, skyl4, wxcodes, metar
LFPO,2006-12-16 00:00,41.00,37.40,86.89,210.00,8.00,M,30.21,M,6.21,M,M,M,M,M,M,M,M,M,M,LFPO 160000Z 21008KT CAVOK 05/03 Q1023 NOSIG
LFPO,2006-12-16 00:30,41.00,37.40,86.89,210.00,6.00,M,30.21,M,6.21,M,M,M,M,M,M,M,M,M,M,LFPO 160030Z 21006KT CAVOK 05/03 Q1023 NOSIG
LFPO,2006-12-16 01:00,39.20,37.40,93.19,190.00,5.00,M,30.21,M,6.21,M,M,M,M,M,M,M,M,M,M,LFPO 160100Z 19005KT CAVOK 04/03 Q1023 NOSIG
LFPO,2006-12-16 01:30,39.20,37.40,93.19,180.00,6.00,M,30.21,M,6.21,M,M,M,M,M,M,M,M,M,M,LFPO 160130Z 18006KT CAVOK 04/03 Q1023 NOSIG
LFPO,2006-12-16 02:00,39.20,35.60,86.79,170.00,8.00,M,30.21,M,6.21,M,M,M,M,M,M,M,M,M,M,LFPO 160200Z 17008KT CAVOK 04/02 Q1023 NOSIG
LFPO,2006-12-16 02:30,39.20,35.60,86.79,190.00,9.00,M,30.21,M,6.21,M,M,M,M,M,M,M,M,M,M,LFPO 160230Z 19009KT CAVOK 04/02 Q1023 NOSIG
LFPO,2006-12-16 03:00,37.40,35.60,93.14,180.00,8.00,M,30

| Alias        | Original Variable Name | Description |
|:------------------|:------------|:------------|
|station            | station     | three or four character site identifier |
|dtime              | valid       | timestamp of the observation |
|air_temp           | tmpf        | Air Temperature converted to Celsius: typically @ 2 meters |
|dew_pt             | dwpf        | Dew Point Temperature converted to Celsius: typically @ 2 meters |
|rel_humidity       | relh        | Relative Humidity in % |
|wind_direction     | drct        | Wind Direction in degrees from north |
|wind_speed         | sknt        | Wind Speed converted to km/h |
|precip_since_reset | p01i        | One hour precipitation for the period from the observation time to the time of the previous hourly precipitation reset. This varies slightly by site. Values are converted to cm. This value may or may not contain frozen precipitation melted by some device on the sensor or estimated by some other means. Unfortunately we do not know of an authoritative database denoting which station has which sensor. |
|altimeter          | alti        | Pressure altimeter converted to cm |
|sea_level_pressure | mslp        | Sea Level Pressure in millibar |
|visibility         | vsby        | Visibility converted to km |
|wind_gust          | gust        | Wind Gust converted to km/h |
|sky_cov_l1         | skyc1       | Sky Level 1 Coverage |
|sky_cov_l2         | skyc2       | Sky Level 2 Coverage |
|sky_cov_l3         | skyc3       | Sky Level 3 Coverage |
|sky_cov_l4         | skyc4       | Sky Level 4 Coverage |
|sky_alt_l1         | skyl1       | Sky Level 1 Altitude converted to meters |
|sky_alt_l2         | skyl2       | Sky Level 2 Altitude converted to meters |
|sky_alt_l3         | skyl3       | Sky Level 3 Altitude converted to meters |
|sky_alt_l4         | skyl4       | Sky Level 4 Altitude converted to meters |
|weather_codes      | wxcodes     | Present Weather Codes (space separated) |
|metar              | metar       | unprocessed reported observation in METAR form |

In [3]:
# Load the CSV with pandas. Timestamps are read as plain strings here and
# parsed in a later step, which seems to be much faster than parsing on read.

# Output column name -> dtype, listed in the same order as the file's columns.
column_dtypes = {
    'station': 'str',
    'dtime': 'str',
    'air_temp': 'float64',
    'dew_pt': 'float64',
    'rel_humidity': 'float64',
    'wind_direction': 'float64',
    'wind_speed': 'float64',
    'precip_since_reset': 'float64',
    'altimeter': 'float64',
    'sea_level_pressure': 'float64',
    'visibility': 'float64',
    'wind_gust': 'float64',
    'sky_cov_l1': 'str',
    'sky_cov_l2': 'str',
    'sky_cov_l3': 'str',
    'sky_cov_l4': 'str',
    'sky_alt_l1': 'float64',
    'sky_alt_l2': 'float64',
    'sky_alt_l3': 'float64',
    'sky_alt_l4': 'float64',
    'weather_codes': 'str',
    'metar': 'str',
}

wf = pd.read_csv(
    dataname,
    sep=',',
    header=0,                   # the file's own header row is replaced by `names`
    names=list(column_dtypes),  # dict order is the column order
    dtype=column_dtypes,
    na_values='M',              # the source file writes 'M' for missing values
    memory_map=True,
)

In [4]:
# Now parse the timestamps and index on them.
# An explicit format replaces the deprecated `infer_datetime_format` flag and
# avoids per-value format guessing; source timestamps look like
# "2006-12-16 00:30".
# NOTE: this cell cannot be re-run on its own. Once 'dtime' has become the
# index, the column lookup below raises, which also prevents the unit
# conversions from being applied twice. Re-run the load cell first.
wf['dtime'] = pd.to_datetime(wf['dtime'], format='%Y-%m-%d %H:%M')
wf = wf.set_index('dtime')


# The converters below are plain arithmetic, so they accept a single number
# as well as a whole pandas Series (missing values stay NaN). Passing the
# Series directly avoids a slow per-element `.apply`.

def knot_to_kph(knot):
    """Convert a speed from knots to km/h."""
    return knot * 1.852


def f_to_c(f):
    """Convert a temperature from degrees Fahrenheit to degrees Celsius."""
    return (f - 32) * 5 / 9


def feet_to_meter(feet):
    """Convert a length from feet to meters.

    NOTE(review): 3.281 ft/m is a rounded factor (the exact definition is
    0.3048 m per foot). It is kept so results match the existing outputs.
    """
    return feet / 3.281


def mile_to_km(mile):
    """Convert a distance from miles to kilometers.

    NOTE(review): 1.60934 is rounded (the exact factor is 1.609344). It is
    kept so results match the existing outputs.
    """
    return mile * 1.60934


def inch_to_cm(inch):
    """Convert a length from inches to centimeters."""
    return inch * 2.54


# Convert knots to km/h
for col in ('wind_gust', 'wind_speed'):
    wf[col] = knot_to_kph(wf[col])

# Convert Fahrenheit to Celsius
for col in ('air_temp', 'dew_pt'):
    wf[col] = f_to_c(wf[col])

# Convert feet to meters
for col in ('sky_alt_l1', 'sky_alt_l2', 'sky_alt_l3', 'sky_alt_l4'):
    wf[col] = feet_to_meter(wf[col])

# Convert miles to km
wf['visibility'] = mile_to_km(wf['visibility'])

# Convert inches to cm
for col in ('precip_since_reset', 'altimeter'):
    wf[col] = inch_to_cm(wf[col])

In [5]:
# Drop the raw METAR text before saving. errors='ignore' keeps this cell
# re-runnable: a second run no longer raises KeyError because the column is
# already gone.
wf = wf.drop(columns=['metar'], errors='ignore')
wf.to_pickle('data/weather.pickle')
wf

Unnamed: 0_level_0,station,air_temp,dew_pt,rel_humidity,wind_direction,wind_speed,precip_since_reset,altimeter,sea_level_pressure,visibility,wind_gust,sky_cov_l1,sky_cov_l2,sky_cov_l3,sky_cov_l4,sky_alt_l1,sky_alt_l2,sky_alt_l3,sky_alt_l4,weather_codes
dtime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2006-12-16 00:00:00,LFPO,5.0,3.0,86.89,210.0,14.816,,76.7334,,9.994001,,,,,,,,,,
2006-12-16 00:30:00,LFPO,5.0,3.0,86.89,210.0,11.112,,76.7334,,9.994001,,,,,,,,,,
2006-12-16 01:00:00,LFPO,4.0,3.0,93.19,190.0,9.260,,76.7334,,9.994001,,,,,,,,,,
2006-12-16 01:30:00,LFPO,4.0,3.0,93.19,180.0,11.112,,76.7334,,9.994001,,,,,,,,,,
2006-12-16 02:00:00,LFPO,4.0,2.0,86.79,170.0,14.816,,76.7334,,9.994001,,,,,,,,,,
2006-12-16 02:30:00,LFPO,4.0,2.0,86.79,190.0,16.668,,76.7334,,9.994001,,,,,,,,,,
2006-12-16 03:00:00,LFPO,3.0,2.0,93.14,180.0,14.816,,76.6572,,9.994001,,FEW,BKN,,,213.349589,7619.628162,,,
2006-12-16 03:30:00,LFPO,4.0,3.0,93.19,190.0,12.964,,76.6572,,9.994001,,BKN,,,,121.914051,,,,
2006-12-16 04:00:00,LFPO,4.0,3.0,93.19,200.0,14.816,,76.6572,,9.994001,,BKN,BKN,,,121.914051,7619.628162,,,
2006-12-16 04:30:00,LFPO,4.0,3.0,93.19,190.0,16.668,,76.6572,,8.996211,,BKN,,,,152.392563,,,,
