In [79]:
from datetime import datetime
import pandas as pd
from metpy.io import parse_metar_to_dataframe
import numpy as np

In [56]:
# Path to the NCDC METAR file; the name ends in YYYYMM.dat (here 2000-01).
file = 'data/64010KMKG200001.dat'

# Load the raw records (no header row), reusing the path defined above.
df = pd.read_csv(file, header=None)

In [77]:
def parse_metar_file(file, wx_subset=True):
    """
    Parse a monthly METAR text file from NCDC into a single dataframe.

    Input:
    --------
    file = Path (str or path-like) to the text file downloaded from NCDC.
           The last ten characters of the path must be ``YYYYMM.dat``; the
           year and month are read from there and passed to the METAR parser.

    wx_subset = Flag to determine whether or not to drop non-current weather
                obs. If True (default), only obs whose ``current_wx1`` is not
                missing are returned; if False, every parsed ob is kept.

    Output:
    --------
    df = Pandas dataframe of the parsed obs, indexed and sorted by
         ``date_time`` (the ``date_time`` column itself is kept as well).

    Raises:
    --------
    ValueError if the end of the path does not match ``YYYYMM.dat``.
    """
    # Accept pathlib.Path (or any path-like) as well as plain strings
    path = str(file)

    # Column at which the METAR text starts within each record.
    # Presumably the width of the fixed NCDC record prefix -- TODO confirm.
    metar_start = 52

    # Read the file that was passed in. The result must be bound to a local
    # name: relying on a notebook-level `df` breaks on a fresh kernel and
    # silently parses the wrong data when another file is requested.
    raw_records = pd.read_csv(path, header=None)

    # Pull the year and month from the filename
    timestamp = datetime.strptime(path[-10:], '%Y%m.dat')

    # Parse the METAR portion of every record (first CSV column) on its own
    parsed_obs = [
        parse_metar_to_dataframe(
            record[metar_start:], year=timestamp.year, month=timestamp.month
        )
        for record in raw_records.iloc[:, 0]
    ]

    # Stack the per-record dataframes into one
    merged_df = pd.concat(parsed_obs)

    # Optionally drop obs that do not include current weather
    if wx_subset:
        merged_df = merged_df.dropna(subset=['current_wx1'])

    # Change the index to datetime (the date_time column is left in place)
    merged_df.index = merged_df.date_time

    # Return the merged dataset sorted by datetime
    return merged_df.sort_index()

In [78]:
# Parse the file selected above, keeping only obs that report current weather;
# the returned dataframe is displayed as this cell's output.
parse_metar_file(file, wx_subset=True)

Unnamed: 0_level_0,station_id,latitude,longitude,elevation,date_time,wind_direction,wind_speed,current_wx1,current_wx2,current_wx3,...,cloud_coverage,air_temperature,dew_point_temperature,altimeter,present_weather,past_weather,past_weather2,air_pressure_at_sea_level,eastward_wind,northward_wind
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-01 00:00:00,KMKG,43.17,-86.23,191,2000-01-01 00:00:00,270.0,7.0,-SN,,,...,8,-3.0,-8.0,29.83,71,0,0,,7.000000,1.285879e-15
2000-01-01 00:05:00,KMKG,43.17,-86.23,191,2000-01-01 00:05:00,280.0,10.0,-SN,,,...,8,-3.0,-7.0,29.83,71,0,0,,9.848078,-1.736482e+00
2000-01-01 00:10:00,KMKG,43.17,-86.23,191,2000-01-01 00:10:00,280.0,11.0,-SN,,,...,8,-3.0,-8.0,29.83,71,0,0,,10.832885,-1.910130e+00
2000-01-01 00:15:00,KMKG,43.17,-86.23,191,2000-01-01 00:15:00,290.0,7.0,-SN,,,...,8,-3.0,-7.0,29.83,71,0,0,,6.577848,-2.394141e+00
2000-01-01 00:20:00,KMKG,43.17,-86.23,191,2000-01-01 00:20:00,280.0,9.0,-SN,,,...,8,-3.0,-7.0,29.83,71,0,0,,8.863270,-1.562834e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2000-01-31 23:35:00,KMKG,43.17,-86.23,191,2000-01-31 23:35:00,280.0,10.0,-SN,,,...,8,-3.0,-7.0,29.82,71,0,0,,9.848078,-1.736482e+00
2000-01-31 23:40:00,KMKG,43.17,-86.23,191,2000-01-31 23:40:00,280.0,8.0,-SN,,,...,8,-3.0,-9.0,29.82,71,0,0,,7.878462,-1.389185e+00
2000-01-31 23:45:00,KMKG,43.17,-86.23,191,2000-01-31 23:45:00,280.0,9.0,-SN,,,...,8,-3.0,-7.0,29.82,71,0,0,,8.863270,-1.562834e+00
2000-01-31 23:50:00,KMKG,43.17,-86.23,191,2000-01-31 23:50:00,290.0,10.0,-SN,,,...,8,-3.0,-8.0,29.83,71,0,0,,9.396926,-3.420201e+00
