In [1]:
import pandas as pd

In [2]:
# Tag that appears in the input directory name and in every file name
# (presumably the spatial extent of the extract; verify against the data source).
data_extent = 'box'
# Directory that holds the input folder and receives the combined output file.
data_base_path = '../data/NOAA_weather_data/'

# Years to load: start_year is included, end_year is excluded.
start_year, end_year = 1992, 1998
data_years = range(start_year, end_year)

# Variable codes; each one names a separate CSV file per year.
data_types = [
    'air.sfc',    # surface air temperature
    'air.2m',     # air temperature 2 meters above surface
    'apcp',       # accumulated precipitation
    'crain',      # categorical rain at surface
    'rhum.2m',    # relative humidity 2 meters above surface
    'dpt.2m',     # dew point temperature 2 meters above surface
    'pres.sfc',   # pressure at surface
    'uwnd.10m',   # u component of wind (positive = from west) 10 meters above surface
    'vwnd.10m',   # v component of wind (positive = from south) 10 meters above surface
    'veg',        # vegetation at surface
    'dlwrf',      # downward long wave radiation flux at surface
    'dswrf',      # downward short wave radiation flux at surface
    'lcdc',       # low cloud area fraction
    'hcdc',       # high cloud area fraction
    'mcdc',       # medium cloud area fraction
    'hpbl',       # planetary boundary layer height
    'prate',      # precipitation rate
    'vis',        # visibility
    'ulwrf.sfc',  # upward long wave radiation flux at surface
]

In [3]:
def _load_variable(data_year, data_type):
    """Read the CSV holding one variable for one year into a DataFrame.

    The path is assembled from the notebook-level ``data_base_path`` and
    ``data_extent`` settings plus the given year and variable code.
    """
    input_file = f'{data_base_path}california_{data_extent}/{data_year}_california_{data_extent}_{data_type}.csv'
    return pd.read_csv(input_file)


# Gather one merged frame per year and concatenate once at the end;
# growing master_df with pd.concat inside the loop would re-copy every
# previously loaded row on each iteration.
yearly_frames = []

for data_year in data_years:

    # Read the first variable so there is a frame to join the others onto.
    df = _load_variable(data_year, data_types[0])

    for data_type in data_types[1:]:
        incoming_df = _load_variable(data_year, data_type)
        # Outer join: keys present in only one of the inputs are kept and
        # the columns they lack are filled with NaN.
        df = pd.merge(df, incoming_df, on=['lat', 'lon', 'time'], how='outer')

    yearly_frames.append(df)

# pd.concat raises on an empty list, so keep the empty-frame result
# when data_years yields no years.
master_df = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

In [4]:
# The file name carries the inclusive year range (end_year itself is not loaded).
output_file = f'{data_base_path}{start_year}-{end_year - 1}_california_{data_extent}_all.csv'

# Key every row by (time, lat, lon) and replace missing values with 0.
# set_index discards the existing row index, so the per-year row labels
# left over from concatenation do not survive.
master_df = (
    master_df
    .set_index(['time', 'lat', 'lon'])
    .fillna(0)
)

# Header row and the three index levels are written by default.
master_df.to_csv(output_file)

In [5]:
# Show the first five rows of the combined frame.
master_df.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,air.sfc,air.2m,apcp,crain,rhum.2m,dpt.2m,pres.sfc,uwnd.10m,vwnd.10m,veg,dlwrf,dswrf,lcdc,hcdc,mcdc,hpbl,prate,vis,ulwrf.sfc
time,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1992-01-01 00:00:00,31.63322,-125.3449,288.66638,289.13074,0.367188,1.0,77.1875,285.05447,101573.56,-1.135603,6.42775,0.0,345.125,41.9375,46.0,100.0,17.0,802.15784,3.7e-05,20007.979,382.25
1992-01-01 00:00:00,31.90358,-125.4247,288.66638,289.13074,0.304688,1.0,77.1875,285.05447,101573.56,-1.268416,6.084,0.0,345.125,41.9375,46.0,100.0,6.0,538.5578,2.7e-05,20007.979,382.25
1992-01-01 00:00:00,31.70029,-125.0272,288.66638,289.06824,0.304688,1.0,78.8125,285.3123,101573.56,-1.323103,6.130875,0.0,338.4375,44.8125,34.0,100.0,4.0,439.7578,2.7e-05,20007.979,382.3125
1992-01-01 00:00:00,31.97103,-125.1057,288.41638,288.88074,0.210938,1.0,79.6875,285.33572,101573.56,-1.502791,5.880875,0.0,338.375,42.5625,31.0,98.0,0.0,63.85781,1.7e-05,20007.979,381.1875
1992-01-01 00:00:00,32.24205,-125.1849,288.22888,288.69324,0.210938,1.0,81.6875,285.4998,101573.56,-1.573103,5.779312,0.0,338.4375,40.625,34.0,88.0,0.0,75.157814,1.7e-05,20007.979,380.0625


In [6]:
# Show the last five rows of the combined frame.
master_df.tail(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,air.sfc,air.2m,apcp,crain,rhum.2m,dpt.2m,pres.sfc,uwnd.10m,vwnd.10m,veg,dlwrf,dswrf,lcdc,hcdc,mcdc,hpbl,prate,vis,ulwrf.sfc
time,lat,lon,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1997-12-31 21:00:00,41.91082,-113.4242,273.59113,273.79834,0.015625,0.0,73.33948,269.53833,79997.05,-2.000885,5.253265,15.3,264.9375,221.8125,0.0,100.0,99.0,434.36115,6e-06,20008.086,314.875
1997-12-31 21:00:00,42.19902,-113.4577,278.34113,277.61084,0.023438,0.0,54.901978,269.31958,82397.05,-2.008697,5.253265,25.7,271.5625,209.625,0.0,100.0,99.0,685.4611,6e-06,20008.086,330.9375
1997-12-31 21:00:00,42.48738,-113.4915,274.27863,274.04834,0.015625,0.0,83.65198,271.5852,85497.05,-3.821197,1.11264,30.1,268.9375,215.75,0.0,100.0,99.0,681.16113,6e-06,20008.086,317.875
1997-12-31 21:00:00,42.77593,-113.5258,276.77863,274.92334,0.015625,0.0,73.83948,270.73364,86697.05,-2.446197,0.17514,23.9,269.8125,204.0625,0.0,96.0,93.0,904.2611,-4e-06,20008.086,325.1875
1997-12-31 21:00:00,42.80042,-113.1324,276.46613,274.23584,0.015625,0.0,73.58948,270.03052,85297.05,-2.086822,0.839203,15.6,265.0,200.25,0.0,98.0,98.0,1021.9611,-4e-06,20008.086,323.625
