In [14]:
import pandas as pd
import numpy as np
import glob
from netCDF4 import Dataset, num2date
from datetime import datetime as dt

In [15]:
# Helper function to get the index of longitude and latitude
def get_index(_val, _array):
    """Return the index of the element of ``_array`` closest to ``_val``.

    Parameters
    ----------
    _val : scalar
        Target value (e.g. a latitude or longitude).
    _array : array-like
        Candidate values. Lists and tuples are accepted as well as NumPy
        arrays; masked arrays keep their mask.

    Returns
    -------
    Index of the entry with the smallest absolute difference from ``_val``.
    When several entries are equally close, the first one wins.

    Raises
    ------
    ValueError
        If ``_array`` is empty.
    """
    # asanyarray lets plain sequences through the subtraction below while
    # leaving ndarray subclasses (notably masked arrays) untouched.
    candidates = np.asanyarray(_array)
    return (np.abs(candidates - _val)).argmin()


In [16]:
# Read the CSV that lists the centroid of each of the 21 pixels.
station_csv = 'station_07AA002.csv'
locations_df = pd.read_csv(station_csv)

In [17]:
# Collect the paths of all NetCDF files for January 1981.
# glob returns matches in arbitrary order, so sort them to make the
# processing order reproducible from run to run.
files = sorted(glob.glob('Jan1981/*.nc4'))

In [18]:
# Name of the NetCDF variable to extract from each file.
variable_extracted = 'AvgSurfT_inst'

In [19]:
# Pull the coordinate and identifier columns out of the locations table
# as plain arrays, in the order (latitude, longitude, pixel id).
lats, lons, pixel_ids = (
    locations_df[column].values
    for column in ('latitude', 'longitude', 'PIXEL_ID')
)

In [20]:
def _extract_first_timestep(path, variable, lats, lons, pixel_ids):
    """Read one NetCDF file and tabulate ``variable`` at each requested pixel.

    Parameters
    ----------
    path : str
        Path of the NetCDF file to read.
    variable : str
        Name of the variable to extract; it is indexed as (time, lat, lon).
    lats, lons, pixel_ids : sequences
        Per-pixel latitude, longitude and identifier, iterated in parallel.

    Returns
    -------
    pandas.DataFrame
        One row per pixel with columns ``pixel_id``, ``year``, ``month``,
        ``day``, ``time`` (hour of the file's first time step) and
        ``variable``. Only the first time step of the file is used.
    """
    # The context manager closes the file even if extraction fails part-way.
    with Dataset(path, 'r') as nc_data:
        # Everything in this section is identical for every pixel, so it is
        # read once per file rather than once per pixel.
        time_var = nc_data.variables['time']
        # Fall back to the standard calendar when the attribute is missing.
        calendar = getattr(time_var, 'calendar', 'standard')
        first_time = num2date(time_var[:], time_var.units, calendar)[0]
        nc_lats = nc_data['lat'][:]
        nc_lons = nc_data['lon'][:]
        grid_var = nc_data[variable]

        pixels = []
        values = []
        for lat, lon, pixel_id in zip(lats, lons, pixel_ids):
            lat_index = get_index(lat, nc_lats)
            lon_index = get_index(lon, nc_lons)
            first_value = grid_var[:, lat_index, lon_index][0]
            # A masked cell has no data: record NaN instead of the raw fill
            # value so it cannot contaminate averages computed later.
            values.append(np.nan if np.ma.is_masked(first_value) else first_value)
            pixels.append(pixel_id)

    n_pixels = len(pixels)
    return pd.DataFrame({
        'pixel_id': pixels,
        'year': [first_time.year] * n_pixels,
        'month': [first_time.month] * n_pixels,
        'day': [first_time.day] * n_pixels,
        'time': [first_time.hour] * n_pixels,
        variable: values,
    })


# Fail early with a clear message instead of pd.concat's generic error.
if not files:
    raise ValueError("No NetCDF files to process: `files` is empty")

# One table per file, stacked row-wise into a single frame.
hourly_dfs_list = [
    _extract_first_timestep(file, variable_extracted, lats, lons, pixel_ids)
    for file in files
]
merged_df = pd.concat(hourly_dfs_list, axis=0)

In [21]:
# Reduce the per-file, per-pixel records to one mean value per calendar day
# (averaging over every pixel and every file of that day), then export to CSV.
date_column = pd.to_datetime(merged_df[['year', 'month', 'day']])
columns_to_drop = ['year', 'month', 'day', 'time', 'pixel_id']
merged_df = (
    merged_df
    .assign(Date=date_column)
    .drop(columns=columns_to_drop)
    .sort_values(by="Date")
    .groupby('Date')
    .mean()
    .reset_index()
)
output_csv = '{}_Jan_1981_fullmonth_data.csv'.format(variable_extracted)
merged_df.to_csv(output_csv, index=False)