In [3]:
import numpy as np
import pandas as pd
from netCDF4 import Dataset

#import function for state geo data boundaries
import state_locations

#functions for datetime management
import datetime
from cftime import num2pydate

In [38]:
#### Loads a netCDF file and exports per-year tasmin CSVs for one state ####
# Note: processes every time step stored in the file (not a single day). #
def get_data_from_netcdf(filename, state='IN', years=range(2041, 2051)):
    """Extract ``tasmin`` for one state from a netCDF file and write yearly CSVs.

    The function reads the ``lon``, ``lat``, ``time`` and ``tasmin`` variables,
    flattens them into one long table, keeps the grid cells strictly inside
    the bounding box returned by ``state_locations.get_state_loc(state)``,
    keeps the first remaining grid cell for each time step, and writes one
    ``SII-CLIM-TASMIN-<year>.csv`` file per requested year into the current
    working directory.

    Parameters
    ----------
    filename : str
        Path of the netCDF file to read.
    state : str, optional
        State code passed to ``state_locations.get_state_loc``. Defaults to
        ``'IN'``, the value that was previously hard-coded.
    years : iterable of int, optional
        Calendar years to export, one CSV per year. Defaults to 2041-2050,
        the range that was previously hard-coded. A year with no matching
        rows still produces a CSV containing only the header.

    Returns
    -------
    None
        The result is the set of CSV files written to disk.
    """
    # Read everything that is needed while the file is open. The context
    # manager closes the handle even when one of the reads raises.
    with Dataset(filename) as data:
        lons = data.variables['lon'][:]
        lats = data.variables['lat'][:]
        tasmin = data.variables['tasmin'][:, :, :]

        # Use the CF metadata stored on the time variable (units and
        # calendar) to turn the numeric offsets into Python datetimes.
        times = data.variables['time']
        time = num2pydate(times[:], units=times.units, calendar=times.calendar)

    # Consolidate time, location and the variable of interest into one table.
    # NOTE(review): pairing the flattened values with this product index
    # assumes tasmin is stored with dimensions ordered (time, lat, lon) --
    # confirm against the file.
    index = pd.MultiIndex.from_product(
        [time, lats, lons], names=['time', 'lats', 'lons']
    )
    df = pd.DataFrame({'tasmin': tasmin.flatten()}, index=index).reset_index()

    # Bounding box of the state of interest: west, east, south, north.
    # NOTE(review): assumes the box uses the same longitude convention as the
    # file's 'lon' variable -- otherwise the subset below is empty.
    W, E, S, N = state_locations.get_state_loc(state)

    # Keep only grid cells strictly inside the bounding box.
    inside_lat = (df['lats'] > S) & (df['lats'] < N)
    inside_lon = (df['lons'] > W) & (df['lons'] < E)
    subset = df[inside_lat & inside_lon]

    # Keep the first grid cell encountered for each time step.
    point = subset.drop_duplicates('time')

    # The year of every row is the same for all iterations, so compute it once.
    point_years = point['time'].dt.year

    # Write one CSV per requested year.
    for year in years:
        year = int(year)
        point[point_years == year].to_csv(
            f'SII-CLIM-TASMIN-{year}.csv', index=False
        )

    
    
    


In [39]:
# Run the extraction on one tasmin netCDF file; the yearly CSV files are
# written to the current working directory.
# NOTE(review): this is an absolute, machine-specific path -- it has to be
# edited before the notebook can run on another machine.
get_data_from_netcdf("/Users/inunezfe/Documents/data/TAS_MIN/tasmin_day_HadGEM2-ES_rcp45_r1i1p1_EWEMBI_20410101-20501231.nc4")