In [None]:
!pip install xarray netCDF4 pandas

Collecting netCDF4
  Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting cftime (from netCDF4)
  Downloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)
Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m45.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cftime, netCDF4
Successfully installed cftime-1.6.4.post1 netCDF4-1.7.2


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xarray as xr

# Combining Weather + Soil Data (from Cal-Adapt)

## Load Data

In [None]:
# Load the Fresno evapotranspiration CSV, rename the raw `time` / `et_day_livneh_vic`
# columns, parse `date` into timestamps and use it as the index.
evapo_df = (
    pd.read_csv("/content/fresno_evapotranspiration_1950_2013.csv")
    .rename(columns={'time': 'date', 'et_day_livneh_vic': 'daily_evapotranspiration'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)
evapo_df.head(3)

1950-01-01 00:00:00+00:00 2013-12-31 00:00:00+00:00


Unnamed: 0_level_0,daily_evapotranspiration
date,Unnamed: 1_level_1
1950-01-01 00:00:00+00:00,0.586743
1950-01-02 00:00:00+00:00,0.841818
1950-01-03 00:00:00+00:00,0.358946


In [None]:
# Load the Fresno daily maximum-temperature CSV, rename the raw `time` /
# `tasmax_day_livneh` columns, parse `date` into timestamps and use it as the index.
maxTemp_df = (
    pd.read_csv("/content/fresno_maxTemp_1950_2013.csv")
    .rename(columns={'time': 'date', 'tasmax_day_livneh': 'daily_maxTemp'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)
maxTemp_df.head(3)

Unnamed: 0_level_0,daily_maxTemp
date,Unnamed: 1_level_1
1950-01-01 00:00:00+00:00,6.173168
1950-01-02 00:00:00+00:00,3.339462
1950-01-03 00:00:00+00:00,1.377536


In [None]:
# Load the Fresno daily minimum-temperature CSV, rename the raw `time` /
# `tasmin_day_livneh` columns, parse `date` into timestamps and use it as the index.
minTemp_df = (
    pd.read_csv("/content/fresno_minTemp_1950_2013.csv")
    .rename(columns={'time': 'date', 'tasmin_day_livneh': 'daily_minTemp'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)
minTemp_df.head(3)

Unnamed: 0_level_0,daily_minTemp
date,Unnamed: 1_level_1
1950-01-01 00:00:00+00:00,-2.329379
1950-01-02 00:00:00+00:00,-5.007868
1950-01-03 00:00:00+00:00,-10.638012


In [None]:
# Load the Fresno daily precipitation CSV, rename the raw `time` / `pr_day_livneh`
# columns, parse `date` into timestamps and use it as the index.
precip_df = (
    pd.read_csv("/content/fresno_precipitation_1950_2013.csv")
    .rename(columns={'time': 'date', 'pr_day_livneh': 'daily_precipitation'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)
precip_df.head(3)

Unnamed: 0_level_0,daily_precipitation
date,Unnamed: 1_level_1
1950-01-01 00:00:00+00:00,1.233334
1950-01-02 00:00:00+00:00,4.585143
1950-01-03 00:00:00+00:00,0.351538


In [None]:
# Load the Fresno daily soil-moisture CSV, rename the raw `time` /
# `soilmoist1_day_livneh_vic` columns, parse `date` into timestamps and use it as the index.
soilMoisture_df = (
    pd.read_csv("/content/fresno_soilMoisture_1950_2013.csv")
    .rename(columns={'time': 'date', 'soilmoist1_day_livneh_vic': 'daily_soilMoisture'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)
soilMoisture_df.head(3)

Unnamed: 0_level_0,daily_soilMoisture
date,Unnamed: 1_level_1
1950-01-01 00:00:00+00:00,18.382278
1950-01-02 00:00:00+00:00,19.044817
1950-01-03 00:00:00+00:00,19.073204


In [None]:
# Load the Fresno daily wind-speed CSV, rename the raw `time` / `wspeed_day_hadisd`
# columns, parse `date` into timestamps and use it as the index.
#
# The `pd.to_datetime` step was missing here (every other loader in this notebook has
# it), which left the index as plain strings: such an index cannot be time-sliced and
# does not align with the datetime-indexed frames in a concat/join.
#
# NOTE(review): the displayed rows start at 1961-07-01 although the filename says
# 1940-2021 — confirm the actual coverage before merging with the 1950-2013 series.
windSpeed = (
    pd.read_csv("/content/fresno_windspeed_1940_2021.csv")
    .rename(columns={'time': 'date', 'wspeed_day_hadisd': 'daily_windSpeed'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)
windSpeed.head(3)

Unnamed: 0_level_0,daily_windSpeed
date,Unnamed: 1_level_1
1961-07-01 00:00:00+00:00,4.9
1961-07-02 00:00:00+00:00,5.2
1961-07-03 00:00:00+00:00,6.45


In [None]:
# Place the four daily series side by side, one column per variable; rows are matched
# on the date index (pandas' default outer join for column-wise concat).
# NOTE(review): soilMoisture_df and windSpeed are loaded earlier in this notebook but
# are not part of this merge — confirm that is intentional.
combined_df = pd.concat(
    objs=[evapo_df, maxTemp_df, minTemp_df, precip_df],
    axis='columns',
)
combined_df.head(13)

Unnamed: 0_level_0,daily_evapotranspiration,daily_maxTemp,daily_minTemp,daily_precipitation
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1950-01-01 00:00:00+00:00,0.586743,6.173168,-2.329379,1.233334
1950-01-02 00:00:00+00:00,0.841818,3.339462,-5.007868,4.585143
1950-01-03 00:00:00+00:00,0.358946,1.377536,-10.638012,0.351538
1950-01-04 00:00:00+00:00,0.212138,0.540207,-11.244327,0.086464
1950-01-05 00:00:00+00:00,0.194017,3.423975,-10.967143,0.007825
1950-01-06 00:00:00+00:00,0.228749,6.741926,-9.041491,0.0
1950-01-07 00:00:00+00:00,0.877428,7.178426,-8.083727,3.225253
1950-01-08 00:00:00+00:00,1.028151,4.994618,-3.684161,13.458984
1950-01-09 00:00:00+00:00,0.785838,5.247143,-7.846625,5.030817
1950-01-10 00:00:00+00:00,1.130882,7.073271,-6.853043,5.297155


In [None]:
# Write the merged frame to the working directory; the date index is kept as the
# first CSV column.
combined_df.to_csv(path_or_buf='merged_weather_dataset_fresno.csv', index=True)

# Climate Data from CRU TS Dataset
- Climatic Research Unit gridded Time Series

## Extracting county-specific data

In [None]:
# Open the CRU potential-evapotranspiration NetCDF file and print the dataset summary
# (dimensions, coordinates, variables and global attributes).
PET_data = xr.open_dataset(filename_or_obj="/content/CRU_PET_1901.2023.pet.dat.nc")
print(PET_data)

<xarray.Dataset> Size: 2GB
Dimensions:  (lon: 720, lat: 360, time: 1476)
Coordinates:
  * lon      (lon) float32 3kB -179.8 -179.2 -178.8 -178.2 ... 178.8 179.2 179.8
  * lat      (lat) float32 1kB -89.75 -89.25 -88.75 -88.25 ... 88.75 89.25 89.75
  * time     (time) datetime64[ns] 12kB 1901-01-16 1901-02-15 ... 2023-12-16
Data variables:
    pet      (time, lat, lon) float32 2GB ...
Attributes:
    Conventions:  CF-1.4
    title:        CRU TS4.08 Potential Evapotranspiration
    institution:  Data held at British Atmospheric Data Centre, RAL, UK.
    source:       Run ID = 2406270856. Data generated from:PET derived from m...
    history:      Thu 27 Jun 13:56:56 BST 2024 : User f098 : Program makegrid...
    references:   Information on the data is available at http://badc.nerc.ac...
    comment:      Access to these data is available to any registered CEDA user.
    contact:      support@ceda.ac.uk


In [43]:
# Choose the county to extract: its name (used later for the output filename) and the
# approximate central coordinates in decimal degrees.
county_name = "Tulare"
county_lat, county_lon = 36.25, -118.75

# The grid is 0.5° x 0.5° (roughly 55 km north-south per cell), so 'nearest' snaps the
# requested point onto the closest grid-cell centre and yields a single time series.
county_pet = PET_data['pet'].sel(
    indexers={'lat': county_lat, 'lon': county_lon},
    method='nearest',
)

# Keep only January 1950 through December 2023 (both ends inclusive).
county_pet_1950_to_2023 = county_pet.sel(time=slice('1950-01', '2023-12'))

# Flatten to a table: resetting the index turns `time` into an ordinary column.
county_pet_1950_to_2023_df = (
    county_pet_1950_to_2023
    .to_dataframe()
    .reset_index()
)

In [44]:
# Preview the first five monthly rows of the extracted county series.
county_pet_1950_to_2023_df.head(n=5)

Unnamed: 0,time,lon,lat,pet
0,1950-01-16,-118.75,36.25,1.3
1,1950-02-15,-118.75,36.25,1.9
2,1950-03-16,-118.75,36.25,2.6
3,1950-04-16,-118.75,36.25,4.2
4,1950-05-16,-118.75,36.25,5.5


In [45]:
# Build the per-county output name and write the table without the integer row index.
filename = f'Monthly_PET_1950_2023_{county_name}.csv'
county_pet_1950_to_2023_df.to_csv(path_or_buf=filename, index=False)