In [24]:
import pandas as pd
import numpy as np
import os
import rasterio 
from rasterio.transform import xy
import geopandas as gpd
from rasterio.features import geometry_mask
from shapely.geometry import Point

In [16]:

# Inspect one daily PRISM mean-temperature raster: print its grid metadata
# and a small sample from the top-left corner.
bil_file = r'C:\Users\yegor\Desktop\Honors Thesis\Data\Temperature\PRISM_tmean_stable_4kmD2_20190101_20191231_bil\PRISM_tmean_stable_4kmD2_20190101_bil.bil'

# Everything needed is copied out of the dataset, so the file can be closed
# before anything is printed.
with rasterio.open(bil_file) as dataset:
    tmean_data = dataset.read(1)   # band 1: the temperature grid
    affine = dataset.transform     # affine transform between grid indices and coordinates
    crs = dataset.crs              # coordinate reference system
    bounds = dataset.bounds        # geographic bounding box

print(
    f"Temperature Data Shape: {tmean_data.shape}",
    f"Affine Transformation: {affine}",
    f"Coordinate Reference System: {crs}",
    f"Geographic Bounds: {bounds}",
    sep="\n",
)

# top-left 5x5 corner of the temperature grid
print("First 5 rows and columns of temperature data:")
print(tmean_data[:5, :5])


Temperature Data Shape: (621, 1405)
Affine Transformation: | 0.04, 0.00,-125.02|
| 0.00,-0.04, 49.94|
| 0.00, 0.00, 1.00|
Coordinate Reference System: OGC:CRS83
Geographic Bounds: BoundingBox(left=-125.0208333333335, bottom=24.0624999997925, right=-66.4791666661985, top=49.9374999999995)
First 5 rows and columns of temperature data:
[[-9999. -9999. -9999. -9999. -9999.]
 [-9999. -9999. -9999. -9999. -9999.]
 [-9999. -9999. -9999. -9999. -9999.]
 [-9999. -9999. -9999. -9999. -9999.]
 [-9999. -9999. -9999. -9999. -9999.]]


In [14]:
# Find the first grid cell (in row-major order) that holds real data and
# report its temperature together with the coordinates of the cell centre.
bil_file = r'C:\Users\yegor\Desktop\Honors Thesis\Data\Temperature\PRISM_tmean_stable_4kmD2_20190101_20191231_bil\PRISM_tmean_stable_4kmD2_20190101_bil.bil'
with rasterio.open(bil_file) as dataset:
    tmean_data = dataset.read(1)  # first band is the temperature grid
    affine = dataset.transform  # affine transformation matrix
    crs = dataset.crs  # coordinate reference system

# -9999 marks cells with missing values; hide them behind a mask
valid_temperature_data = np.ma.masked_equal(tmean_data, -9999)

# (row, col) index pairs of every VALID (unmasked) cell
valid_indices = np.argwhere(~np.ma.getmaskarray(valid_temperature_data))

# Fail with a clear message instead of an opaque IndexError on [0] below
if len(valid_indices) == 0:
    raise ValueError(f"No valid temperature values found in {bil_file}")

# convert the first valid cell's grid position to coordinates
row, col = valid_indices[0]
lon, lat = rasterio.transform.xy(affine, row, col, offset='center')

print(f"First valid temperature value: {valid_temperature_data[row, col]}")
print(f"Corresponding coordinates: Longitude = {lon}, Latitude = {lat}")


First valid temperature value: -24.99700164794922
Corresponding coordinates: Longitude = -95.124999999761, Latitude = 49.416666666662


In [15]:
# Same check as for the 20190101 raster, applied to the 20190402 file: find
# the first grid cell (in row-major order) that holds real data and report
# its temperature together with the coordinates of the cell centre.
bil_file = r'C:\Users\yegor\Desktop\Honors Thesis\Data\Temperature\PRISM_tmean_stable_4kmD2_20190101_20191231_bil\PRISM_tmean_stable_4kmD2_20190402_bil.bil'
with rasterio.open(bil_file) as dataset:
    tmean_data = dataset.read(1)  # first band is the temperature grid
    affine = dataset.transform  # affine transformation matrix
    crs = dataset.crs  # coordinate reference system

# -9999 marks cells with missing values; hide them behind a mask
valid_temperature_data = np.ma.masked_equal(tmean_data, -9999)

# (row, col) index pairs of every VALID (unmasked) cell
valid_indices = np.argwhere(~np.ma.getmaskarray(valid_temperature_data))

# Fail with a clear message instead of an opaque IndexError on [0] below
if len(valid_indices) == 0:
    raise ValueError(f"No valid temperature values found in {bil_file}")

# convert the first valid cell's grid position to coordinates
row, col = valid_indices[0]
lon, lat = rasterio.transform.xy(affine, row, col, offset='center')

print(f"First valid temperature value: {valid_temperature_data[row, col]}")
print(f"Corresponding coordinates: Longitude = {lon}, Latitude = {lat}")

First valid temperature value: 0.0024999999441206455
Corresponding coordinates: Longitude = -95.124999999761, Latitude = 49.416666666662


In [20]:
# Convert one daily raster (example: January 1, 2019) into a long-format
# table with one row per valid grid cell: latitude, longitude, temperature, date.
#
# The conversion is vectorized: all valid cells are located and converted to
# coordinates in a single call each, rather than visiting every cell in a
# Python loop and building one dict per cell. The intermediate list of
# per-cell dicts is therefore no longer created; `df` is built directly.
bil_file = r'C:\Users\yegor\Desktop\Honors Thesis\Data\Temperature\PRISM_tmean_stable_4kmD2_20190101_20191231_bil\PRISM_tmean_stable_4kmD2_20190101_bil.bil'

with rasterio.open(bil_file) as dataset:
    tmean_data = dataset.read(1)
    affine = dataset.transform

# mask the -9999 "missing value" cells
tmean_data = np.ma.masked_equal(tmean_data, -9999)

# Row/column indices of every valid cell. np.nonzero walks the grid in
# row-major order, i.e. the same order a row-then-column loop would.
valid_rows, valid_cols = np.nonzero(~np.ma.getmaskarray(tmean_data))

# grid indices -> coordinates of the cell centres, for all valid cells at once
lons, lats = xy(affine, valid_rows, valid_cols)

df = pd.DataFrame({
    'latitude': np.asarray(lats, dtype=np.float64),
    'longitude': np.asarray(lons, dtype=np.float64),
    # pull the raw values at the valid positions; stored as float64
    'temperature': np.ma.getdata(tmean_data)[valid_rows, valid_cols].astype(np.float64),
    'date': '2019-01-01',  # scalar is broadcast to every row
})

In [21]:
# Display the per-cell table; pandas renders a truncated preview (first and last rows).
df

Unnamed: 0,latitude,longitude,temperature,date
0,49.416667,-95.125000,-24.997002,2019-01-01
1,49.375000,-95.166667,-24.939001,2019-01-01
2,49.375000,-95.125000,-24.968000,2019-01-01
3,49.375000,-95.083333,-25.041000,2019-01-01
4,49.375000,-95.041667,-25.163002,2019-01-01
...,...,...,...,...
481626,24.541667,-81.666667,25.906002,2019-01-01
481627,24.541667,-81.625000,25.937002,2019-01-01
481628,24.500000,-82.166667,26.131001,2019-01-01
481629,24.500000,-82.125000,26.119001,2019-01-01


**Converting a single day into the full per-cell table takes about 15-20 seconds, so the data need to be aggregated by county first. County boundaries can change slightly over time, so there are multiple possible approaches; which one is appropriate depends on how the mortality data are defined. For instance, if the mortality data only give an address, the temperatures can simply be aggregated to modern county boundaries.**



In [None]:


# Assign every valid temperature grid cell to the county that contains its
# centre point, producing a list of (county GEOID, temperature) pairs.
#
# NOTE(review): `county_shapefile` is not defined anywhere in the visible part
# of the notebook - it must be set to the county boundary file's path before
# this cell runs. `valid_temperature_data` and `affine` come from the earlier
# raster-loading cells.
counties = gpd.read_file(county_shapefile)

print(counties.columns)

# Put the county polygons in the raster's coordinate system so they can be
# compared with the grid-cell points.
# (would have to loop this in for a given FILE)
RASTER_CRS = 'OGC:CRS83'
counties = counties.to_crs(RASTER_CRS)

# Row/column indices of every valid cell, in row-major order
valid_rows, valid_cols = np.nonzero(~np.ma.getmaskarray(valid_temperature_data))

# grid indices -> coordinates of the cell centres, for all valid cells at once
lons, lats = rasterio.transform.xy(affine, valid_rows, valid_cols)

# one point per valid grid cell, carrying that cell's temperature
cell_points = gpd.GeoDataFrame(
    {'temperature': np.ma.getdata(valid_temperature_data)[valid_rows, valid_cols]},
    geometry=gpd.points_from_xy(lons, lats),
    crs=RASTER_CRS,
)

# A single spatial join replaces testing every cell against every county in
# Python. "point within county" is the same relation as "county contains
# point"; cells that fall in no county are dropped by the inner join.
# assumes the county file has a GEOID column holding the FIPS code - TODO confirm
cells_in_counties = gpd.sjoin(
    cell_points,
    counties[['GEOID', 'geometry']],
    how='inner',
    predicate='within',
).sort_index()  # restore grid (row-major) order of the cells

# (county FIPS code, temperature) for every matched cell
county_temperatures = list(
    zip(cells_in_counties['GEOID'], cells_in_counties['temperature'])
)