In [3]:
import rasterio
import pandas as pd
import numpy as np
import zipfile
import os

# Extracting zip files

In [6]:
# Move into the download folder and collect the name of every zip archive found there
working_dir = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
os.chdir(working_dir)
# os.listdir() with no argument lists the current directory, i.e. working_dir after the chdir
zip_files = list(filter(lambda name: name.endswith('.zip'), os.listdir()))

In [7]:
# Extract each archive into a folder named after the archive (minus its extension)
for zip_file in zip_files:
    # Slice off only the trailing ".zip". str.replace would also strip any ".zip"
    # appearing earlier in the name (e.g. "a.zip.b.zip" -> "a.b"), producing the
    # wrong folder name. Every entry in zip_files ends with ".zip" by construction.
    extract_folder = zip_file[:-len('.zip')]
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)
    print(f"Extracted: {zip_file} → {extract_folder}")

In [4]:
# Remove the original archives; run this only once the extraction loop has finished
for zip_file in zip_files:
    os.unlink(zip_file)  # os.unlink is the same operation as os.remove
    print(f"Deleted: {zip_file}")

# .bil file extraction to pandas

In [8]:
# Function for converting .bil file to a pandas dataframe
def bil_to_df(location, column_name):
    """Read band 1 of a raster file into a long-format DataFrame.

    Parameters
    ----------
    location : str or path-like
        Path handed to ``rasterio.open``.
    column_name : str
        Name of the output column that holds the cell values.

    Returns
    -------
    pandas.DataFrame
        Columns ``longitude`` (x of the cell center), ``latitude`` (y of the
        cell center) and ``column_name``. One row per raster cell whose value
        is neither -9999 nor the dataset's declared nodata value. The index
        keeps each cell's position in the flattened (row-major) raster.

    Raises
    ------
    rasterio.errors.RasterioIOError
        If ``location`` cannot be opened.
    """
    # Open the raster and pull out everything needed before the file is closed
    with rasterio.open(location) as src:
        data = src.read(1)  # Read the first band
        transform = src.transform  # Affine transform (pixel -> coordinates)
        nodata = src.nodata  # Declared nodata marker, or None if the file has none

    # Row/column index of every cell, flattened to 1-D up front; the outputs are
    # flattened below regardless, so nothing is lost by doing it here.
    rows, cols = np.indices(data.shape)
    xs, ys = rasterio.transform.xy(transform, rows.ravel(), cols.ravel(), offset='center')

    flat_data = data.ravel()

    # Always drop the conventional -9999 marker, and additionally honor whatever
    # nodata value the file itself declares.
    valid = flat_data != -9999
    if nodata is not None:
        if np.isnan(nodata):
            # NaN never compares equal to itself, so it needs an explicit isnan test
            valid &= ~np.isnan(flat_data)
        else:
            valid &= flat_data != nodata

    df = pd.DataFrame({
        'longitude': np.asarray(xs).ravel(),
        'latitude': np.asarray(ys).ravel(),
        column_name: flat_data
    })

    return df[valid]

In [10]:
# List what was extracted into the climate data folder
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"

# os.listdir returns entries in arbitrary order; sort so the printed listing is reproducible
files = sorted(os.listdir(folder))
print(files)

['PRISM_ppt_30yr_normal_4kmM4_all_bil', 'PRISM_soltotal_30yr_normal_4kmM3_all_bil', 'PRISM_soltrans_30yr_normal_4kmM3_all_bil', 'PRISM_tdmean_30yr_normal_4kmM5_all_bil', 'PRISM_tmax_30yr_normal_4kmM5_all_bil', 'PRISM_tmean_30yr_normal_4kmM5_all_bil', 'PRISM_tmin_30yr_normal_4kmM5_all_bil', 'PRISM_vpdmax_30yr_normal_4kmM5_all_bil', 'PRISM_vpdmin_30yr_normal_4kmM5_all_bil']


All .bil files were downloaded from the PRISM 30-year normals page: https://prism.oregonstate.edu/normals/

# Precipitation data

In [6]:
# Creating list of .bil files for looping through
working_dir = r'C:\Users\matta\OneDrive\Documents\Python\Geolocation\climate_data\PRISM_ppt_30yr_normal_4kmM4_all_bil'
os.chdir(working_dir)
# os.listdir returns entries in arbitrary order, but the files are later paired
# positionally with month labels (jan ... dec, annual). Sorting makes the order
# deterministic. NOTE(review): this presumes the file names sort as
# ..._01_..., ..._12_..., ..._annual_... — confirm against the folder contents.
bil_files = sorted(
    os.path.join(working_dir, f)
    for f in os.listdir(working_dir)
    if f.endswith('.bil')
)

In [7]:
# One label per raster: twelve monthly normals followed by the annual normal
label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [prefix + 'precip' for prefix in label_prefixs]

# zip() silently stops at the shorter sequence, so a missing or extra .bil file
# would otherwise drop or mislabel columns without any error.
if len(bil_files) != len(labels):
    raise ValueError(
        f"Expected {len(labels)} .bil files (one per label) but found {len(bil_files)}"
    )

# Start from the first raster, then join each remaining raster on its cell coordinates
df = bil_to_df(bil_files[0], labels[0])

for bil_path, label in zip(bil_files[1:], labels[1:]):
    temp_df = bil_to_df(bil_path, label)
    # Inner join keeps only cells that have a valid value in every raster
    df = df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

In [8]:
# Spot-check five randomly chosen rows of the merged precipitation table
df.sample(n=5)

Unnamed: 0,longitude,latitude,jan_precip,feb_precip,mar_precip,apr_precip,may_precip,jun_precip,jul_precip,aug_precip,sep_precip,oct_precip,nov_precip,dec_precip,annual_precip
39894,-102.5,46.958333,8.3294,9.717999,15.437699,31.216,66.096001,78.733299,61.9786,47.630497,46.385201,33.605598,13.6409,8.8207,421.593475
182066,-78.0,41.625,76.737099,59.4986,80.350098,93.014,94.2341,104.5065,113.490601,97.594795,103.644997,99.731499,85.976295,81.959801,1090.73999
75791,-88.25,45.458333,32.2938,28.7486,44.652901,72.777,86.2248,104.045296,95.162697,80.522202,88.244499,79.854599,51.808701,40.702999,805.03949
404220,-110.083333,33.416667,42.919701,38.698601,28.562698,11.4821,11.0661,9.8501,65.445099,69.904297,39.875099,27.4419,24.2516,35.584801,405.083496
361161,-87.0,35.208333,130.611801,140.070801,143.55069,132.645401,121.513893,125.843796,122.573196,108.573898,106.8526,95.332596,106.080498,160.072693,1493.723511


# Average minimum January temperature

In [9]:
# Average minimum January temperature
# NOTE(review): this relative path is resolved against the current working
# directory, which earlier cells change with os.chdir — confirm it points at the
# extracted tmin folder before running.
location = 'PRISM_tmin_30yr_normal_4kmM5_01_bil/PRISM_tmin_30yr_normal_4kmM5_01_bil.bil'

# Reuse bil_to_df (defined earlier in this notebook) rather than repeating the
# open / index / flatten / filter steps inline. It returns longitude, latitude
# and the value column with -9999 cells already removed.
min_jan = bil_to_df(location, 'avg_min_jan_temp')
min_jan.head()

RasterioIOError: PRISM_tmin_30yr_normal_4kmM5_01_bil/PRISM_tmin_30yr_normal_4kmM5_01_bil.bil: No such file or directory

# Average annual precipitation 

In [None]:
# Average annual precipitation
# NOTE(review): this relative path is resolved against the current working
# directory, which earlier cells change with os.chdir — confirm it points at the
# extracted ppt folder before running.
location = 'PRISM_ppt_30yr_normal_4kmM4_annual_bil/PRISM_ppt_30yr_normal_4kmM4_annual_bil.bil'

# Reuse bil_to_df (defined earlier in this notebook) rather than repeating the
# open / index / flatten / filter steps inline.
precip = bil_to_df(location, 'precipitation_mm')

# One inch is exactly 25.4 mm. assign() returns a new frame, so the filtered
# result from bil_to_df is not mutated in place.
precip = precip.assign(precipitation_inches=precip['precipitation_mm'] / 25.4)
precip.head()

# Average maximum July temperature

In [None]:
# Average maximum July temperature
# NOTE(review): this relative path is resolved against the current working
# directory, which earlier cells change with os.chdir — confirm it points at the
# extracted tmax folder before running.
location = 'PRISM_tmax_30yr_normal_4kmM5_07_bil/PRISM_tmax_30yr_normal_4kmM5_07_bil.bil'

# Reuse bil_to_df (defined earlier in this notebook) rather than repeating the
# open / index / flatten / filter steps inline. It returns longitude, latitude
# and the value column with -9999 cells already removed.
max_jul = bil_to_df(location, 'avg_max_july_temp')
max_jul.head()

# Evaporation

# Annual snowfall