In [1]:
import rasterio
import pandas as pd
import numpy as np
import zipfile
import os

# Extracting zip files

In [2]:
# Creating list of zip files to loop through
working_dir = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"  
os.chdir(working_dir)
zip_files = [f for f in os.listdir() if f.endswith('.zip')]

In [3]:
# Extracting zip files by looping through them 
for zip_file in zip_files:
    extract_folder = zip_file.replace('.zip', '') 
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)
        print(f"Extracted: {zip_file} → {extract_folder}")

In [4]:
# Delete each zip file after extraction
for zip_file in zip_files:
    os.remove(zip_file)
    print(f"Deleted: {zip_file}")

# .bil file extraction to pandas

In [5]:
# Function for converting .bil file to a pandas dataframe
def bil_to_df(location, column_name):
    # Open the .bil file
    with rasterio.open(location) as src:
        data = src.read(1)  # Read the first band
        transform = src.transform  # Affine transform

    # Get row/col indices
    rows, cols = np.indices(data.shape)

    # Convert indices to coordinates (lon, lat)
    xs, ys = rasterio.transform.xy(transform, rows, cols, offset='center')

    # Flatten all arrays
    flat_data = data.ravel()
    flat_xs = np.array(xs).ravel()
    flat_ys = np.array(ys).ravel()

    # Create DataFrame
    df = pd.DataFrame({
        'longitude': flat_xs,
        'latitude': flat_ys,
        column_name: flat_data
    })

    # Filter out missing data (-9999 or nodata value)
    df = df[df[column_name] != -9999]

    return df

All .bil files from https://prism.oregonstate.edu/normals/ 

In [6]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"

files = os.listdir(folder) 
files

['PRISM_ppt_30yr_normal_4kmM4_all_bil',
 'PRISM_soltotal_30yr_normal_4kmM3_all_bil',
 'PRISM_soltrans_30yr_normal_4kmM3_all_bil',
 'PRISM_tdmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmax_30yr_normal_4kmM5_all_bil',
 'PRISM_tmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmin_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmax_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmin_30yr_normal_4kmM5_all_bil']

# Precipitation data

In [7]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
working_dir = folder + "\\" + files[0]
print(working_dir)

os.chdir(working_dir)
bil_files = [os.path.join(working_dir, f) for f in os.listdir() if f.endswith('.bil')]

label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [i + 'precip' for i in label_prefixs]

precip_df = bil_to_df(bil_files[0], labels[0])

# Loop through the rest
for i, j in zip(bil_files[1:], labels[1:]): 
    temp_df = bil_to_df(i, j)
    precip_df = precip_df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files\PRISM_ppt_30yr_normal_4kmM4_all_bil


In [8]:
precip_df.head()

Unnamed: 0,longitude,latitude,jan_precip,feb_precip,mar_precip,apr_precip,may_precip,jun_precip,jul_precip,aug_precip,sep_precip,oct_precip,nov_precip,dec_precip,annual_precip
0,-95.125,49.416667,23.644299,17.8645,23.2388,33.131901,80.193497,108.2164,93.256996,78.835396,78.800697,57.489098,32.655701,28.477699,655.806702
1,-95.166667,49.375,23.340399,17.845299,23.2768,33.335999,80.111397,108.382996,93.154297,78.872101,78.739998,57.233898,32.302399,28.246599,654.843872
2,-95.125,49.375,23.3284,17.859999,23.2094,33.333099,80.141296,108.263199,93.254898,79.079102,78.951897,57.322598,32.318699,28.115599,655.179871
3,-95.083333,49.375,23.3076,17.8799,23.156898,33.369598,80.173401,108.18,93.333496,79.300697,79.153198,57.379898,32.320301,27.922499,655.47876
4,-95.041667,49.375,23.2824,17.8561,23.1716,33.519501,80.2285,108.062798,93.358597,79.424301,79.313301,57.4104,32.3521,27.7759,655.756287


In [9]:
precip_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 481631 entries, 0 to 481630
Data columns (total 15 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   longitude      481631 non-null  float64
 1   latitude       481631 non-null  float64
 2   jan_precip     481631 non-null  float32
 3   feb_precip     481631 non-null  float32
 4   mar_precip     481631 non-null  float32
 5   apr_precip     481631 non-null  float32
 6   may_precip     481631 non-null  float32
 7   jun_precip     481631 non-null  float32
 8   jul_precip     481631 non-null  float32
 9   aug_precip     481631 non-null  float32
 10  sep_precip     481631 non-null  float32
 11  oct_precip     481631 non-null  float32
 12  nov_precip     481631 non-null  float32
 13  dec_precip     481631 non-null  float32
 14  annual_precip  481631 non-null  float32
dtypes: float32(13), float64(2)
memory usage: 31.2 MB


In [10]:
precip_df.describe()

Unnamed: 0,longitude,latitude,jan_precip,feb_precip,mar_precip,apr_precip,may_precip,jun_precip,jul_precip,aug_precip,sep_precip,oct_precip,nov_precip,dec_precip,annual_precip
count,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0
mean,-99.222637,39.394686,61.122406,55.047993,65.330551,69.843147,79.661163,80.372795,72.859894,68.370918,65.707687,65.012291,59.615902,65.76416,808.71051
std,13.678842,5.383087,60.988171,48.99704,50.324203,42.143887,39.755749,46.600842,45.214645,43.340912,39.550987,38.865005,55.202122,64.518089,463.698181
min,-124.791667,24.5,4.1087,4.32,4.8805,1.4356,0.3801,0.0,0.0,0.0,0.5437,1.7043,1.3885,3.5005,46.257401
25%,-110.416667,35.208333,18.0832,18.5364,25.784599,34.354799,48.826248,36.641249,33.06045,31.1626,32.45805,32.621849,19.551049,20.63785,411.196991
50%,-99.333333,39.625,41.351799,40.016899,50.854397,67.839096,83.038498,90.027802,75.322197,71.412201,66.224602,63.010098,47.1693,44.906898,755.414001
75%,-88.833333,43.791667,89.171448,77.50835,96.253048,98.050697,110.197445,115.113148,106.4702,96.954597,93.809601,89.893246,85.793446,95.244499,1172.529968
max,-66.958333,49.416667,863.510071,710.085205,707.518799,461.249695,286.316101,282.603394,244.163193,276.665405,306.139404,554.030701,1152.823975,941.520386,5611.161133


# Solar total

In [11]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
working_dir = folder + "\\" + files[1]
print(working_dir)

os.chdir(working_dir)
bil_files = [os.path.join(working_dir, f) for f in os.listdir() if f.endswith('.bil')]

label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [i + 'solar' for i in label_prefixs]

solar_df = bil_to_df(bil_files[0], labels[0])

# Loop through the rest
for i, j in zip(bil_files[1:], labels[1:]): 
    temp_df = bil_to_df(i, j)
    solar_df = solar_df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files\PRISM_soltotal_30yr_normal_4kmM3_all_bil


In [12]:
solar_df.head()

Unnamed: 0,longitude,latitude,jan_solar,feb_solar,mar_solar,apr_solar,may_solar,jun_solar,jul_solar,aug_solar,sep_solar,oct_solar,nov_solar,dec_solar,annual_solar
0,-95.125,49.416667,4.163,7.7521,12.2248,17.154499,18.936399,20.132399,21.454699,17.6728,12.2616,7.1016,4.1895,3.109,12.179299
1,-95.166667,49.375,4.2034,7.7808,12.2378,17.135099,18.9596,20.141899,21.4953,17.682699,12.2856,7.1332,4.2125,3.1365,12.200299
2,-95.125,49.375,4.173,7.7821,12.247399,17.2092,18.9349,20.121,21.455599,17.670599,12.2525,7.0981,4.185,3.1233,12.187699
3,-95.083333,49.375,4.1495,7.7736,12.2477,17.2521,18.9135,20.104,21.439899,17.6607,12.2185,7.0544,4.1484,3.1084,12.1725
4,-95.041667,49.375,4.137,7.7938,12.2661,17.320601,18.896,20.086,21.429499,17.658499,12.1896,7.0096,4.1237,3.1109,12.1684


In [13]:
solar_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 481631 entries, 0 to 481630
Data columns (total 15 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   longitude     481631 non-null  float64
 1   latitude      481631 non-null  float64
 2   jan_solar     481631 non-null  float32
 3   feb_solar     481631 non-null  float32
 4   mar_solar     481631 non-null  float32
 5   apr_solar     481631 non-null  float32
 6   may_solar     481631 non-null  float32
 7   jun_solar     481631 non-null  float32
 8   jul_solar     481631 non-null  float32
 9   aug_solar     481631 non-null  float32
 10  sep_solar     481631 non-null  float32
 11  oct_solar     481631 non-null  float32
 12  nov_solar     481631 non-null  float32
 13  dec_solar     481631 non-null  float32
 14  annual_solar  481631 non-null  float32
dtypes: float32(13), float64(2)
memory usage: 31.2 MB


In [14]:
solar_df.describe()

Unnamed: 0,longitude,latitude,jan_solar,feb_solar,mar_solar,apr_solar,may_solar,jun_solar,jul_solar,aug_solar,sep_solar,oct_solar,nov_solar,dec_solar,annual_solar
count,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0,481631.0
mean,-99.222637,39.394686,7.54903,10.500313,14.727626,18.923887,21.580488,23.488953,23.168907,20.544647,16.742414,12.272196,8.52984,6.522726,15.379212
std,13.678842,5.383087,2.350804,2.301095,2.494786,2.559572,2.562268,2.575814,1.990998,1.876166,2.111038,2.658449,2.57042,2.318405,2.122556
min,-124.791667,24.5,1.7757,4.2086,7.4312,12.014899,15.2876,15.2147,16.397699,14.323899,11.3673,5.9017,2.3738,1.2202,9.5755
25%,-110.416667,35.208333,5.6185,8.8152,12.7893,17.089699,19.698399,21.5977,21.5389,19.1042,15.11775,10.0362,6.349,4.5526,13.7125
50%,-99.333333,39.625,7.3754,10.178699,14.248699,18.418499,20.8773,22.6152,23.009699,20.101099,16.599699,12.3108,8.4931,6.39,15.1803
75%,-88.833333,43.791667,9.3045,12.008599,16.296049,20.3046,23.007299,25.470798,24.539099,21.9319,18.2006,14.3571,10.58115,8.3144,16.777599
max,-66.958333,49.416667,14.085799,17.2159,21.9795,26.301199,29.4107,30.789598,29.104898,26.908899,22.623499,18.495199,14.713699,12.88,20.643999


# Solar transmission

In [15]:
files

['PRISM_ppt_30yr_normal_4kmM4_all_bil',
 'PRISM_soltotal_30yr_normal_4kmM3_all_bil',
 'PRISM_soltrans_30yr_normal_4kmM3_all_bil',
 'PRISM_tdmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmax_30yr_normal_4kmM5_all_bil',
 'PRISM_tmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmin_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmax_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmin_30yr_normal_4kmM5_all_bil']

In [16]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
working_dir = folder + "\\" + files[2]
print(working_dir)

os.chdir(working_dir)
bil_files = [os.path.join(working_dir, f) for f in os.listdir() if f.endswith('.bil')]

label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [i + 'solarTrans' for i in label_prefixs]

solarTransmission_df = bil_to_df(bil_files[0], labels[0])

# Loop through the rest
for i, j in zip(bil_files[1:], labels[1:]): 
    temp_df = bil_to_df(i, j)
    solarTransmission_df = solarTransmission_df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

print(solarTransmission_df.head())
print(solarTransmission_df.info())
print(solarTransmission_df.describe())

C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files\PRISM_soltrans_30yr_normal_4kmM3_all_bil
   longitude   latitude  jan_solarTrans  feb_solarTrans  mar_solarTrans  \
0 -95.125000  49.416667          0.6210          0.6896          0.6970   
1 -95.166667  49.375000          0.6253          0.6911          0.6974   
2 -95.125000  49.375000          0.6208          0.6913          0.6978   
3 -95.083333  49.375000          0.6172          0.6905          0.6977   
4 -95.041667  49.375000          0.6152          0.6920          0.6985   

   apr_solarTrans  may_solarTrans  jun_solarTrans  jul_solarTrans  \
0          0.7129          0.6565          0.6575          0.7384   
1          0.7118          0.6572          0.6579          0.7396   
2          0.7148          0.6563          0.6571          0.7383   
3          0.7165          0.6555          0.6565          0.7377   
4          0.7192          0.6547          0.6558          0.7371   

   aug_solarTr

# Temp dew point mean

In [17]:
files

['PRISM_ppt_30yr_normal_4kmM4_all_bil',
 'PRISM_soltotal_30yr_normal_4kmM3_all_bil',
 'PRISM_soltrans_30yr_normal_4kmM3_all_bil',
 'PRISM_tdmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmax_30yr_normal_4kmM5_all_bil',
 'PRISM_tmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmin_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmax_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmin_30yr_normal_4kmM5_all_bil']

In [18]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
working_dir = folder + "\\" + files[3]
print(working_dir)

os.chdir(working_dir)
bil_files = [os.path.join(working_dir, f) for f in os.listdir() if f.endswith('.bil')]

label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [i + 'dptmean' for i in label_prefixs]

dptemp_df = bil_to_df(bil_files[0], labels[0])

# Loop through the rest
for i, j in zip(bil_files[1:], labels[1:]): 
    temp_df = bil_to_df(i, j)
    dptemp_df = dptemp_df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

print(dptemp_df.head())
print(dptemp_df.info())
print(dptemp_df.describe())

C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files\PRISM_tdmean_30yr_normal_4kmM5_all_bil
   longitude   latitude  jan_dptmean  feb_dptmean  mar_dptmean  apr_dptmean  \
0 -95.125000  49.416667   -18.071100   -16.452999   -10.067300      -4.0575   
1 -95.083333  49.416667   -18.043400   -16.477900   -10.096000      -4.0703   
2 -95.041667  49.416667   -18.003700   -16.474699   -10.097600      -4.0699   
3 -95.000000  49.416667   -17.925499   -16.366100   -10.044499      -4.0347   
4 -94.958333  49.416667   -17.896400   -16.411699   -10.100200      -4.0858   

   may_dptmean  jun_dptmean  jul_dptmean  aug_dptmean  sep_dptmean  \
0       3.8343    11.376699    14.682500    13.656799       8.9100   
1       3.8254    11.345099    14.637099    13.646299       8.9439   
2       3.8044    11.324900    14.609799    13.628600       9.0013   
3       3.7653    11.292700    14.542200    13.594900       9.1046   
4       3.7463    11.273700    14.529099    13.591700    

# Max temperature

In [19]:
files

['PRISM_ppt_30yr_normal_4kmM4_all_bil',
 'PRISM_soltotal_30yr_normal_4kmM3_all_bil',
 'PRISM_soltrans_30yr_normal_4kmM3_all_bil',
 'PRISM_tdmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmax_30yr_normal_4kmM5_all_bil',
 'PRISM_tmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmin_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmax_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmin_30yr_normal_4kmM5_all_bil']

In [20]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
working_dir = folder + "\\" + files[4]
print(working_dir)

os.chdir(working_dir)
bil_files = [os.path.join(working_dir, f) for f in os.listdir() if f.endswith('.bil')]

label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [i + 'maxt' for i in label_prefixs]

maxt_df = bil_to_df(bil_files[0], labels[0])

# Loop through the rest
for i, j in zip(bil_files[1:], labels[1:]): 
    temp_df = bil_to_df(i, j)
    maxt_df = maxt_df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

print(maxt_df.head())
print(maxt_df.info())
print(maxt_df.describe())

C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files\PRISM_tmax_30yr_normal_4kmM5_all_bil
   longitude   latitude   jan_maxt  feb_maxt  mar_maxt  apr_maxt   may_maxt  \
0 -95.125000  49.416667 -10.470099   -7.2620    0.2340    8.2640  16.135300   
1 -95.083333  49.416667 -10.485100   -7.3082    0.1911    8.1180  16.006300   
2 -95.041667  49.416667 -10.531500   -7.3599    0.1448    7.9604  15.890699   
3 -95.000000  49.416667 -10.636800   -7.4696    0.0557    7.7942  15.778299   
4 -94.958333  49.416667 -10.624599   -7.4935    0.0178    7.6593  15.678499   

    jun_maxt   jul_maxt   aug_maxt   sep_maxt  oct_maxt  nov_maxt  dec_maxt  \
0  21.900499  24.420500  23.888899  18.666399  9.727300    0.5070   -7.1084   
1  21.781099  24.318300  23.829100  18.566799  9.632400    0.4985   -7.1318   
2  21.652599  24.214399  23.753599  18.432600  9.526999    0.4672   -7.1865   
3  21.527399  24.108599  23.666599  18.264599  9.397200    0.3757   -7.3022   
4  21.421600  

# Mean temperature

In [21]:
files

['PRISM_ppt_30yr_normal_4kmM4_all_bil',
 'PRISM_soltotal_30yr_normal_4kmM3_all_bil',
 'PRISM_soltrans_30yr_normal_4kmM3_all_bil',
 'PRISM_tdmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmax_30yr_normal_4kmM5_all_bil',
 'PRISM_tmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmin_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmax_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmin_30yr_normal_4kmM5_all_bil']

In [22]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
working_dir = folder + "\\" + files[5]
print(working_dir)

os.chdir(working_dir)
bil_files = [os.path.join(working_dir, f) for f in os.listdir() if f.endswith('.bil')]

label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [i + 'meant' for i in label_prefixs]

meant_df = bil_to_df(bil_files[0], labels[0])

# Loop through the rest
for i, j in zip(bil_files[1:], labels[1:]): 
    temp_df = bil_to_df(i, j)
    meant_df = meant_df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

print(meant_df.head())
print(meant_df.info())
print(meant_df.describe())

C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files\PRISM_tmean_30yr_normal_4kmM5_all_bil
   longitude   latitude  jan_meant  feb_meant  mar_meant  apr_meant  \
0 -95.125000  49.416667 -16.006199   -13.6732    -5.9882     2.4166   
1 -95.083333  49.416667 -15.998600   -13.7078    -6.0155     2.3461   
2 -95.041667  49.416667 -15.983100   -13.7245    -6.0243     2.2886   
3 -95.000000  49.416667 -15.934099   -13.6977    -6.0012     2.2672   
4 -94.958333  49.416667 -15.907499   -13.7485    -6.0480     2.1781   

   may_meant  jun_meant  jul_meant  aug_meant  sep_meant  oct_meant  \
0  10.306399  16.400700  18.667000  17.867300  12.898299     5.0186   
1  10.270400  16.388199  18.639500  17.904200  12.902400     5.0013   
2  10.254000  16.383600  18.637199  17.962999  12.925400     5.0007   
3  10.268200  16.390100  18.686499  18.070599  12.983700     5.0366   
4  10.241300  16.385099  18.615499  18.084200  13.021199     5.0160   

   nov_meant  dec_meant  annu

# Minimum temperature

In [23]:
files

['PRISM_ppt_30yr_normal_4kmM4_all_bil',
 'PRISM_soltotal_30yr_normal_4kmM3_all_bil',
 'PRISM_soltrans_30yr_normal_4kmM3_all_bil',
 'PRISM_tdmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmax_30yr_normal_4kmM5_all_bil',
 'PRISM_tmean_30yr_normal_4kmM5_all_bil',
 'PRISM_tmin_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmax_30yr_normal_4kmM5_all_bil',
 'PRISM_vpdmin_30yr_normal_4kmM5_all_bil']

In [24]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
working_dir = folder + "\\" + files[6]
print(working_dir)

os.chdir(working_dir)
bil_files = [os.path.join(working_dir, f) for f in os.listdir() if f.endswith('.bil')]

label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [i + 'tmin' for i in label_prefixs]

tmin_df = bil_to_df(bil_files[0], labels[0])

# Loop through the rest
for i, j in zip(bil_files[1:], labels[1:]): 
    temp_df = bil_to_df(i, j)
    tmin_df = tmin_df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

print(tmin_df.head())
print(tmin_df.info())
print(tmin_df.describe())

C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files\PRISM_tmin_30yr_normal_4kmM5_all_bil
   longitude   latitude   jan_tmin   feb_tmin   mar_tmin  apr_tmin  may_tmin  \
0 -95.125000  49.416667 -21.542400 -20.084499 -12.210700   -3.4306    4.4777   
1 -95.083333  49.416667 -21.512299 -20.107500 -12.222300   -3.4257    4.5347   
2 -95.041667  49.416667 -21.434799 -20.089199 -12.193600   -3.3831    4.6174   
3 -95.000000  49.416667 -21.231600 -19.925999 -12.058300   -3.2595    4.7582   
4 -94.958333  49.416667 -21.190399 -20.003599 -12.113999   -3.3029    4.8042   

    jun_tmin   jul_tmin   aug_tmin  sep_tmin  oct_tmin  nov_tmin   dec_tmin  \
0  10.901000  12.913700  11.845799    7.1305    0.3099   -7.6436 -16.616100   
1  10.995399  12.960899  11.979400    7.2380    0.3702   -7.5720 -16.570499   
2  11.114600  13.060100  12.172500    7.4183    0.4745   -7.4736 -16.495600   
3  11.252999  13.264500  12.474800    7.7030    0.6760   -7.3482 -16.338499   
4  11.34

# vpdmax

In [25]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
working_dir = folder + "\\" + files[7]
print(working_dir)

os.chdir(working_dir)
bil_files = [os.path.join(working_dir, f) for f in os.listdir() if f.endswith('.bil')]

label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [i + 'vpdmax' for i in label_prefixs]

vpdmax_df = bil_to_df(bil_files[0], labels[0])

# Loop through the rest
for i, j in zip(bil_files[1:], labels[1:]): 
    temp_df = bil_to_df(i, j)
    vpdmax_df = vpdmax_df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

print(vpdmax_df.head())
print(vpdmax_df.info())
print(vpdmax_df.describe())

C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files\PRISM_vpdmax_30yr_normal_4kmM5_all_bil
   longitude   latitude  jan_vpdmax  feb_vpdmax  mar_vpdmax  apr_vpdmax  \
0 -95.125000  49.416667      0.8649      1.5272      3.2420      6.7880   
1 -95.083333  49.416667      0.8625      1.5243      3.2226      6.6504   
2 -95.041667  49.416667      0.8566      1.5174      3.1900      6.4959   
3 -95.000000  49.416667      0.8447      1.4926      3.1230      6.3244   
4 -94.958333  49.416667      0.8443      1.4983      3.1159      6.2153   

   may_vpdmax  jun_vpdmax  jul_vpdmax  aug_vpdmax  sep_vpdmax  oct_vpdmax  \
0   10.382199   12.898299   14.316799   13.940499   10.122900      5.5701   
1   10.216599   12.730700   14.247299   13.883699    9.966700      5.4826   
2   10.081500   12.543600   14.165100   13.811000    9.747000      5.3524   
3    9.964399   12.372300   14.109099   13.733200    9.441400      5.1795   
4    9.850200   12.223300   14.008500   13.686

# vpdmin

In [26]:
folder = r"C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files"
working_dir = folder + "\\" + files[8]
print(working_dir)

os.chdir(working_dir)
bil_files = [os.path.join(working_dir, f) for f in os.listdir() if f.endswith('.bil')]

label_prefixs = ['jan_', 'feb_', 'mar_', 'apr_', 'may_', 'jun_', 'jul_', 'aug_', 'sep_', 'oct_', 'nov_', 'dec_', 'annual_']
labels = [i + 'vpdmin' for i in label_prefixs]

vpdmin_df = bil_to_df(bil_files[0], labels[0])

# Loop through the rest
for i, j in zip(bil_files[1:], labels[1:]): 
    temp_df = bil_to_df(i, j)
    vpdmin_df = vpdmin_df.merge(temp_df, on=['longitude', 'latitude'], how='inner')

print(vpdmin_df.head())
print(vpdmin_df.info())
print(vpdmin_df.describe())

C:\Users\matta\Desktop\Documents\Python\Geolocation\climate_data\climate_files\PRISM_vpdmin_30yr_normal_4kmM5_all_bil
   longitude   latitude  jan_vpdmin  feb_vpdmin  mar_vpdmin  apr_vpdmin  \
0 -95.125000  49.416667      0.1542      0.1737      0.2861      0.6837   
1 -95.083333  49.416667      0.1526      0.1731      0.2850      0.6849   
2 -95.041667  49.416667      0.1510      0.1718      0.2840      0.6868   
3 -95.000000  49.416667      0.1497      0.1701      0.2836      0.6922   
4 -94.958333  49.416667      0.1481      0.1694      0.2830      0.6907   

   may_vpdmin  jun_vpdmin  jul_vpdmin  aug_vpdmin  sep_vpdmin  oct_vpdmin  \
0      1.0216      1.1085      0.8892      0.8468      0.7396      0.5602   
1      1.0500      1.1567      0.9392      0.9178      0.7602      0.5666   
2      1.0923      1.2118      1.0033      1.0203      0.7882      0.5530   
3      1.1601      1.2775      1.1256      1.1805      0.8237      0.5158   
4      1.1878      1.3234      1.1339      1.2

# Merging dataframes

In [27]:
%who DataFrame

dptemp_df	 maxt_df	 meant_df	 precip_df	 solarTransmission_df	 solar_df	 temp_df	 tmin_df	 vpdmax_df	 
vpdmin_df	 


In [28]:
maxt_df.columns

Index(['longitude', 'latitude', 'jan_maxt', 'feb_maxt', 'mar_maxt', 'apr_maxt',
       'may_maxt', 'jun_maxt', 'jul_maxt', 'aug_maxt', 'sep_maxt', 'oct_maxt',
       'nov_maxt', 'dec_maxt', 'annual_maxt'],
      dtype='object')

In [32]:
for i in [dptemp_df, maxt_df, meant_df, precip_df, solarTransmission_df, solar_df, temp_df, tmin_df, vpdmax_df]:
    print(i.shape)

(485367, 15)
(485367, 15)
(485367, 15)
(481631, 15)
(481631, 15)
(481631, 15)
(485367, 3)
(485367, 15)
(485367, 15)


In [33]:
maxt_df.merge(meant_df, left_on = ['longitude', 'latitude'], right_on = ['longitude', 'latitude']).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 485367 entries, 0 to 485366
Data columns (total 28 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   longitude     485367 non-null  float64
 1   latitude      485367 non-null  float64
 2   jan_maxt      485367 non-null  float32
 3   feb_maxt      485367 non-null  float32
 4   mar_maxt      485367 non-null  float32
 5   apr_maxt      485367 non-null  float32
 6   may_maxt      485367 non-null  float32
 7   jun_maxt      485367 non-null  float32
 8   jul_maxt      485367 non-null  float32
 9   aug_maxt      485367 non-null  float32
 10  sep_maxt      485367 non-null  float32
 11  oct_maxt      485367 non-null  float32
 12  nov_maxt      485367 non-null  float32
 13  dec_maxt      485367 non-null  float32
 14  annual_maxt   485367 non-null  float32
 15  jan_meant     485367 non-null  float32
 16  feb_meant     485367 non-null  float32
 17  mar_meant     485367 non-null  float32
 18  apr_