Includes:

Getting a list of weather stations for the regions

Downloading weather data from SILO APIs for these weather stations

Calculating the area of the regions + statistics such as stations per region

Voronoi Regions calculations (but not used in the final pipeline)

Mapping dates to different seasons of cotton growth

Dropping out-of-season observations

Aggregating weather statistics to region-year-season level

### Loading Libraries

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns

In [3]:
from statsmodels.stats.weightstats import DescrStatsW

In [4]:
from shapely.geometry import Point, Polygon
import geopandas as gpd
from scipy.spatial import Voronoi
from shapely.validation import explain_validity

In [5]:
import requests
import os

In [6]:
import matplotlib.pyplot as plt

In [7]:
coordinates = pd.read_fwf("bom_stations.txt", skiprows = 2).drop(index = 0).reset_index(drop = True)
coordinates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19497 entries, 0 to 19496
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Site        19497 non-null  object
 1   Dist        19475 non-null  object
 2   Site name   19496 non-null  object
 3   Start       19496 non-null  object
 4   End         19496 non-null  object
 5   Lat         19496 non-null  object
 6   Lon         19496 non-null  object
 7   Source      19495 non-null  object
 8   STA         19495 non-null  object
 9   Height (m)  19495 non-null  object
 10  Bar_ht      19494 non-null  object
 11  WMO         19494 non-null  object
dtypes: object(12)
memory usage: 1.8+ MB


In [8]:
coordinates.head()

Unnamed: 0,Site,Dist,Site name,Start,End,Lat,Lon,Source,STA,Height (m),Bar_ht,WMO
0,1000,1,KARUNJIE,1940,1983,-16.2919,127.1956,.....,WA,320.0,..,..
1,1001,1,OOMBULGURRI,1914,2012,-15.1806,127.8456,GPS,WA,2.0,..,..
2,1002,1,BEVERLEY SP,1959,1967,-16.5825,125.4828,.....,WA,..,..,..
3,1003,1,PAGO MISSION,1908,1940,-14.1331,126.7158,.....,WA,5.0,24.4,..
4,1004,1,KUNMUNYA,1915,1948,-15.4167,124.7167,.....,WA,47.0,..,..


In [9]:
coordinates[coordinates.isnull().any(axis=1)]

Unnamed: 0,Site,Dist,Site name,Start,End,Lat,Lon,Source,STA,Height (m),Bar_ht,WMO
362,005026,,WITTENOOM,1949,2019,-22.2425,118.3358,Unknown,WA,463.0,464.0,94313
395,005059,,MOUNT LIONEL AIRSTRIP,1967,..,-22.6087,117.6313,.....,WA,..,0.0,..
3343,014325,,SHOAL BAY (AMBLS),2025,..,-12.3850,130.9749,MAP 1:250 000,NT,6.0,..,94113
3377,014519,,CAPE WILBERFORCE,1999,..,-11.9171,136.5317,.....,NT,..,..,..
4120,017136,,LEIGH CREEK (MAYNARDS WELL),2005,..,-30.6024,138.6888,.....,SA,..,..,..
4121,017137,,LEIGH CREEK (NORTH MOOLOOLOO),2006,..,-30.6245,138.5271,.....,SA,..,..,..
4122,017138,,LEIGH CREEK (PFITZNERS WELL),2006,..,-30.6881,138.5665,.....,SA,..,..,..
4922,023139,,SALISBURY COUNCIL BASESTATION,2005,2011,-34.7602,138.6446,MAP 1:50 000,SA,39.0,..,..
10925,051068,,GULARGAMBONE (THE MAZE),1961,2016,-31.3799,148.2644,GPS,NSW,215.0,..,..
11103,052081,,MACINTYRE RIVER (BOONANGAR BRIDGE),1986,..,-28.5750,149.5597,.....,NSW,..,..,..


### Dropping missing coordinates and site names

In [10]:
missing_values = coordinates[(coordinates['Site name'].isna()) | (coordinates['Lat'].isna()) | (coordinates['Lon'].isna())].index
coordinates.loc[missing_values]

Unnamed: 0,Site,Dist,Site name,Start,End,Lat,Lon,Source,STA,Height (m),Bar_ht,WMO
19494,19494 s,ation,,,,,,,,,,


In [11]:
coordinates.iloc[[(len(coordinates) - 1), (len(coordinates) - 2)]]

Unnamed: 0,Site,Dist,Site name,Start,End,Lat,Lon,Source,STA,Height (m),Bar_ht,WMO
19496,Please,ote C,"pyright, Disclaimer and Privacy Notice,",ccessib,e at <h,tp://www,bom.gov.a,/other/copyrig,t.s,tml>,,
19495,(c) Cop,right,"Commonwealth of Australia 2025, Bureau o",Meteor,logy (A,N 92 637,533 532),,,,,


In [12]:
coordinates.drop(index = [(len(coordinates) - 1), (len(coordinates) - 2)], inplace=True)

In [13]:
coordinates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19495 entries, 0 to 19494
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Site        19495 non-null  object
 1   Dist        19473 non-null  object
 2   Site name   19494 non-null  object
 3   Start       19494 non-null  object
 4   End         19494 non-null  object
 5   Lat         19494 non-null  object
 6   Lon         19494 non-null  object
 7   Source      19494 non-null  object
 8   STA         19494 non-null  object
 9   Height (m)  19494 non-null  object
 10  Bar_ht      19494 non-null  object
 11  WMO         19494 non-null  object
dtypes: object(12)
memory usage: 1.8+ MB


### Data Type transformation

In [14]:
coordinates.drop(index = missing_values, inplace = True)
coordinates.reset_index(drop = True, inplace=True)
coordinates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19494 entries, 0 to 19493
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Site        19494 non-null  object
 1   Dist        19472 non-null  object
 2   Site name   19494 non-null  object
 3   Start       19494 non-null  object
 4   End         19494 non-null  object
 5   Lat         19494 non-null  object
 6   Lon         19494 non-null  object
 7   Source      19494 non-null  object
 8   STA         19494 non-null  object
 9   Height (m)  19494 non-null  object
 10  Bar_ht      19494 non-null  object
 11  WMO         19494 non-null  object
dtypes: object(12)
memory usage: 1.8+ MB


In [15]:
coordinates["Lat"] = coordinates["Lat"].apply(float)
coordinates["Lon"] = coordinates["Lon"].apply(float)
coordinates["Site"] = coordinates["Site"].apply(int)
coordinates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19494 entries, 0 to 19493
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Site        19494 non-null  int64  
 1   Dist        19472 non-null  object 
 2   Site name   19494 non-null  object 
 3   Start       19494 non-null  object 
 4   End         19494 non-null  object 
 5   Lat         19494 non-null  float64
 6   Lon         19494 non-null  float64
 7   Source      19494 non-null  object 
 8   STA         19494 non-null  object 
 9   Height (m)  19494 non-null  object 
 10  Bar_ht      19494 non-null  object 
 11  WMO         19494 non-null  object 
dtypes: float64(2), int64(1), object(9)
memory usage: 1.8+ MB


In [16]:
coordinates["Height (m)"].replace({".." : np.nan}, inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  coordinates["Height (m)"].replace({".." : np.nan}, inplace = True)


In [17]:
coordinates["Height (m)"].isna().sum() / len(coordinates)

np.float64(0.2179645018980199)

### Geodata

In [21]:
regions = gpd.read_file("regions_selected_labeled.gpkg")

In [22]:
print(regions.head())

   id             RegionName  \
0   1  QLD Condamine-Balonne   
1   2     NSW Barwon Darling   
2   3             QLD Moonie   
3   4       NSW Murray Above   
4   5      NSW Lower Darling   

                                            geometry  
0  MULTIPOLYGON (((146.90174 -28.95673, 146.5 -28...  
1  MULTIPOLYGON (((144.18743 -29.03522, 148.2859 ...  
2  MULTIPOLYGON (((148.60325 -28.94576, 148.77205...  
3  MULTIPOLYGON (((147.31699 -35.96701, 147.34906...  
4  MULTIPOLYGON (((141.05399 -33.28225, 141.08446...  


In [23]:
regions.drop("id", axis = 1,  inplace = True)

In [24]:
geometry = [Point(xy) for xy in zip(coordinates.Lon, coordinates.Lat)]
gdf_points = gpd.GeoDataFrame(coordinates[["Lon", "Lat", "Site", "Height (m)"]], geometry=geometry)

In [25]:
gdf_points.head()

Unnamed: 0,Lon,Lat,Site,Height (m),geometry
0,127.1956,-16.2919,1000,320.0,POINT (127.1956 -16.2919)
1,127.8456,-15.1806,1001,2.0,POINT (127.8456 -15.1806)
2,125.4828,-16.5825,1002,,POINT (125.4828 -16.5825)
3,126.7158,-14.1331,1003,5.0,POINT (126.7158 -14.1331)
4,124.7167,-15.4167,1004,47.0,POINT (124.7167 -15.4167)


In [26]:
gdf_points.crs = "EPSG:4326"

In [27]:
if regions.crs != gdf_points.crs:
    regions = regions.to_crs(gdf_points.crs)

Fixing invalid plygons of regions

In [28]:
regions["geometry"] = regions["geometry"].apply(lambda geom: geom.buffer(0) if not geom.is_valid else geom)
regions["geometry"]

0     MULTIPOLYGON (((146.53957 -27.23892, 146.52025...
1     MULTIPOLYGON (((144.18743 -29.03522, 148.2859 ...
2     MULTIPOLYGON (((148.60325 -28.94576, 148.77205...
3     MULTIPOLYGON (((147.31699 -35.96701, 147.34906...
4     MULTIPOLYGON (((141.05399 -33.28225, 141.08446...
5     MULTIPOLYGON (((149.00561 -28.92837, 148.99289...
6     MULTIPOLYGON (((149.15254 -28.85159, 149.23521...
7     MULTIPOLYGON (((151.68274 -29.95479, 151.68618...
8     MULTIPOLYGON (((148.33494 -29.93474, 148.42915...
9     MULTIPOLYGON (((146.52897 -32.31278, 146.4993 ...
10    MULTIPOLYGON (((143.84798 -34.24589, 143.91864...
11    MULTIPOLYGON (((143.26761 -34.66477, 143.2878 ...
Name: geometry, dtype: geometry

In [29]:
points_with_regions = gpd.sjoin(gdf_points, regions, how="left", predicate="within").drop("index_right", axis = 1)
points_with_regions

Unnamed: 0,Lon,Lat,Site,Height (m),geometry,RegionName
0,127.1956,-16.2919,1000,320.0,POINT (127.1956 -16.2919),
1,127.8456,-15.1806,1001,2.0,POINT (127.8456 -15.1806),
2,125.4828,-16.5825,1002,,POINT (125.4828 -16.5825),
3,126.7158,-14.1331,1003,5.0,POINT (126.7158 -14.1331),
4,124.7167,-15.4167,1004,47.0,POINT (124.7167 -15.4167),
...,...,...,...,...,...,...
19489,72.6400,-69.7133,300054,47.0,POINT (72.64 -69.7133),
19490,78.8735,-68.4723,300055,577.0,POINT (78.8735 -68.4723),
19491,110.7615,-66.2803,300060,390.0,POINT (110.7615 -66.2803),
19492,100.6000,-66.2510,300061,12.0,POINT (100.6 -66.251),


In [30]:
points_with_regions = points_with_regions[points_with_regions["RegionName"].notna()].copy().reset_index(drop=True)
points_with_regions

Unnamed: 0,Lon,Lat,Site,Height (m),geometry,RegionName
0,148.1000,-25.3333,35152,660.0,POINT (148.1 -25.3333),QLD Condamine-Balonne
1,147.9167,-25.1667,35163,,POINT (147.9167 -25.1667),QLD Condamine-Balonne
2,148.2667,-25.5333,35245,460.0,POINT (148.2667 -25.5333),QLD Condamine-Balonne
3,148.2025,-25.2444,35296,540.0,POINT (148.2025 -25.2444),QLD Condamine-Balonne
4,152.4775,-28.2367,40030,780.0,POINT (152.4775 -28.2367),QLD Condamine-Balonne
...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (147.9389 -36.0508),NSW Murray Above
3731,148.0500,-36.3000,82075,,POINT (148.05 -36.3),NSW Murray Above
3732,147.9333,-36.0500,82123,,POINT (147.9333 -36.05),NSW Murray Above
3733,147.8957,-36.0424,82171,,POINT (147.8957 -36.0424),NSW Murray Above


In [31]:
points_with_regions.groupby("RegionName").count()

Unnamed: 0_level_0,Lon,Lat,Site,Height (m),geometry
RegionName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NSW Barwon Darling,408,408,408,272,408
NSW Border Rivers,194,194,194,158,194
NSW Gwydir,173,173,173,132,173
NSW Lachlan,465,465,465,309,465
NSW Lower Darling,142,142,142,101,142
NSW Macquarie-Castlereagh,465,465,465,332,465
NSW Murray Above,55,55,55,44,55
NSW Murrumbidgee,597,597,597,422,597
NSW Namoi,410,410,410,345,410
QLD Border Rivers,149,149,149,118,149


In [32]:
points_with_regions.head()

Unnamed: 0,Lon,Lat,Site,Height (m),geometry,RegionName
0,148.1,-25.3333,35152,660.0,POINT (148.1 -25.3333),QLD Condamine-Balonne
1,147.9167,-25.1667,35163,,POINT (147.9167 -25.1667),QLD Condamine-Balonne
2,148.2667,-25.5333,35245,460.0,POINT (148.2667 -25.5333),QLD Condamine-Balonne
3,148.2025,-25.2444,35296,540.0,POINT (148.2025 -25.2444),QLD Condamine-Balonne
4,152.4775,-28.2367,40030,780.0,POINT (152.4775 -28.2367),QLD Condamine-Balonne


In [33]:
points_with_regions.rename(columns={"geometry" : "geometry_point"}, inplace= True)
points_with_regions

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName
0,148.1000,-25.3333,35152,660.0,POINT (148.1 -25.3333),QLD Condamine-Balonne
1,147.9167,-25.1667,35163,,POINT (147.9167 -25.1667),QLD Condamine-Balonne
2,148.2667,-25.5333,35245,460.0,POINT (148.2667 -25.5333),QLD Condamine-Balonne
3,148.2025,-25.2444,35296,540.0,POINT (148.2025 -25.2444),QLD Condamine-Balonne
4,152.4775,-28.2367,40030,780.0,POINT (152.4775 -28.2367),QLD Condamine-Balonne
...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (147.9389 -36.0508),NSW Murray Above
3731,148.0500,-36.3000,82075,,POINT (148.05 -36.3),NSW Murray Above
3732,147.9333,-36.0500,82123,,POINT (147.9333 -36.05),NSW Murray Above
3733,147.8957,-36.0424,82171,,POINT (147.8957 -36.0424),NSW Murray Above


In [34]:
regions

Unnamed: 0,RegionName,geometry
0,QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
1,NSW Barwon Darling,"MULTIPOLYGON (((144.18743 -29.03522, 148.2859 ..."
2,QLD Moonie,"MULTIPOLYGON (((148.60325 -28.94576, 148.77205..."
3,NSW Murray Above,"MULTIPOLYGON (((147.31699 -35.96701, 147.34906..."
4,NSW Lower Darling,"MULTIPOLYGON (((141.05399 -33.28225, 141.08446..."
5,QLD Border Rivers,"MULTIPOLYGON (((149.00561 -28.92837, 148.99289..."
6,NSW Border Rivers,"MULTIPOLYGON (((149.15254 -28.85159, 149.23521..."
7,NSW Gwydir,"MULTIPOLYGON (((151.68274 -29.95479, 151.68618..."
8,NSW Namoi,"MULTIPOLYGON (((148.33494 -29.93474, 148.42915..."
9,NSW Macquarie-Castlereagh,"MULTIPOLYGON (((146.52897 -32.31278, 146.4993 ..."


In [35]:
points_with_regions_full = points_with_regions.merge(regions, on = "RegionName")
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,geometry
0,148.1000,-25.3333,35152,660.0,POINT (148.1 -25.3333),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
1,147.9167,-25.1667,35163,,POINT (147.9167 -25.1667),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
2,148.2667,-25.5333,35245,460.0,POINT (148.2667 -25.5333),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
3,148.2025,-25.2444,35296,540.0,POINT (148.2025 -25.2444),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
4,152.4775,-28.2367,40030,780.0,POINT (152.4775 -28.2367),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (147.9389 -36.0508),NSW Murray Above,"MULTIPOLYGON (((147.31699 -35.96701, 147.34906..."
3731,148.0500,-36.3000,82075,,POINT (148.05 -36.3),NSW Murray Above,"MULTIPOLYGON (((147.31699 -35.96701, 147.34906..."
3732,147.9333,-36.0500,82123,,POINT (147.9333 -36.05),NSW Murray Above,"MULTIPOLYGON (((147.31699 -35.96701, 147.34906..."
3733,147.8957,-36.0424,82171,,POINT (147.8957 -36.0424),NSW Murray Above,"MULTIPOLYGON (((147.31699 -35.96701, 147.34906..."


In [36]:
points_with_regions_full.rename(columns = {"geometry" : "polygon"}, inplace = True)
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon
0,148.1000,-25.3333,35152,660.0,POINT (148.1 -25.3333),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
1,147.9167,-25.1667,35163,,POINT (147.9167 -25.1667),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
2,148.2667,-25.5333,35245,460.0,POINT (148.2667 -25.5333),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
3,148.2025,-25.2444,35296,540.0,POINT (148.2025 -25.2444),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
4,152.4775,-28.2367,40030,780.0,POINT (152.4775 -28.2367),QLD Condamine-Balonne,"MULTIPOLYGON (((146.53957 -27.23892, 146.52025..."
...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (147.9389 -36.0508),NSW Murray Above,"MULTIPOLYGON (((147.31699 -35.96701, 147.34906..."
3731,148.0500,-36.3000,82075,,POINT (148.05 -36.3),NSW Murray Above,"MULTIPOLYGON (((147.31699 -35.96701, 147.34906..."
3732,147.9333,-36.0500,82123,,POINT (147.9333 -36.05),NSW Murray Above,"MULTIPOLYGON (((147.31699 -35.96701, 147.34906..."
3733,147.8957,-36.0424,82171,,POINT (147.8957 -36.0424),NSW Murray Above,"MULTIPOLYGON (((147.31699 -35.96701, 147.34906..."


In [37]:
points_with_regions_full["polygon"] = points_with_regions_full["polygon"].to_crs("EPSG:3857")
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon
0,148.1000,-25.3333,35152,660.0,POINT (148.1 -25.3333),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163..."
1,147.9167,-25.1667,35163,,POINT (147.9167 -25.1667),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163..."
2,148.2667,-25.5333,35245,460.0,POINT (148.2667 -25.5333),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163..."
3,148.2025,-25.2444,35296,540.0,POINT (148.2025 -25.2444),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163..."
4,152.4775,-28.2367,40030,780.0,POINT (152.4775 -28.2367),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163..."
...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (147.9389 -36.0508),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164..."
3731,148.0500,-36.3000,82075,,POINT (148.05 -36.3),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164..."
3732,147.9333,-36.0500,82123,,POINT (147.9333 -36.05),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164..."
3733,147.8957,-36.0424,82171,,POINT (147.8957 -36.0424),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164..."


In [38]:
points_with_regions_full["region_centroid"] = points_with_regions_full["polygon"].centroid
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid
0,148.1000,-25.3333,35152,660.0,POINT (148.1 -25.3333),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02)
1,147.9167,-25.1667,35163,,POINT (147.9167 -25.1667),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02)
2,148.2667,-25.5333,35245,460.0,POINT (148.2667 -25.5333),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02)
3,148.2025,-25.2444,35296,540.0,POINT (148.2025 -25.2444),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02)
4,152.4775,-28.2367,40030,780.0,POINT (152.4775 -28.2367),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02)
...,...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (147.9389 -36.0508),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529)
3731,148.0500,-36.3000,82075,,POINT (148.05 -36.3),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529)
3732,147.9333,-36.0500,82123,,POINT (147.9333 -36.05),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529)
3733,147.8957,-36.0424,82171,,POINT (147.8957 -36.0424),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529)


In [39]:
points_with_regions_full.region_centroid.unique()

<GeometryArray>
[ <POINT (16565392.281 -3164332.02)>, <POINT (16642660.877 -3249948.395)>,
 <POINT (16781490.832 -3290121.696)>, <POINT (16811052.818 -3395401.705)>,
 <POINT (16221446.237 -3576958.271)>,  <POINT (15869348.102 -3905016.31)>,
   <POINT (16327642.7 -4159966.465)>,  <POINT (16517313.31 -3771017.642)>,
 <POINT (16350624.076 -3959016.816)>,  <POINT (16704119.608 -3604172.98)>,
 <POINT (16730976.577 -3459305.985)>, <POINT (16476974.939 -4310406.529)>]
Length: 12, dtype: geometry

In [40]:
points_with_regions_full["region_area"] = points_with_regions_full["polygon"].area
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid,region_area
0,148.1000,-25.3333,35152,660.0,POINT (148.1 -25.3333),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
1,147.9167,-25.1667,35163,,POINT (147.9167 -25.1667),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
2,148.2667,-25.5333,35245,460.0,POINT (148.2667 -25.5333),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
3,148.2025,-25.2444,35296,540.0,POINT (148.2025 -25.2444),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
4,152.4775,-28.2367,40030,780.0,POINT (152.4775 -28.2367),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
...,...,...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (147.9389 -36.0508),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09
3731,148.0500,-36.3000,82075,,POINT (148.05 -36.3),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09
3732,147.9333,-36.0500,82123,,POINT (147.9333 -36.05),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09
3733,147.8957,-36.0424,82171,,POINT (147.8957 -36.0424),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09


In [41]:
points_with_regions_full.region_area.unique()

array([1.49457954e+11, 1.58136841e+10, 2.77863299e+10, 2.37399738e+10,
       2.15651124e+11, 1.14681959e+11, 1.25671571e+11, 9.93725399e+10,
       1.21602106e+11, 5.15750292e+10, 3.06009148e+10, 5.77419764e+09])

In [42]:
points_with_regions_full["geometry_point"] = points_with_regions_full["geometry_point"].to_crs("EPSG:3857")
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid,region_area
0,148.1000,-25.3333,35152,660.0,POINT (16486416.586 -2916738.876),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
1,147.9167,-25.1667,35163,,POINT (16466011.724 -2896233.894),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
2,148.2667,-25.5333,35245,460.0,POINT (16504973.546 -2941392.031),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
3,148.2025,-25.2444,35296,540.0,POINT (16497826.834 -2905793.644),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
4,152.4775,-28.2367,40030,780.0,POINT (16973717.657 -3278849.153),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11
...,...,...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (16468483.017 -4307613.627),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09
3731,148.0500,-36.3000,82075,,POINT (16480850.612 -4341979.816),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09
3732,147.9333,-36.0500,82123,,POINT (16467859.627 -4307503.478),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09
3733,147.8957,-36.0424,82171,,POINT (16463674.015 -4306457.116),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09


In [43]:
points_with_regions_full["distance_from_centroid_meters_m"] = points_with_regions_full.apply(lambda row: row["geometry_point"].distance(row["region_centroid"]), axis = 1)
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid,region_area,distance_from_centroid_meters_m
0,148.1000,-25.3333,35152,660.0,POINT (16486416.586 -2916738.876),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,259883.676399
1,147.9167,-25.1667,35163,,POINT (16466011.724 -2896233.894),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,285924.990785
2,148.2667,-25.5333,35245,460.0,POINT (16504973.546 -2941392.031),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,230981.953064
3,148.2025,-25.2444,35296,540.0,POINT (16497826.834 -2905793.644),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,267221.222803
4,152.4775,-28.2367,40030,780.0,POINT (16973717.657 -3278849.153),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,424079.929941
...,...,...,...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (16468483.017 -4307613.627),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,8939.410047
3731,148.0500,-36.3000,82075,,POINT (16480850.612 -4341979.816),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,31810.270154
3732,147.9333,-36.0500,82123,,POINT (16467859.627 -4307503.478),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,9566.431708
3733,147.8957,-36.0424,82171,,POINT (16463674.015 -4306457.116),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,13874.885895


In [44]:
points_with_regions_full.groupby("RegionName")["region_area"].sum()

RegionName
NSW Barwon Darling           8.798566e+13
NSW Border Rivers            4.605555e+12
NSW Gwydir                   5.293958e+12
NSW Lachlan                  5.654498e+13
NSW Lower Darling            1.628484e+13
NSW Macquarie-Castlereagh    4.620823e+13
NSW Murray Above             3.175809e+11
NSW Murrumbidgee             7.502593e+13
NSW Namoi                    2.114576e+13
QLD Border Rivers            4.140163e+12
QLD Condamine-Balonne        9.371014e+13
QLD Moonie                   7.906842e+11
Name: region_area, dtype: float64

In [45]:
number = points_with_regions_full.groupby('RegionName')["Site"].count().reset_index().rename(columns = {"Site" : "number_of_stations"})
number

Unnamed: 0,RegionName,number_of_stations
0,NSW Barwon Darling,408
1,NSW Border Rivers,194
2,NSW Gwydir,173
3,NSW Lachlan,465
4,NSW Lower Darling,142
5,NSW Macquarie-Castlereagh,465
6,NSW Murray Above,55
7,NSW Murrumbidgee,597
8,NSW Namoi,410
9,QLD Border Rivers,149


In [46]:
points_with_regions_full = pd.merge(points_with_regions_full, points_with_regions_full.groupby('RegionName')["Site"].count().reset_index().rename(columns = {"Site" : "number_of_stations"}), on = "RegionName")
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid,region_area,distance_from_centroid_meters_m,number_of_stations
0,148.1000,-25.3333,35152,660.0,POINT (16486416.586 -2916738.876),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,259883.676399,627
1,147.9167,-25.1667,35163,,POINT (16466011.724 -2896233.894),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,285924.990785,627
2,148.2667,-25.5333,35245,460.0,POINT (16504973.546 -2941392.031),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,230981.953064,627
3,148.2025,-25.2444,35296,540.0,POINT (16497826.834 -2905793.644),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,267221.222803,627
4,152.4775,-28.2367,40030,780.0,POINT (16973717.657 -3278849.153),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,424079.929941,627
...,...,...,...,...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (16468483.017 -4307613.627),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,8939.410047,55
3731,148.0500,-36.3000,82075,,POINT (16480850.612 -4341979.816),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,31810.270154,55
3732,147.9333,-36.0500,82123,,POINT (16467859.627 -4307503.478),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,9566.431708,55
3733,147.8957,-36.0424,82171,,POINT (16463674.015 -4306457.116),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,13874.885895,55


In [47]:
points_with_regions_full["stations_per_area"] = points_with_regions_full["region_area"] / points_with_regions_full["number_of_stations"]
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid,region_area,distance_from_centroid_meters_m,number_of_stations,stations_per_area
0,148.1000,-25.3333,35152,660.0,POINT (16486416.586 -2916738.876),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,259883.676399,627,2.383699e+08
1,147.9167,-25.1667,35163,,POINT (16466011.724 -2896233.894),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,285924.990785,627,2.383699e+08
2,148.2667,-25.5333,35245,460.0,POINT (16504973.546 -2941392.031),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,230981.953064,627,2.383699e+08
3,148.2025,-25.2444,35296,540.0,POINT (16497826.834 -2905793.644),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,267221.222803,627,2.383699e+08
4,152.4775,-28.2367,40030,780.0,POINT (16973717.657 -3278849.153),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,424079.929941,627,2.383699e+08
...,...,...,...,...,...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (16468483.017 -4307613.627),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,8939.410047,55,1.049854e+08
3731,148.0500,-36.3000,82075,,POINT (16480850.612 -4341979.816),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,31810.270154,55,1.049854e+08
3732,147.9333,-36.0500,82123,,POINT (16467859.627 -4307503.478),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,9566.431708,55,1.049854e+08
3733,147.8957,-36.0424,82171,,POINT (16463674.015 -4306457.116),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,13874.885895,55,1.049854e+08


In [48]:
points_with_regions_full.groupby("RegionName").aggregate({"stations_per_area": 'first'})

Unnamed: 0_level_0,stations_per_area
RegionName,Unnamed: 1_level_1
NSW Barwon Darling,528556700.0
NSW Border Rivers,122371000.0
NSW Gwydir,176883900.0
NSW Lachlan,261509900.0
NSW Lower Darling,807619400.0
NSW Macquarie-Castlereagh,213704400.0
NSW Murray Above,104985400.0
NSW Murrumbidgee,210505100.0
NSW Namoi,125792800.0
QLD Border Rivers,186485400.0


In [49]:
points_with_regions_full.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3735 entries, 0 to 3734
Data columns (total 12 columns):
 #   Column                           Non-Null Count  Dtype   
---  ------                           --------------  -----   
 0   Lon                              3735 non-null   float64 
 1   Lat                              3735 non-null   float64 
 2   Site                             3735 non-null   int64   
 3   Height (m)                       2791 non-null   object  
 4   geometry_point                   3735 non-null   geometry
 5   RegionName                       3735 non-null   object  
 6   polygon                          3735 non-null   geometry
 7   region_centroid                  3735 non-null   geometry
 8   region_area                      3735 non-null   float64 
 9   distance_from_centroid_meters_m  3735 non-null   float64 
 10  number_of_stations               3735 non-null   int64   
 11  stations_per_area                3735 non-null   float64 
dty

Expanding polygons to address border points

In [50]:
results = []

In [51]:
for region_name, group in points_with_regions_full.groupby('RegionName'):
    poly = group.iloc[0].polygon  # polygon for this region
    
    if len(group) < 3:
        continue  # Skip if fewer than 3 points
    
    coords = np.array([[pt.x, pt.y] for pt in group.geometry_point])
    
    vor = Voronoi(coords)
    
    for point_idx, region_idx in enumerate(vor.point_region):
        region = vor.regions[region_idx]
        if -1 in region or len(region) == 0:
            continue
        
        vor_poly = Polygon([vor.vertices[i] for i in region])
        clipped_poly = vor_poly.intersection(poly)
        
        results.append({
            'Site': group.iloc[point_idx].Site,
            'Voronoi': clipped_poly
        })

In [52]:
gdf_voronoi = gpd.GeoDataFrame(results, geometry='Voronoi', crs = points_with_regions_full["polygon"].crs)
gdf_voronoi

Unnamed: 0,Site,Voronoi
0,46004,"POLYGON ((15957911.756 -3726173.932, 15965805...."
1,46008,"POLYGON ((15907236.816 -3667344.537, 15907333...."
2,46012,"POLYGON ((15952333.176 -3694510.032, 15971115...."
3,46017,"POLYGON ((15914395.284 -3674721.738, 15940852...."
4,46022,"POLYGON ((15979912.903 -3677389.391, 16001938...."
...,...,...
3561,42121,"POLYGON ((16598414.293 -3289735.712, 16590765...."
3562,43045,"POLYGON ((16599459.164 -3261612.323, 16599909...."
3563,43089,"POLYGON ((16629084.083 -3230293.245, 16627922...."
3564,43097,"POLYGON ((16641461.603 -3217128.891, 16642561...."


In [53]:
points_with_regions_full = points_with_regions_full.merge(gdf_voronoi, on = "Site", how = "left")
points_with_regions_full.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3735 entries, 0 to 3734
Data columns (total 13 columns):
 #   Column                           Non-Null Count  Dtype   
---  ------                           --------------  -----   
 0   Lon                              3735 non-null   float64 
 1   Lat                              3735 non-null   float64 
 2   Site                             3735 non-null   int64   
 3   Height (m)                       2791 non-null   object  
 4   geometry_point                   3735 non-null   geometry
 5   RegionName                       3735 non-null   object  
 6   polygon                          3735 non-null   geometry
 7   region_centroid                  3735 non-null   geometry
 8   region_area                      3735 non-null   float64 
 9   distance_from_centroid_meters_m  3735 non-null   float64 
 10  number_of_stations               3735 non-null   int64   
 11  stations_per_area                3735 non-null   float64 
 12

Missing Voronoi

In [54]:
points_with_regions_full[points_with_regions_full['Voronoi'].isna()].groupby("RegionName")["Site"].count() / points_with_regions_full.groupby("RegionName")["number_of_stations"].aggregate("first")

RegionName
NSW Barwon Darling           0.026961
NSW Border Rivers            0.097938
NSW Gwydir                   0.075145
NSW Lachlan                  0.034409
NSW Lower Darling            0.126761
NSW Macquarie-Castlereagh    0.043011
NSW Murray Above             0.145455
NSW Murrumbidgee             0.028476
NSW Namoi                    0.034146
QLD Border Rivers            0.080537
QLD Condamine-Balonne        0.023923
QLD Moonie                   0.120000
dtype: float64

In [55]:
vor_areas = pd.DataFrame({"RegionName" : points_with_regions_full["RegionName"], "Vor Area" : points_with_regions_full['Voronoi'].area}).groupby("RegionName").mean()

In [56]:
vor_areas

Unnamed: 0_level_0,Vor Area
RegionName,Unnamed: 1_level_1
NSW Barwon Darling,534639400.0
NSW Border Rivers,128725800.0
NSW Gwydir,184123100.0
NSW Lachlan,268321700.0
NSW Lower Darling,826503700.0
NSW Macquarie-Castlereagh,220312000.0
NSW Murray Above,108884300.0
NSW Murrumbidgee,215548200.0
NSW Namoi,121060500.0
QLD Border Rivers,180834100.0


In [57]:
mapping_areas = list(vor_areas.to_dict().values())[0]
mapping_areas

{'NSW Barwon Darling': 534639394.02911943,
 'NSW Border Rivers': 128725782.5389003,
 'NSW Gwydir': 184123114.26877087,
 'NSW Lachlan': 268321743.47466582,
 'NSW Lower Darling': 826503686.2609588,
 'NSW Macquarie-Castlereagh': 220311960.85637632,
 'NSW Murray Above': 108884333.75164682,
 'NSW Murrumbidgee': 215548209.01556858,
 'NSW Namoi': 121060479.63599645,
 'QLD Border Rivers': 180834123.04191157,
 'QLD Condamine-Balonne': 233985451.99792665,
 'QLD Moonie': 325913224.58657646}

In [58]:
points_with_regions_full['Missing_Voronoi'] = points_with_regions_full['Voronoi'].isna()
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid,region_area,distance_from_centroid_meters_m,number_of_stations,stations_per_area,Voronoi,Missing_Voronoi
0,148.1000,-25.3333,35152,660.0,POINT (16486416.586 -2916738.876),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,259883.676399,627,2.383699e+08,"POLYGON ((16482007.003 -2900721.823, 16503534....",False
1,147.9167,-25.1667,35163,,POINT (16466011.724 -2896233.894),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,285924.990785,627,2.383699e+08,,True
2,148.2667,-25.5333,35245,460.0,POINT (16504973.546 -2941392.031),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,230981.953064,627,2.383699e+08,"POLYGON ((16489637.251 -2933625.301, 16503534....",False
3,148.2025,-25.2444,35296,540.0,POINT (16497826.834 -2905793.644),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,267221.222803,627,2.383699e+08,,True
4,152.4775,-28.2367,40030,780.0,POINT (16973717.657 -3278849.153),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,424079.929941,627,2.383699e+08,"POLYGON ((16969277.637 -3280413.955, 16972771....",False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (16468483.017 -4307613.627),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,8939.410047,55,1.049854e+08,"POLYGON ((16470179.283 -4296194.502, 16483715....",False
3731,148.0500,-36.3000,82075,,POINT (16480850.612 -4341979.816),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,31810.270154,55,1.049854e+08,"POLYGON ((16485487.069 -4338748.535, 16485487....",False
3732,147.9333,-36.0500,82123,,POINT (16467859.627 -4307503.478),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,9566.431708,55,1.049854e+08,"POLYGON ((16468856.751 -4294620.083, 16470179....",False
3733,147.8957,-36.0424,82171,,POINT (16463674.015 -4306457.116),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,13874.885895,55,1.049854e+08,"POLYGON ((16467498.74 -4294852.499, 16468666.9...",False


In [59]:
points_with_regions_full["vor_area"] = points_with_regions_full["Voronoi"].area
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid,region_area,distance_from_centroid_meters_m,number_of_stations,stations_per_area,Voronoi,Missing_Voronoi,vor_area
0,148.1000,-25.3333,35152,660.0,POINT (16486416.586 -2916738.876),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,259883.676399,627,2.383699e+08,"POLYGON ((16482007.003 -2900721.823, 16503534....",False,6.363978e+08
1,147.9167,-25.1667,35163,,POINT (16466011.724 -2896233.894),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,285924.990785,627,2.383699e+08,,True,
2,148.2667,-25.5333,35245,460.0,POINT (16504973.546 -2941392.031),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,230981.953064,627,2.383699e+08,"POLYGON ((16489637.251 -2933625.301, 16503534....",False,1.263409e+09
3,148.2025,-25.2444,35296,540.0,POINT (16497826.834 -2905793.644),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,267221.222803,627,2.383699e+08,,True,
4,152.4775,-28.2367,40030,780.0,POINT (16973717.657 -3278849.153),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,424079.929941,627,2.383699e+08,"POLYGON ((16969277.637 -3280413.955, 16972771....",False,4.228012e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3730,147.9389,-36.0508,82062,250.0,POINT (16468483.017 -4307613.627),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,8939.410047,55,1.049854e+08,"POLYGON ((16470179.283 -4296194.502, 16483715....",False,2.248445e+08
3731,148.0500,-36.3000,82075,,POINT (16480850.612 -4341979.816),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,31810.270154,55,1.049854e+08,"POLYGON ((16485487.069 -4338748.535, 16485487....",False,5.977992e+07
3732,147.9333,-36.0500,82123,,POINT (16467859.627 -4307503.478),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,9566.431708,55,1.049854e+08,"POLYGON ((16468856.751 -4294620.083, 16470179....",False,4.331613e+07
3733,147.8957,-36.0424,82171,,POINT (16463674.015 -4306457.116),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,13874.885895,55,1.049854e+08,"POLYGON ((16467498.74 -4294852.499, 16468666.9...",False,8.452069e+07


Replacing missing voronoi area with average area in the region

In [None]:
points_with_regions_full['vor_area'] = points_with_regions_full.apply(
    lambda row: mapping_areas[row['RegionName']] if pd.isna(row['vor_area']) else row['vor_area'],
    axis = 1
)

In [297]:
points_with_regions_full[points_with_regions_full["Missing_Voronoi"] == 1]

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid,region_area,distance_from_centroid_meters_m,number_of_stations,stations_per_area,Voronoi,Missing_Voronoi,vor_area
1,147.9167,-25.1667,35163,,POINT (16466011.724 -2896233.894),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,285924.990785,627,2.383699e+08,,True,2.339855e+08
3,148.2025,-25.2444,35296,540.0,POINT (16497826.834 -2905793.644),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,267221.222803,627,2.383699e+08,,True,2.339855e+08
5,151.1922,-26.3828,40071,365.0,POINT (16830638.716 -3046570.132),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,290212.910980,627,2.383699e+08,,True,2.339855e+08
6,152.0542,-27.3042,40170,667.0,POINT (16926596.117 -3161529.198),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,361214.710569,627,2.383699e+08,,True,2.339855e+08
14,152.5232,-28.2490,40485,637.0,POINT (16978804.958 -3280403.419),QLD Condamine-Balonne,"MULTIPOLYGON (((16312710.747 -3153353.267, 163...",POINT (16565392.281 -3164332.02),1.494580e+11,429397.963833,627,2.383699e+08,,True,2.339855e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3701,148.2106,-35.8445,72103,800.0,POINT (16498728.522 -4279245.853),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,38002.711495,55,1.049854e+08,,True,1.088843e+08
3704,148.3667,-36.2333,72112,1676.4,POINT (16516105.495 -4332770.76),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,45070.602272,55,1.049854e+08,,True,1.088843e+08
3714,148.1333,-36.5333,72125,518.2,POINT (16490123.526 -4374252.944),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,65186.272452,55,1.049854e+08,,True,1.088843e+08
3727,147.5075,-35.9532,72167,198.0,POINT (16420459.788 -4294183.674),NSW Murray Above,"MULTIPOLYGON (((16399252.466 -4296082.394, 164...",POINT (16476974.939 -4310406.529),5.774198e+09,58797.477241,55,1.049854e+08,,True,1.088843e+08


In [61]:
points_with_regions_full.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 3735 entries, 0 to 3734
Data columns (total 15 columns):
 #   Column                           Non-Null Count  Dtype   
---  ------                           --------------  -----   
 0   Lon                              3735 non-null   float64 
 1   Lat                              3735 non-null   float64 
 2   Site                             3735 non-null   int64   
 3   Height (m)                       2791 non-null   object  
 4   geometry_point                   3735 non-null   geometry
 5   RegionName                       3735 non-null   object  
 6   polygon                          3735 non-null   geometry
 7   region_centroid                  3735 non-null   geometry
 8   region_area                      3735 non-null   float64 
 9   distance_from_centroid_meters_m  3735 non-null   float64 
 10  number_of_stations               3735 non-null   int64   
 11  stations_per_area                3735 non-null   float64 
 12

In [299]:
points_with_regions_full.to_csv("site_geometry.csv", index = False)

In [62]:
points_with_regions_full = pd.read_csv("site_geometry.csv", index_col=False)
points_with_regions_full

Unnamed: 0,Lon,Lat,Site,Height (m),geometry_point,RegionName,polygon,region_centroid,region_area,distance_from_centroid_meters_m,number_of_stations,stations_per_area,Voronoi,Missing_Voronoi,vor_area
0,148.1000,-25.3333,35152,660.0,POINT (16486416.586483816 -2916738.8764374424),QLD Condamine-Balonne,MULTIPOLYGON (((16312710.746744394 -3153353.26...,POINT (16565392.28073292 -3164332.020479314),1.494580e+11,259883.676399,627,2.383699e+08,POLYGON ((16482007.002564713 -2900721.82263475...,False,6.363978e+08
1,147.9167,-25.1667,35163,,POINT (16466011.723821407 -2896233.8942950936),QLD Condamine-Balonne,MULTIPOLYGON (((16312710.746744394 -3153353.26...,POINT (16565392.28073292 -3164332.020479314),1.494580e+11,285924.990785,627,2.383699e+08,,True,2.339855e+08
2,148.2667,-25.5333,35245,460.0,POINT (16504973.545599053 -2941392.0306032943),QLD Condamine-Balonne,MULTIPOLYGON (((16312710.746744394 -3153353.26...,POINT (16565392.28073292 -3164332.020479314),1.494580e+11,230981.953064,627,2.383699e+08,"POLYGON ((16489637.250531323 -2933625.3014811,...",False,1.263409e+09
3,148.2025,-25.2444,35296,540.0,POINT (16497826.834290124 -2905793.6435146336),QLD Condamine-Balonne,MULTIPOLYGON (((16312710.746744394 -3153353.26...,POINT (16565392.28073292 -3164332.020479314),1.494580e+11,267221.222803,627,2.383699e+08,,True,2.339855e+08
4,152.4775,-28.2367,40030,780.0,POINT (16973717.65743137 -3278849.153358936),QLD Condamine-Balonne,MULTIPOLYGON (((16312710.746744394 -3153353.26...,POINT (16565392.28073292 -3164332.020479314),1.494580e+11,424079.929941,627,2.383699e+08,POLYGON ((16969277.637199324 -3280413.95538110...,False,4.228012e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3730,147.7366,-35.9631,82052,215.0,POINT (16445963.083529541 -4295545.1761145815),NSW Murray Above,MULTIPOLYGON (((16399252.465663942 -4296082.39...,POINT (16476974.939223811 -4310406.528816415),5.774198e+09,34388.878983,55,1.049854e+08,MULTIPOLYGON (((16435875.071669418 -4299303.11...,False,7.068900e+07
3731,147.9389,-36.0508,82062,250.0,POINT (16468483.016517017 -4307613.626969586),NSW Murray Above,MULTIPOLYGON (((16399252.465663942 -4296082.39...,POINT (16476974.939223811 -4310406.528816415),5.774198e+09,8939.410047,55,1.049854e+08,POLYGON ((16470179.282985391 -4296194.50192736...,False,2.248445e+08
3732,148.0500,-36.3000,82075,,POINT (16480850.611944152 -4341979.815756792),NSW Murray Above,MULTIPOLYGON (((16399252.465663942 -4296082.39...,POINT (16476974.939223811 -4310406.528816415),5.774198e+09,31810.270154,55,1.049854e+08,POLYGON ((16485487.068735693 -4338748.53488402...,False,5.977992e+07
3733,147.9333,-36.0500,82123,,POINT (16467859.627368577 -4307503.477764327),NSW Murray Above,MULTIPOLYGON (((16399252.465663942 -4296082.39...,POINT (16476974.939223811 -4310406.528816415),5.774198e+09,9566.431708,55,1.049854e+08,POLYGON ((16468856.75148963 -4294620.083498342...,False,4.331613e+07


### Getting data

In [26]:
base_url = "https://www.longpaddock.qld.gov.au/cgi-bin/silo/PatchedPointDataset.php"

In [None]:
def load_data(region):
    os.mkdir(region)
    counter = 0
    for id in points_with_regions[points_with_regions["RegionName"] == region]["Site"]:
        params = {
            'station': id,
            'start': '20060101',
            'finish': '20211231',
            'format': 'csv',
            'username': 'ereb.molod@gmail.com',
            'comment': 'rxnjhgf', # daily rain, max temp, min temp, radiation, rel humidity at max temp, rel humidity at min temp, FAO56 short crop evapotranspiration
        }
        
        response = requests.get(base_url, params = params)

        if "Invalid station number." in response.text:
            continue
        elif response.status_code == 200:
            with open(region+"/"+"station_"+str(id)+"_data.csv", "wb") as file: # Save to csv
                file.write(response.content)
            counter += 1
        else:
            with open(region+"/"+"station_"+str(id)+"_error.csv", "wb") as file: # Errors
                file.write(response.status_code)
            counter += 1

In [33]:
for reg in points_with_regions.RegionName.unique():
    print(reg)

QLD Condamine-Balonne
QLD Moonie
QLD Border Rivers
NSW Border Rivers
NSW Barwon Darling
NSW Lower Darling
NSW Murrumbidgee
NSW Macquarie-Castlereagh
NSW Lachlan
NSW Namoi
NSW Gwydir
NSW Murray Above


In [34]:
load_data("QLD Moonie")

Downlaoded 16 files


In [35]:
load_data("QLD Condamine-Balonne")

Downlaoded 265 files


In [36]:
load_data("QLD Border Rivers")

Downlaoded 62 files


In [37]:
load_data("NSW Border Rivers")

Downlaoded 62 files


In [38]:
load_data("NSW Barwon Darling")

Downlaoded 172 files


In [39]:
load_data("NSW Lower Darling")

Downlaoded 70 files


In [40]:
load_data("NSW Murrumbidgee")

Downlaoded 279 files


In [41]:
load_data("NSW Macquarie-Castlereagh")

Downlaoded 180 files


In [42]:
load_data("NSW Lachlan")

Downlaoded 197 files


In [43]:
load_data("NSW Namoi")

Downlaoded 161 files


In [44]:
load_data("NSW Gwydir")

Downlaoded 61 files


In [45]:
load_data("NSW Murray Above")

Downlaoded 13 files


In [46]:
for station in os.listdir("NSW Murray Above"):
    print(station.split("_")[1])

71003
72009
72010
72025
72030
72033
72038
72053
72060
72065
72162
82007
82052


### Creating with dataframes

In [65]:
def create_frame(region):
    frame = pd.DataFrame()
    for station in os.listdir(region):
        csv = pd.read_csv(region+"/"+station)
        frame = pd.concat([frame, csv], axis = 0)
    frame.drop(["metadata", "daily_rain_source", "max_temp_source", "min_temp_source", "radiation_source", "rh_tmax_source", "rh_tmin_source", "et_short_crop_source"], axis = 1, inplace = True)
    frame["Date"] = pd.to_datetime(frame["YYYY-MM-DD"])
    frame["Year"] = frame["Date"].dt.year
    frame["Month"] = frame["Date"].dt.month
    frame["Region"] = pd.Series([region]*len(frame))
    return frame

In [66]:
def group_frame(frame):
    new_frame_grouped_1 = frame.groupby("Year")[["daily_rain", "max_temp", "min_temp", "radiation"]].mean().reset_index().rename({"daily_rain" : "mean_daily_rain", 
                                                                                                                                "max_temp" : "mean_max_temp", 
                                                                                                                                "min_temp" : "mean_min_temp",
                                                                                                                                "radiation" : "mean_radiation"}, axis = 1)
    
    new_frame_grouped_2 = frame.groupby("Year")[["daily_rain", "max_temp", "min_temp", "radiation"]].median().reset_index().rename({"daily_rain" : "median_daily_rain", 
                                                                                                                                "max_temp" : "median_max_temp", 
                                                                                                                                "min_temp" : "median_min_temp",
                                                                                                                                "radiation" : "median_radiation"}, axis = 1).drop(["Year"], axis = 1)
    new_frame = pd.concat([new_frame_grouped_1, new_frame_grouped_2], axis = 1)
    new_frame["Region"] = pd.Series([frame.iloc[0].Region]*len(new_frame))

    return new_frame

In [27]:
dataframe = pd.DataFrame()

In [28]:
for reg in points_with_regions.RegionName.unique():
    region_frame = create_frame(reg)
    region_frame_groupped = group_frame(region_frame)
    dataframe = pd.concat([dataframe, region_frame_groupped], axis = 0)

In [29]:
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
Index: 192 entries, 0 to 15
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Year               192 non-null    int32  
 1   mean_daily_rain    192 non-null    float64
 2   mean_max_temp      192 non-null    float64
 3   mean_min_temp      192 non-null    float64
 4   mean_radiation     192 non-null    float64
 5   median_daily_rain  192 non-null    float64
 6   median_max_temp    192 non-null    float64
 7   median_min_temp    192 non-null    float64
 8   median_radiation   192 non-null    float64
 9   Region             192 non-null    object 
dtypes: float64(8), int32(1), object(1)
memory usage: 15.8+ KB


In [30]:
dataframe.head()

Unnamed: 0,Year,mean_daily_rain,mean_max_temp,mean_min_temp,mean_radiation,median_daily_rain,median_max_temp,median_min_temp,median_radiation,Region
0,2006,1.076766,27.018827,12.158717,20.367608,0.0,27.0,12.7,19.9,QLD Condamine-Balonne
1,2007,1.572086,26.404323,12.632449,19.360575,0.0,27.2,13.4,19.5,QLD Condamine-Balonne
2,2008,1.760073,25.602605,11.457728,19.161984,0.0,25.7,12.1,18.5,QLD Condamine-Balonne
3,2009,1.322562,27.276941,12.444979,19.603434,0.0,27.3,13.2,18.9,QLD Condamine-Balonne
4,2010,2.900269,24.591251,12.569008,17.408252,0.0,24.7,13.6,17.3,QLD Condamine-Balonne


In [31]:
dataframe = dataframe[dataframe["Year"] != 2021]
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
Index: 180 entries, 0 to 14
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Year               180 non-null    int32  
 1   mean_daily_rain    180 non-null    float64
 2   mean_max_temp      180 non-null    float64
 3   mean_min_temp      180 non-null    float64
 4   mean_radiation     180 non-null    float64
 5   median_daily_rain  180 non-null    float64
 6   median_max_temp    180 non-null    float64
 7   median_min_temp    180 non-null    float64
 8   median_radiation   180 non-null    float64
 9   Region             180 non-null    object 
dtypes: float64(8), int32(1), object(1)
memory usage: 14.8+ KB


In [32]:
dataframe.groupby('Region').count()

Unnamed: 0_level_0,Year,mean_daily_rain,mean_max_temp,mean_min_temp,mean_radiation,median_daily_rain,median_max_temp,median_min_temp,median_radiation
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
NSW Barwon Darling,15,15,15,15,15,15,15,15,15
NSW Border Rivers,15,15,15,15,15,15,15,15,15
NSW Gwydir,15,15,15,15,15,15,15,15,15
NSW Lachlan,15,15,15,15,15,15,15,15,15
NSW Lower Darling,15,15,15,15,15,15,15,15,15
NSW Macquarie-Castlereagh,15,15,15,15,15,15,15,15,15
NSW Murray Above,15,15,15,15,15,15,15,15,15
NSW Murrumbidgee,15,15,15,15,15,15,15,15,15
NSW Namoi,15,15,15,15,15,15,15,15,15
QLD Border Rivers,15,15,15,15,15,15,15,15,15


In [36]:
dataframe.to_csv("climate_data_processed.csv", index = False)

In [34]:
dataframe.Year.unique()

array([2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016,
       2017, 2018, 2019, 2020])

### Creating dataframe with per day

In [63]:
dailyframe = pd.DataFrame()

In [None]:
for reg in points_with_regions.RegionName.unique():
    region_frame = create_frame(reg)
    dailyframe = pd.concat([dailyframe, region_frame], axis = 0)

In [35]:
dailyframe.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8988072 entries, 0 to 5843
Data columns (total 13 columns):
 #   Column         Dtype         
---  ------         -----         
 0   station        int64         
 1   YYYY-MM-DD     object        
 2   daily_rain     float64       
 3   max_temp       float64       
 4   min_temp       float64       
 5   radiation      float64       
 6   rh_tmax        float64       
 7   rh_tmin        float64       
 8   et_short_crop  float64       
 9   Date           datetime64[ns]
 10  Year           int32         
 11  Month          int32         
 12  Region         object        
dtypes: datetime64[ns](1), float64(7), int32(2), int64(1), object(2)
memory usage: 891.5+ MB


In [36]:
dailyframe.groupby('Region').count()

Unnamed: 0_level_0,station,YYYY-MM-DD,daily_rain,max_temp,min_temp,radiation,rh_tmax,rh_tmin,et_short_crop,Date,Year,Month
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NSW Barwon Darling,1005168,1005168,1005168,1005168,1005168,1005168,1005168,1005168,1005168,1005168,1005168,1005168
NSW Border Rivers,362328,362328,362328,362328,362328,362328,362328,362328,362328,362328,362328,362328
NSW Gwydir,356484,356484,356484,356484,356484,356484,356484,356484,356484,356484,356484,356484
NSW Lachlan,1151268,1151268,1151268,1151268,1151268,1151268,1151268,1151268,1151268,1151268,1151268,1151268
NSW Lower Darling,409080,409080,409080,409080,409080,409080,409080,409080,409080,409080,409080,409080
NSW Macquarie-Castlereagh,1051920,1051920,1051920,1051920,1051920,1051920,1051920,1051920,1051920,1051920,1051920,1051920
NSW Murray Above,75972,75972,75972,75972,75972,75972,75972,75972,75972,75972,75972,75972
NSW Murrumbidgee,1630476,1630476,1630476,1630476,1630476,1630476,1630476,1630476,1630476,1630476,1630476,1630476
NSW Namoi,940884,940884,940884,940884,940884,940884,940884,940884,940884,940884,940884,940884
QLD Border Rivers,362328,362328,362328,362328,362328,362328,362328,362328,362328,362328,362328,362328


In [37]:
dailyframe.reset_index(drop = True, inplace = True)
dailyframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8988072 entries, 0 to 8988071
Data columns (total 13 columns):
 #   Column         Dtype         
---  ------         -----         
 0   station        int64         
 1   YYYY-MM-DD     object        
 2   daily_rain     float64       
 3   max_temp       float64       
 4   min_temp       float64       
 5   radiation      float64       
 6   rh_tmax        float64       
 7   rh_tmin        float64       
 8   et_short_crop  float64       
 9   Date           datetime64[ns]
 10  Year           int32         
 11  Month          int32         
 12  Region         object        
dtypes: datetime64[ns](1), float64(7), int32(2), int64(1), object(2)
memory usage: 822.9+ MB


In [38]:
dailyframe[dailyframe.duplicated()]

Unnamed: 0,station,YYYY-MM-DD,daily_rain,max_temp,min_temp,radiation,rh_tmax,rh_tmin,et_short_crop,Date,Year,Month,Region


In [39]:
dailyframe.Region.unique()

array(['QLD Condamine-Balonne', 'QLD Moonie', 'QLD Border Rivers',
       'NSW Border Rivers', 'NSW Barwon Darling', 'NSW Lower Darling',
       'NSW Murrumbidgee', 'NSW Macquarie-Castlereagh', 'NSW Lachlan',
       'NSW Namoi', 'NSW Gwydir', 'NSW Murray Above'], dtype=object)

In [40]:
dailyframe = dailyframe[dailyframe["Year"] != 2021]

In [41]:
total = len(dailyframe)
total

8426702

In [42]:
dailyframe.drop("YYYY-MM-DD", axis = 1, inplace = True)

In [None]:
dailyframe["Day"] = dailyframe["Date"].dt.day

In [44]:
dailyframe.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8426702 entries, 0 to 8987706
Data columns (total 13 columns):
 #   Column         Dtype         
---  ------         -----         
 0   station        int64         
 1   daily_rain     float64       
 2   max_temp       float64       
 3   min_temp       float64       
 4   radiation      float64       
 5   rh_tmax        float64       
 6   rh_tmin        float64       
 7   et_short_crop  float64       
 8   Date           datetime64[ns]
 9   Year           int32         
 10  Month          int32         
 11  Region         object        
 12  Day            int32         
dtypes: datetime64[ns](1), float64(7), int32(3), int64(1), object(1)
memory usage: 803.6+ MB


In [45]:
nan_count_per_column = dailyframe.isna().sum()
print((len(dailyframe) - nan_count_per_column[nan_count_per_column > 0]) / len(dailyframe))

Series([], dtype: float64)


In [46]:
dailyframe.isna().sum()

station          0
daily_rain       0
max_temp         0
min_temp         0
radiation        0
rh_tmax          0
rh_tmin          0
et_short_crop    0
Date             0
Year             0
Month            0
Region           0
Day              0
dtype: int64

In [47]:
dailyframe[dailyframe.isnull().sum(axis=1) > 0]

Unnamed: 0,station,daily_rain,max_temp,min_temp,radiation,rh_tmax,rh_tmin,et_short_crop,Date,Year,Month,Region,Day


In [48]:
dailyframe["Day"].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])

### Create a Frame of Growing Seasons

Seasons : Planting, Growing, Flowering, Boll development & maturation, Harvest

In [49]:
dailyframe["Region"].unique()

array(['QLD Condamine-Balonne', 'QLD Moonie', 'QLD Border Rivers',
       'NSW Border Rivers', 'NSW Barwon Darling', 'NSW Lower Darling',
       'NSW Murrumbidgee', 'NSW Macquarie-Castlereagh', 'NSW Lachlan',
       'NSW Namoi', 'NSW Gwydir', 'NSW Murray Above'], dtype=object)

In [50]:
def map_north(dates):
    if ((dates.Month == 9) and (dates.Day > 20)) or (dates.Month == 10):
        return "Planting"
    elif (dates.Month == 11) or (dates.Month == 12):
        return "Growing"
    elif (dates.Month == 1):
        return "Flowering"
    elif (dates.Month == 2) or (dates.Month == 3):
        return "Maturation"
    elif (dates.Month == 4):
        return "Harvesting"
    else:
        return "NotSeason"

In [51]:
def map_south(dates):
    if ((dates.Month == 10) and (dates.Day >= 15)) or (dates.Month == 11):
        return "Planting"
    elif (dates.Month == 12):
        return "Growing"
    elif (dates.Month == 1):
        return "Flowering"
    elif (dates.Month == 2) or (dates.Month == 3):
        return "Maturation"
    elif (dates.Month == 4) or (dates.Month == 5):
        return "Harvesting"
    else:
        return "NotSeason"

In [52]:
def season_mapping(frame):
    north = ["QLD Condamine-Balonne", "QLD Moonie" ,"QLD Border Rivers", "NSW Border Rivers", "NSW Barwon-Darling", "NSW Lower Darling", "NSW Macquarie-Castlereagh", "NSW Namoi", "NSW Gwydir"]
    # south = ["NSW Murrumbidgee", "NSW Lachlan", "NSW Murray Above"]

    if frame.Region in north:
        return map_north(frame)
    else:
        return map_south(frame)

In [53]:
daily_frame_test = dailyframe.copy()

In [54]:
daily_frame_test["Season"] = dailyframe[["Region", "Month", "Day"]].apply(season_mapping, axis=1)

In [55]:
daily_frame_test["Season"].value_counts()

Season
NotSeason     3229605
Maturation    1367282
Growing       1109820
Planting      1005360
Harvesting     999465
Flowering      715170
Name: count, dtype: int64

In [None]:
daily_frame_test = daily_frame_test[daily_frame_test["Season"] != "NotSeason"].reset_index(drop= True)
daily_frame_test.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5197097 entries, 0 to 8987706
Data columns (total 14 columns):
 #   Column         Dtype         
---  ------         -----         
 0   station        int64         
 1   daily_rain     float64       
 2   max_temp       float64       
 3   min_temp       float64       
 4   radiation      float64       
 5   rh_tmax        float64       
 6   rh_tmin        float64       
 7   et_short_crop  float64       
 8   Date           datetime64[ns]
 9   Year           int32         
 10  Month          int32         
 11  Region         object        
 12  Day            int32         
 13  Season         object        
dtypes: datetime64[ns](1), float64(7), int32(3), int64(1), object(2)
memory usage: 535.3+ MB


In [59]:
daily_frame_test.to_csv("combined_daily_frame_with_seasons_to_be_aggr.csv", index = False)

### Loading And Combining to aggregate

In [11]:
points_with_regions_full = pd.read_csv("site_geometry.csv", index_col=False)

In [12]:
daily_frame_test = pd.read_csv("combined_daily_frame_with_seasons_to_be_aggr.csv")
daily_frame_test

Unnamed: 0,station,daily_rain,max_temp,min_temp,radiation,rh_tmax,rh_tmin,et_short_crop,Date,Year,Month,Region,Day,Season
0,40030,0.0,30.8,15.2,34.6,37.1,95.6,7.2,2006-01-01,2006,1,QLD Condamine-Balonne,1,Flowering
1,40030,0.0,34.0,16.1,32.9,38.0,100.0,7.2,2006-01-02,2006,1,QLD Condamine-Balonne,2,Flowering
2,40030,0.0,36.7,18.1,30.4,31.9,94.9,7.5,2006-01-03,2006,1,QLD Condamine-Balonne,3,Flowering
3,40030,5.0,24.0,19.3,10.1,78.4,100.0,2.2,2006-01-04,2006,1,QLD Condamine-Balonne,4,Flowering
4,40030,16.0,27.7,17.3,14.0,58.4,100.0,3.4,2006-01-05,2006,1,QLD Condamine-Balonne,5,Flowering
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5197092,82052,0.0,34.2,10.8,23.9,26.8,100.0,6.3,2020-12-27,2020,12,NSW Murray Above,27,Growing
5197093,82052,0.0,31.5,17.5,29.1,29.9,69.0,6.9,2020-12-28,2020,12,NSW Murray Above,28,Growing
5197094,82052,0.0,29.9,10.1,30.8,27.3,93.1,6.5,2020-12-29,2020,12,NSW Murray Above,29,Growing
5197095,82052,0.0,31.5,12.8,28.1,30.3,94.8,6.4,2020-12-30,2020,12,NSW Murray Above,30,Growing


In [None]:
points_with_regions_full.rename(columns = {"Site" : "station"}, inplace = True)

In [15]:
daily_frame_test = daily_frame_test.merge(points_with_regions_full[["station", "vor_area"]], on = "station", how = "left")
daily_frame_test

Unnamed: 0,station,daily_rain,max_temp,min_temp,radiation,rh_tmax,rh_tmin,et_short_crop,Date,Year,Month,Region,Day,Season,vor_area
0,40030,0.0,30.8,15.2,34.6,37.1,95.6,7.2,2006-01-01,2006,1,QLD Condamine-Balonne,1,Flowering,4.228012e+07
1,40030,0.0,34.0,16.1,32.9,38.0,100.0,7.2,2006-01-02,2006,1,QLD Condamine-Balonne,2,Flowering,4.228012e+07
2,40030,0.0,36.7,18.1,30.4,31.9,94.9,7.5,2006-01-03,2006,1,QLD Condamine-Balonne,3,Flowering,4.228012e+07
3,40030,5.0,24.0,19.3,10.1,78.4,100.0,2.2,2006-01-04,2006,1,QLD Condamine-Balonne,4,Flowering,4.228012e+07
4,40030,16.0,27.7,17.3,14.0,58.4,100.0,3.4,2006-01-05,2006,1,QLD Condamine-Balonne,5,Flowering,4.228012e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5197092,82052,0.0,34.2,10.8,23.9,26.8,100.0,6.3,2020-12-27,2020,12,NSW Murray Above,27,Growing,7.068900e+07
5197093,82052,0.0,31.5,17.5,29.1,29.9,69.0,6.9,2020-12-28,2020,12,NSW Murray Above,28,Growing,7.068900e+07
5197094,82052,0.0,29.9,10.1,30.8,27.3,93.1,6.5,2020-12-29,2020,12,NSW Murray Above,29,Growing,7.068900e+07
5197095,82052,0.0,31.5,12.8,28.1,30.3,94.8,6.4,2020-12-30,2020,12,NSW Murray Above,30,Growing,7.068900e+07


Turning area into normalised weights

In [17]:
daily_frame_test["weights"] = daily_frame_test["vor_area"] / daily_frame_test["vor_area"].unique().sum()
daily_frame_test["weights"].unique()

array([9.98756237e-05, 5.52728906e-04, 1.00955627e-04, ...,
       2.53270827e-04, 2.17144675e-05, 1.66984114e-04])

### Aggregating per season

#### (Unweighted) Aggrigates Functions

In [25]:
def get_weighted_stats(frame, value_c, weight_c = None):
    values = frame[value_c]
    weights = frame[weight_c] if weight_c else None
    weighted_stats = DescrStatsW(values, weights=weights, ddof=0)
    quantiles = weighted_stats.quantile([0.1, 0.9])
    return pd.Series({
        'weighted_mean': weighted_stats.mean,
        'weighted_std': weighted_stats.std,
        'weighted_sum': weighted_stats.sum,
        'weighted_nobs': weighted_stats.nobs,
        'weighted_p10': quantiles.loc[0.1],
        'weighted_p90': quantiles.loc[0.9]
    })

#### RainFall Aggr

In [26]:
list_of_frames = []

In [27]:
rain_aggrs = daily_frame_test.groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "daily_rain").add_prefix("rain_").reset_index()
rain_aggrs

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Region,Year,Season,rain_weighted_mean,rain_weighted_std,rain_weighted_sum,rain_weighted_nobs,rain_weighted_p10,rain_weighted_p90
0,NSW Barwon Darling,2006,Flowering,0.947149,3.553518,5050.2,5332.0,0.0,2.00
1,NSW Barwon Darling,2006,Growing,0.593642,2.477509,3165.3,5332.0,0.0,0.60
2,NSW Barwon Darling,2006,Harvesting,0.215650,1.965174,2262.6,10492.0,0.0,0.00
3,NSW Barwon Darling,2006,Maturation,0.928952,3.893367,9427.0,10148.0,0.0,1.80
4,NSW Barwon Darling,2006,Planting,0.385205,2.212524,3114.0,8084.0,0.0,0.10
...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,2.212702,5.715057,1097.5,496.0,0.0,7.20
896,QLD Moonie,2020,Growing,0.948873,4.296768,926.1,976.0,0.0,2.10
897,QLD Moonie,2020,Harvesting,0.282917,1.146480,135.8,480.0,0.0,0.35
898,QLD Moonie,2020,Maturation,3.364792,7.029660,3230.2,960.0,0.0,11.50


In [28]:
rain_aggrs["rain_weighted_p10"].unique()

array([0.])

In [29]:
list_of_frames.append(rain_aggrs)

In [30]:
rainy_days = daily_frame_test[daily_frame_test["daily_rain"] > 0].groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "daily_rain", "weights").reset_index()[["Year", "Region", "Season", "weighted_nobs"]].rename(columns = {"weighted_nobs" : "num_rainy_days"})
rainy_days

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Year,Region,Season,num_rainy_days
0,2006,NSW Barwon Darling,Flowering,1.137696
1,2006,NSW Barwon Darling,Growing,1.007839
2,2006,NSW Barwon Darling,Harvesting,0.585017
3,2006,NSW Barwon Darling,Maturation,1.900653
4,2006,NSW Barwon Darling,Planting,0.868655
...,...,...,...,...
895,2020,QLD Moonie,Flowering,0.145756
896,2020,QLD Moonie,Growing,0.261785
897,2020,QLD Moonie,Harvesting,0.069522
898,2020,QLD Moonie,Maturation,0.405143


In [31]:
list_of_frames.append(rainy_days)

In [32]:
for frame in list_of_frames:
    print(frame.columns)

Index(['Region', 'Year', 'Season', 'rain_weighted_mean', 'rain_weighted_std',
       'rain_weighted_sum', 'rain_weighted_nobs', 'rain_weighted_p10',
       'rain_weighted_p90'],
      dtype='object')
Index(['Year', 'Region', 'Season', 'num_rainy_days'], dtype='object')


#### Radiation Aggr

In [33]:
radiation_aggrs = daily_frame_test.groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "radiation").add_prefix("rad_").reset_index()
radiation_aggrs

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Region,Year,Season,rad_weighted_mean,rad_weighted_std,rad_weighted_sum,rad_weighted_nobs,rad_weighted_p10,rad_weighted_p90
0,NSW Barwon Darling,2006,Flowering,27.034059,5.804725,144145.6,5332.0,19.0,32.50
1,NSW Barwon Darling,2006,Growing,25.498668,7.579642,135958.9,5332.0,13.9,32.80
2,NSW Barwon Darling,2006,Harvesting,16.255814,3.435585,170556.0,10492.0,12.7,21.20
3,NSW Barwon Darling,2006,Maturation,23.181898,5.875273,235249.9,10148.0,14.7,29.80
4,NSW Barwon Darling,2006,Planting,26.479095,6.481446,214057.0,8084.0,16.6,32.40
...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,26.452016,5.710876,13120.2,496.0,16.0,30.80
896,QLD Moonie,2020,Growing,25.797029,5.822036,25177.9,976.0,17.0,30.40
897,QLD Moonie,2020,Harvesting,16.591458,3.312991,7963.9,480.0,11.5,19.95
898,QLD Moonie,2020,Maturation,20.844271,6.364620,20010.5,960.0,10.3,27.60


In [34]:
list_of_frames.append(radiation_aggrs)

In [35]:
for frame in list_of_frames:
    print(frame.columns)

Index(['Region', 'Year', 'Season', 'rain_weighted_mean', 'rain_weighted_std',
       'rain_weighted_sum', 'rain_weighted_nobs', 'rain_weighted_p10',
       'rain_weighted_p90'],
      dtype='object')
Index(['Year', 'Region', 'Season', 'num_rainy_days'], dtype='object')
Index(['Region', 'Year', 'Season', 'rad_weighted_mean', 'rad_weighted_std',
       'rad_weighted_sum', 'rad_weighted_nobs', 'rad_weighted_p10',
       'rad_weighted_p90'],
      dtype='object')


### ET Aggr

In [36]:
et_aggrs = daily_frame_test.groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "et_short_crop").add_prefix("et_").reset_index()
et_aggrs

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Region,Year,Season,et_weighted_mean,et_weighted_std,et_weighted_sum,et_weighted_nobs,et_weighted_p10,et_weighted_p90
0,NSW Barwon Darling,2006,Flowering,7.709640,1.304110,41107.8,5332.0,5.80,9.00
1,NSW Barwon Darling,2006,Growing,7.010990,1.367786,37382.6,5332.0,5.20,8.40
2,NSW Barwon Darling,2006,Harvesting,3.314297,0.825336,34773.6,10492.0,2.30,4.50
3,NSW Barwon Darling,2006,Maturation,6.364249,1.531751,64584.4,10148.0,4.30,8.20
4,NSW Barwon Darling,2006,Planting,6.792578,1.386486,54911.2,8084.0,5.30,8.70
...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,7.418750,1.461092,3679.7,496.0,5.40,8.80
896,QLD Moonie,2020,Growing,6.929201,1.401082,6762.9,976.0,5.20,8.50
897,QLD Moonie,2020,Harvesting,3.910625,0.543284,1877.1,480.0,3.35,4.45
898,QLD Moonie,2020,Maturation,5.085938,1.385295,4882.5,960.0,2.90,6.65


In [37]:
list_of_frames.append(et_aggrs)

In [38]:
for frame in list_of_frames:
    print(frame.columns)

Index(['Region', 'Year', 'Season', 'rain_weighted_mean', 'rain_weighted_std',
       'rain_weighted_sum', 'rain_weighted_nobs', 'rain_weighted_p10',
       'rain_weighted_p90'],
      dtype='object')
Index(['Year', 'Region', 'Season', 'num_rainy_days'], dtype='object')
Index(['Region', 'Year', 'Season', 'rad_weighted_mean', 'rad_weighted_std',
       'rad_weighted_sum', 'rad_weighted_nobs', 'rad_weighted_p10',
       'rad_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'et_weighted_mean', 'et_weighted_std',
       'et_weighted_sum', 'et_weighted_nobs', 'et_weighted_p10',
       'et_weighted_p90'],
      dtype='object')


In [39]:
daily_frame_test.columns

Index(['station', 'daily_rain', 'max_temp', 'min_temp', 'radiation', 'rh_tmax',
       'rh_tmin', 'et_short_crop', 'Date', 'Year', 'Month', 'Region', 'Day',
       'Season', 'vor_area', 'weights'],
      dtype='object')

### Temp Aggrs

In [40]:
maxt_aggrs = daily_frame_test.groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "max_temp").add_prefix("maxt_").reset_index()
maxt_aggrs

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Region,Year,Season,maxt_weighted_mean,maxt_weighted_std,maxt_weighted_sum,maxt_weighted_nobs,maxt_weighted_p10,maxt_weighted_p90
0,NSW Barwon Darling,2006,Flowering,38.279745,2.881907,204107.6,5332.0,34.70,41.80
1,NSW Barwon Darling,2006,Growing,33.891092,4.193323,180707.3,5332.0,28.00,38.40
2,NSW Barwon Darling,2006,Harvesting,23.591708,4.075638,247524.2,10492.0,18.30,29.60
3,NSW Barwon Darling,2006,Maturation,34.817994,4.503472,353333.0,10148.0,29.10,40.70
4,NSW Barwon Darling,2006,Planting,32.623342,5.342851,263727.1,8084.0,25.60,39.90
...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,37.533468,3.473118,18616.6,496.0,33.40,41.30
896,QLD Moonie,2020,Growing,34.981557,4.498409,34142.0,976.0,29.30,40.70
897,QLD Moonie,2020,Harvesting,28.231458,2.502072,13551.1,480.0,24.65,31.00
898,QLD Moonie,2020,Maturation,31.176875,3.593644,29929.8,960.0,26.50,36.05


In [41]:
list_of_frames.append(maxt_aggrs)

In [42]:
mint_aggrs = daily_frame_test.groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "min_temp").add_prefix("mint_").reset_index()
mint_aggrs

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Region,Year,Season,mint_weighted_mean,mint_weighted_std,mint_weighted_sum,mint_weighted_nobs,mint_weighted_p10,mint_weighted_p90
0,NSW Barwon Darling,2006,Flowering,24.316317,2.493527,129654.6,5332.0,21.20,27.40
1,NSW Barwon Darling,2006,Growing,19.527794,3.880290,104122.2,5332.0,13.50,24.20
2,NSW Barwon Darling,2006,Harvesting,7.864401,4.985365,82513.3,10492.0,1.30,15.50
3,NSW Barwon Darling,2006,Maturation,20.565885,3.806556,208702.6,10148.0,15.90,25.30
4,NSW Barwon Darling,2006,Planting,16.590042,4.885436,134113.9,8084.0,9.60,22.80
...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,24.178831,2.611369,11992.7,496.0,20.00,27.70
896,QLD Moonie,2020,Growing,19.920389,3.847874,19442.3,976.0,14.70,24.60
897,QLD Moonie,2020,Harvesting,13.626667,3.828114,6540.8,480.0,9.05,19.45
898,QLD Moonie,2020,Maturation,19.373646,3.031997,18598.7,960.0,15.20,22.70


In [43]:
list_of_frames.append(mint_aggrs)

In [44]:
for frame in list_of_frames:
    print(frame.columns)

Index(['Region', 'Year', 'Season', 'rain_weighted_mean', 'rain_weighted_std',
       'rain_weighted_sum', 'rain_weighted_nobs', 'rain_weighted_p10',
       'rain_weighted_p90'],
      dtype='object')
Index(['Year', 'Region', 'Season', 'num_rainy_days'], dtype='object')
Index(['Region', 'Year', 'Season', 'rad_weighted_mean', 'rad_weighted_std',
       'rad_weighted_sum', 'rad_weighted_nobs', 'rad_weighted_p10',
       'rad_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'et_weighted_mean', 'et_weighted_std',
       'et_weighted_sum', 'et_weighted_nobs', 'et_weighted_p10',
       'et_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'maxt_weighted_mean', 'maxt_weighted_std',
       'maxt_weighted_sum', 'maxt_weighted_nobs', 'maxt_weighted_p10',
       'maxt_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'mint_weighted_mean', 'mint_weighted_std',
       'mint_weighted_sum', 'mint_weighted_nobs', 'mint_weighted_p10',

Hot Days

In [47]:
hot_days = daily_frame_test[daily_frame_test["max_temp"] > 35].groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "max_temp").reset_index()[["Year", "Region", "Season", "weighted_nobs"]].rename(columns = {"weighted_nobs" : "num_hot_days"})
hot_days

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Year,Region,Season,num_hot_days
0,2006,NSW Barwon Darling,Flowering,4688.0
1,2006,NSW Barwon Darling,Growing,2453.0
2,2006,NSW Barwon Darling,Maturation,4929.0
3,2006,NSW Barwon Darling,Planting,2701.0
4,2007,NSW Barwon Darling,Flowering,3210.0
...,...,...,...,...
725,2019,QLD Moonie,Planting,112.0
726,2020,QLD Moonie,Flowering,406.0
727,2020,QLD Moonie,Growing,470.0
728,2020,QLD Moonie,Maturation,119.0


In [48]:
list_of_frames.append(hot_days)

In [49]:
for frame in list_of_frames:
    print(frame.columns)

Index(['Region', 'Year', 'Season', 'rain_weighted_mean', 'rain_weighted_std',
       'rain_weighted_sum', 'rain_weighted_nobs', 'rain_weighted_p10',
       'rain_weighted_p90'],
      dtype='object')
Index(['Year', 'Region', 'Season', 'num_rainy_days'], dtype='object')
Index(['Region', 'Year', 'Season', 'rad_weighted_mean', 'rad_weighted_std',
       'rad_weighted_sum', 'rad_weighted_nobs', 'rad_weighted_p10',
       'rad_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'et_weighted_mean', 'et_weighted_std',
       'et_weighted_sum', 'et_weighted_nobs', 'et_weighted_p10',
       'et_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'maxt_weighted_mean', 'maxt_weighted_std',
       'maxt_weighted_sum', 'maxt_weighted_nobs', 'maxt_weighted_p10',
       'maxt_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'mint_weighted_mean', 'mint_weighted_std',
       'mint_weighted_sum', 'mint_weighted_nobs', 'mint_weighted_p10',

Cold Days

In [51]:
frost_days = daily_frame_test[daily_frame_test["min_temp"] < 0].groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "min_temp").reset_index()[["Year", "Region", "Season", "weighted_nobs"]].rename(columns = {"weighted_nobs" : "num_frost_days"})
frost_days

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Year,Region,Season,num_frost_days
0,2006,NSW Barwon Darling,Harvesting,404.0
1,2008,NSW Barwon Darling,Harvesting,16.0
2,2010,NSW Barwon Darling,Harvesting,9.0
3,2011,NSW Barwon Darling,Harvesting,156.0
4,2012,NSW Barwon Darling,Harvesting,16.0
...,...,...,...,...
286,2008,QLD Condamine-Balonne,Harvesting,96.0
287,2009,QLD Condamine-Balonne,Planting,1.0
288,2011,QLD Condamine-Balonne,Planting,1.0
289,2015,QLD Condamine-Balonne,Planting,1.0


In [52]:
list_of_frames.append(frost_days)

In [53]:
for frame in list_of_frames:
    print(frame.columns)

Index(['Region', 'Year', 'Season', 'rain_weighted_mean', 'rain_weighted_std',
       'rain_weighted_sum', 'rain_weighted_nobs', 'rain_weighted_p10',
       'rain_weighted_p90'],
      dtype='object')
Index(['Year', 'Region', 'Season', 'num_rainy_days'], dtype='object')
Index(['Region', 'Year', 'Season', 'rad_weighted_mean', 'rad_weighted_std',
       'rad_weighted_sum', 'rad_weighted_nobs', 'rad_weighted_p10',
       'rad_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'et_weighted_mean', 'et_weighted_std',
       'et_weighted_sum', 'et_weighted_nobs', 'et_weighted_p10',
       'et_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'maxt_weighted_mean', 'maxt_weighted_std',
       'maxt_weighted_sum', 'maxt_weighted_nobs', 'maxt_weighted_p10',
       'maxt_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'mint_weighted_mean', 'mint_weighted_std',
       'mint_weighted_sum', 'mint_weighted_nobs', 'mint_weighted_p10',

GDD

In [54]:
base_temp = 15.6
max_temp_cap = 32

In [55]:
daily_frame_test["Tmax_capped"] = daily_frame_test["max_temp"].clip(upper=max_temp_cap)
daily_frame_test["GDD"] = ((daily_frame_test["Tmax_capped"] + daily_frame_test["min_temp"]) / 2 - base_temp).clip(lower=0)

In [56]:
gdd_num = daily_frame_test.groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "GDD").reset_index()[["Region", "Year", "Season", "weighted_sum"]].rename(columns = {"weighted_sum" : "gdd"})
gdd_num

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Region,Year,Season,gdd
0,NSW Barwon Darling,2006,Flowering,66847.15
1,NSW Barwon Darling,2006,Growing,51556.30
2,NSW Barwon Darling,2006,Harvesting,17874.50
3,NSW Barwon Darling,2006,Maturation,104663.05
4,NSW Barwon Darling,2006,Planting,62968.80
...,...,...,...,...
895,QLD Moonie,2020,Flowering,6133.10
896,QLD Moonie,2020,Growing,9789.10
897,QLD Moonie,2020,Harvesting,2555.60
898,QLD Moonie,2020,Maturation,8801.90


In [57]:
list_of_frames.append(gdd_num)

In [58]:
for frame in list_of_frames:
    print(frame.columns)

Index(['Region', 'Year', 'Season', 'rain_weighted_mean', 'rain_weighted_std',
       'rain_weighted_sum', 'rain_weighted_nobs', 'rain_weighted_p10',
       'rain_weighted_p90'],
      dtype='object')
Index(['Year', 'Region', 'Season', 'num_rainy_days'], dtype='object')
Index(['Region', 'Year', 'Season', 'rad_weighted_mean', 'rad_weighted_std',
       'rad_weighted_sum', 'rad_weighted_nobs', 'rad_weighted_p10',
       'rad_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'et_weighted_mean', 'et_weighted_std',
       'et_weighted_sum', 'et_weighted_nobs', 'et_weighted_p10',
       'et_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'maxt_weighted_mean', 'maxt_weighted_std',
       'maxt_weighted_sum', 'maxt_weighted_nobs', 'maxt_weighted_p10',
       'maxt_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'mint_weighted_mean', 'mint_weighted_std',
       'mint_weighted_sum', 'mint_weighted_nobs', 'mint_weighted_p10',

#### Humidity Aggrs

In [59]:
daily_frame_test.columns

Index(['station', 'daily_rain', 'max_temp', 'min_temp', 'radiation', 'rh_tmax',
       'rh_tmin', 'et_short_crop', 'Date', 'Year', 'Month', 'Region', 'Day',
       'Season', 'vor_area', 'weights', 'Tmax_capped', 'GDD'],
      dtype='object')

In [60]:
rh_tmax_aggrs = daily_frame_test.groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "rh_tmax").add_prefix("rh_tmax_").reset_index()
rh_tmax_aggrs

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Region,Year,Season,rh_tmax_weighted_mean,rh_tmax_weighted_std,rh_tmax_weighted_sum,rh_tmax_weighted_nobs,rh_tmax_weighted_p10,rh_tmax_weighted_p90
0,NSW Barwon Darling,2006,Flowering,27.289629,10.010731,145508.3,5332.0,14.70,40.60
1,NSW Barwon Darling,2006,Growing,23.991748,10.789749,127924.0,5332.0,13.20,31.70
2,NSW Barwon Darling,2006,Harvesting,31.990498,8.482967,335644.3,10492.0,22.60,42.30
3,NSW Barwon Darling,2006,Maturation,28.303262,12.016881,287221.5,10148.0,14.00,44.60
4,NSW Barwon Darling,2006,Planting,21.549926,10.871944,174209.6,8084.0,9.90,34.50
...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,31.436694,10.896438,15592.6,496.0,20.20,48.50
896,QLD Moonie,2020,Growing,29.099795,10.374092,28401.4,976.0,19.10,42.40
897,QLD Moonie,2020,Harvesting,37.334583,10.494644,17920.6,480.0,27.60,50.80
898,QLD Moonie,2020,Maturation,44.009896,12.204987,42249.5,960.0,29.15,63.55


In [61]:
list_of_frames.append(rh_tmax_aggrs)

In [62]:
for frame in list_of_frames:
    print(frame.columns)

Index(['Region', 'Year', 'Season', 'rain_weighted_mean', 'rain_weighted_std',
       'rain_weighted_sum', 'rain_weighted_nobs', 'rain_weighted_p10',
       'rain_weighted_p90'],
      dtype='object')
Index(['Year', 'Region', 'Season', 'num_rainy_days'], dtype='object')
Index(['Region', 'Year', 'Season', 'rad_weighted_mean', 'rad_weighted_std',
       'rad_weighted_sum', 'rad_weighted_nobs', 'rad_weighted_p10',
       'rad_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'et_weighted_mean', 'et_weighted_std',
       'et_weighted_sum', 'et_weighted_nobs', 'et_weighted_p10',
       'et_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'maxt_weighted_mean', 'maxt_weighted_std',
       'maxt_weighted_sum', 'maxt_weighted_nobs', 'maxt_weighted_p10',
       'maxt_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'mint_weighted_mean', 'mint_weighted_std',
       'mint_weighted_sum', 'mint_weighted_nobs', 'mint_weighted_p10',

In [63]:
rh_tmin_aggrs = daily_frame_test.groupby(["Region", "Year", "Season"]).apply(get_weighted_stats, "rh_tmin").add_prefix("rh_tmin_").reset_index()
rh_tmin_aggrs

  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.groupby("vec").agg(np.sum)
  dfg = df.g

Unnamed: 0,Region,Year,Season,rh_tmin_weighted_mean,rh_tmin_weighted_std,rh_tmin_weighted_sum,rh_tmin_weighted_nobs,rh_tmin_weighted_p10,rh_tmin_weighted_p90
0,NSW Barwon Darling,2006,Flowering,59.398125,18.811649,316710.8,5332.0,34.70,84.5
1,NSW Barwon Darling,2006,Growing,54.034415,16.135505,288111.5,5332.0,33.50,75.6
2,NSW Barwon Darling,2006,Harvesting,84.172093,15.605183,883133.6,10492.0,60.80,100.0
3,NSW Barwon Darling,2006,Maturation,62.687347,19.884141,636151.2,10148.0,36.20,91.6
4,NSW Barwon Darling,2006,Planting,54.265933,21.029856,438685.8,8084.0,28.60,86.9
...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,65.716331,16.082475,32595.3,496.0,47.20,90.8
896,QLD Moonie,2020,Growing,67.833402,14.289465,66205.4,976.0,49.00,87.3
897,QLD Moonie,2020,Harvesting,88.301667,11.127370,42384.8,480.0,71.70,100.0
898,QLD Moonie,2020,Maturation,86.096042,11.882465,82652.2,960.0,71.45,100.0


In [64]:
list_of_frames.append(rh_tmin_aggrs)

In [65]:
for frame in list_of_frames:
    print(frame.columns)

Index(['Region', 'Year', 'Season', 'rain_weighted_mean', 'rain_weighted_std',
       'rain_weighted_sum', 'rain_weighted_nobs', 'rain_weighted_p10',
       'rain_weighted_p90'],
      dtype='object')
Index(['Year', 'Region', 'Season', 'num_rainy_days'], dtype='object')
Index(['Region', 'Year', 'Season', 'rad_weighted_mean', 'rad_weighted_std',
       'rad_weighted_sum', 'rad_weighted_nobs', 'rad_weighted_p10',
       'rad_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'et_weighted_mean', 'et_weighted_std',
       'et_weighted_sum', 'et_weighted_nobs', 'et_weighted_p10',
       'et_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'maxt_weighted_mean', 'maxt_weighted_std',
       'maxt_weighted_sum', 'maxt_weighted_nobs', 'maxt_weighted_p10',
       'maxt_weighted_p90'],
      dtype='object')
Index(['Region', 'Year', 'Season', 'mint_weighted_mean', 'mint_weighted_std',
       'mint_weighted_sum', 'mint_weighted_nobs', 'mint_weighted_p10',

### Combined

In [66]:
full_aggregate = list_of_frames[0].copy()
full_aggregate

Unnamed: 0,Region,Year,Season,rain_weighted_mean,rain_weighted_std,rain_weighted_sum,rain_weighted_nobs,rain_weighted_p10,rain_weighted_p90
0,NSW Barwon Darling,2006,Flowering,0.947149,3.553518,5050.2,5332.0,0.0,2.00
1,NSW Barwon Darling,2006,Growing,0.593642,2.477509,3165.3,5332.0,0.0,0.60
2,NSW Barwon Darling,2006,Harvesting,0.215650,1.965174,2262.6,10492.0,0.0,0.00
3,NSW Barwon Darling,2006,Maturation,0.928952,3.893367,9427.0,10148.0,0.0,1.80
4,NSW Barwon Darling,2006,Planting,0.385205,2.212524,3114.0,8084.0,0.0,0.10
...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,2.212702,5.715057,1097.5,496.0,0.0,7.20
896,QLD Moonie,2020,Growing,0.948873,4.296768,926.1,976.0,0.0,2.10
897,QLD Moonie,2020,Harvesting,0.282917,1.146480,135.8,480.0,0.0,0.35
898,QLD Moonie,2020,Maturation,3.364792,7.029660,3230.2,960.0,0.0,11.50


In [67]:
for df in list_of_frames[1:]:
    full_aggregate = full_aggregate.merge(df, on = ["Region", "Year", "Season"], how = "left")

In [68]:
full_aggregate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 49 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Region                 900 non-null    object 
 1   Year                   900 non-null    int64  
 2   Season                 900 non-null    object 
 3   rain_weighted_mean     900 non-null    float64
 4   rain_weighted_std      900 non-null    float64
 5   rain_weighted_sum      900 non-null    float64
 6   rain_weighted_nobs     900 non-null    float64
 7   rain_weighted_p10      900 non-null    float64
 8   rain_weighted_p90      900 non-null    float64
 9   num_rainy_days         900 non-null    float64
 10  rad_weighted_mean      900 non-null    float64
 11  rad_weighted_std       900 non-null    float64
 12  rad_weighted_sum       900 non-null    float64
 13  rad_weighted_nobs      900 non-null    float64
 14  rad_weighted_p10       900 non-null    float64
 15  rad_we

In [69]:
full_aggregate[full_aggregate.isna().any(axis = 1)]

Unnamed: 0,Region,Year,Season,rain_weighted_mean,rain_weighted_std,rain_weighted_sum,rain_weighted_nobs,rain_weighted_p10,rain_weighted_p90,num_rainy_days,...,rh_tmax_weighted_sum,rh_tmax_weighted_nobs,rh_tmax_weighted_p10,rh_tmax_weighted_p90,rh_tmin_weighted_mean,rh_tmin_weighted_std,rh_tmin_weighted_sum,rh_tmin_weighted_nobs,rh_tmin_weighted_p10,rh_tmin_weighted_p90
0,NSW Barwon Darling,2006,Flowering,0.947149,3.553518,5050.2,5332.0,0.0,2.00,1.137696,...,145508.3,5332.0,14.70,40.60,59.398125,18.811649,316710.8,5332.0,34.70,84.5
1,NSW Barwon Darling,2006,Growing,0.593642,2.477509,3165.3,5332.0,0.0,0.60,1.007839,...,127924.0,5332.0,13.20,31.70,54.034415,16.135505,288111.5,5332.0,33.50,75.6
2,NSW Barwon Darling,2006,Harvesting,0.215650,1.965174,2262.6,10492.0,0.0,0.00,0.585017,...,335644.3,10492.0,22.60,42.30,84.172093,15.605183,883133.6,10492.0,60.80,100.0
3,NSW Barwon Darling,2006,Maturation,0.928952,3.893367,9427.0,10148.0,0.0,1.80,1.900653,...,287221.5,10148.0,14.00,44.60,62.687347,19.884141,636151.2,10148.0,36.20,91.6
4,NSW Barwon Darling,2006,Planting,0.385205,2.212524,3114.0,8084.0,0.0,0.10,0.868655,...,174209.6,8084.0,9.90,34.50,54.265933,21.029856,438685.8,8084.0,28.60,86.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,2.212702,5.715057,1097.5,496.0,0.0,7.20,0.145756,...,15592.6,496.0,20.20,48.50,65.716331,16.082475,32595.3,496.0,47.20,90.8
896,QLD Moonie,2020,Growing,0.948873,4.296768,926.1,976.0,0.0,2.10,0.261785,...,28401.4,976.0,19.10,42.40,67.833402,14.289465,66205.4,976.0,49.00,87.3
897,QLD Moonie,2020,Harvesting,0.282917,1.146480,135.8,480.0,0.0,0.35,0.069522,...,17920.6,480.0,27.60,50.80,88.301667,11.127370,42384.8,480.0,71.70,100.0
898,QLD Moonie,2020,Maturation,3.364792,7.029660,3230.2,960.0,0.0,11.50,0.405143,...,42249.5,960.0,29.15,63.55,86.096042,11.882465,82652.2,960.0,71.45,100.0


In [70]:
full_aggregate.columns[full_aggregate.isna().any()].tolist()

['num_hot_days', 'num_frost_days']

In [71]:
full_aggregate.fillna(0.0, inplace=True)

In [72]:
full_aggregate[full_aggregate.isna().any(axis = 1)]

Unnamed: 0,Region,Year,Season,rain_weighted_mean,rain_weighted_std,rain_weighted_sum,rain_weighted_nobs,rain_weighted_p10,rain_weighted_p90,num_rainy_days,...,rh_tmax_weighted_sum,rh_tmax_weighted_nobs,rh_tmax_weighted_p10,rh_tmax_weighted_p90,rh_tmin_weighted_mean,rh_tmin_weighted_std,rh_tmin_weighted_sum,rh_tmin_weighted_nobs,rh_tmin_weighted_p10,rh_tmin_weighted_p90


In [73]:
full_aggregate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 49 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Region                 900 non-null    object 
 1   Year                   900 non-null    int64  
 2   Season                 900 non-null    object 
 3   rain_weighted_mean     900 non-null    float64
 4   rain_weighted_std      900 non-null    float64
 5   rain_weighted_sum      900 non-null    float64
 6   rain_weighted_nobs     900 non-null    float64
 7   rain_weighted_p10      900 non-null    float64
 8   rain_weighted_p90      900 non-null    float64
 9   num_rainy_days         900 non-null    float64
 10  rad_weighted_mean      900 non-null    float64
 11  rad_weighted_std       900 non-null    float64
 12  rad_weighted_sum       900 non-null    float64
 13  rad_weighted_nobs      900 non-null    float64
 14  rad_weighted_p10       900 non-null    float64
 15  rad_we

In [74]:
full_aggregate.to_csv("seasonal_aggregates_unweigted.csv", index = False)

In [67]:
check = pd.read_csv("seasonal_aggregates_unweigted.csv")
check

Unnamed: 0,Region,Year,Season,rain_weighted_mean,rain_weighted_std,rain_weighted_sum,rain_weighted_nobs,rain_weighted_p10,rain_weighted_p90,num_rainy_days,...,rh_tmax_weighted_sum,rh_tmax_weighted_nobs,rh_tmax_weighted_p10,rh_tmax_weighted_p90,rh_tmin_weighted_mean,rh_tmin_weighted_std,rh_tmin_weighted_sum,rh_tmin_weighted_nobs,rh_tmin_weighted_p10,rh_tmin_weighted_p90
0,NSW Barwon Darling,2006,Flowering,0.947149,3.553518,5050.2,5332.0,0.0,2.00,1.137696,...,145508.3,5332.0,14.70,40.60,59.398125,18.811649,316710.8,5332.0,34.70,84.5
1,NSW Barwon Darling,2006,Growing,0.593642,2.477509,3165.3,5332.0,0.0,0.60,1.007839,...,127924.0,5332.0,13.20,31.70,54.034415,16.135505,288111.5,5332.0,33.50,75.6
2,NSW Barwon Darling,2006,Harvesting,0.215650,1.965174,2262.6,10492.0,0.0,0.00,0.585017,...,335644.3,10492.0,22.60,42.30,84.172093,15.605183,883133.6,10492.0,60.80,100.0
3,NSW Barwon Darling,2006,Maturation,0.928952,3.893367,9427.0,10148.0,0.0,1.80,1.900653,...,287221.5,10148.0,14.00,44.60,62.687347,19.884141,636151.2,10148.0,36.20,91.6
4,NSW Barwon Darling,2006,Planting,0.385205,2.212524,3114.0,8084.0,0.0,0.10,0.868655,...,174209.6,8084.0,9.90,34.50,54.265933,21.029856,438685.8,8084.0,28.60,86.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,QLD Moonie,2020,Flowering,2.212702,5.715057,1097.5,496.0,0.0,7.20,0.145756,...,15592.6,496.0,20.20,48.50,65.716331,16.082475,32595.3,496.0,47.20,90.8
896,QLD Moonie,2020,Growing,0.948873,4.296768,926.1,976.0,0.0,2.10,0.261785,...,28401.4,976.0,19.10,42.40,67.833402,14.289465,66205.4,976.0,49.00,87.3
897,QLD Moonie,2020,Harvesting,0.282917,1.146480,135.8,480.0,0.0,0.35,0.069522,...,17920.6,480.0,27.60,50.80,88.301667,11.127370,42384.8,480.0,71.70,100.0
898,QLD Moonie,2020,Maturation,3.364792,7.029660,3230.2,960.0,0.0,11.50,0.405143,...,42249.5,960.0,29.15,63.55,86.096042,11.882465,82652.2,960.0,71.45,100.0


In [76]:
check["Season"].unique()

array(['Flowering', 'Growing', 'Harvesting', 'Maturation', 'Planting'],
      dtype=object)