In [1]:
import hydrant.gis.gis as gis
import pandas as pd
import numpy as np
from typing import Dict, Union
from itertools import product


## Illustrative examples

In [2]:
# Toy inputs for gis.intersect_df: three tables, one row per ID, whose
# prefixed columns (A_*, B*, C*) hold per-class fractions.
# NOTE(review): columns 'B' (in data1) and 'D' (in data3) do not belong to the
# fraction classes reported in the printed output below; presumably they are
# included to show that non-fraction columns are ignored -- confirm.
data1 = {
    'ID1': [1, 2, 3],
    'A_1': [0.3, 0, 0],
    'A_2': [0, 0, 0],
    'A_3': [0.5, 0, 0],
    'A_4': [0, 1, 0],
    'A_5': [0.2, 0, 1],
    'B': ['B', 'B', 'B'],
}

data2 = {
    'ID2': [1, 2, 3],
    'B1': [0, 1, 0.5],
    'B2': [1, 0, 0.5],
}

data3 = {
    'ID3': [1, 2, 3],
    'C1': [0, 1, 0.3],
    'C2': [1, 0, 0.7],
    'D': [1, 1, 1],
}

# Build the three DataFrames in one step
df1, df2, df3 = (pd.DataFrame(table) for table in (data1, data2, data3))

# Intersect the three tables. Each entry of df_mappings names, for the
# DataFrame in the matching position, its ID column, the prefix of its
# fraction columns, and the label used for it in the report.
# In the printed output each comb_XXXX value equals the product of the
# matching fractions (e.g. ID 3: 1 * 0.5 * 0.3 = 0.15 for LULC_5 S1 ASP1).
result, report = gis.intersect_df(
    df1,
    df2,
    df3,
    df_mappings={
        'df1': {'id': 'ID1', 'prefix': 'A', 'data_name': 'LULC'},
        'df2': {'id': 'ID2', 'prefix': 'B', 'data_name': 'S'},
        'df3': {'id': 'ID3', 'prefix': 'C', 'data_name': 'ASP'},
    },
    remove_zero_combinations=True,
)

# Show the per-ID combination fractions first, then the combination legend
for table in (result, report):
    print(table)

The indexes of all DataFrames are exactly the same with the same order.
total number of non zero combinations:  7
     comb_0001  comb_0002  comb_0003  comb_0004  comb_0005  comb_0006  \
ID1                                                                     
1          0.3        0.5        0.0       0.00       0.00       0.00   
2          0.0        0.0        1.0       0.00       0.00       0.00   
3          0.0        0.0        0.0       0.15       0.35       0.15   

     comb_0007  
ID1             
1         0.20  
2         0.00  
3         0.35  
     Combinations LULC  S ASP  comb
0  LULC_1 S2 ASP2    1  2   2     1
1  LULC_3 S2 ASP2    3  2   2     2
2  LULC_4 S1 ASP1    4  1   1     3
3  LULC_5 S1 ASP1    5  1   1     4
4  LULC_5 S1 ASP2    5  1   2     5
5  LULC_5 S2 ASP1    5  2   1     6
6  LULC_5 S2 ASP2    5  2   2     7


In [3]:
# Example table: one row per ID with four fraction columns sharing the
# 'Frac_' prefix. Row 10 sums to 1.1 and row 30 is all zeros on purpose,
# so the effect of the manipulation is visible in the printed output.
data = {
    'ID': [10, 20, 30],
    'Frac_1': [0.0, 0.2, 0.0],
    'Frac_2': [0.5, 0.5, 0.0],
    'Frac_3': [0.6, 0.3, 0.0],
    'Frac_4': [0.0, 0.0, 0.0],
}

df = pd.DataFrame(data)

# 'normalize' with a minimum_value of 0.20; in the printed result each row
# sums to 1 (e.g. row 10 becomes 0.5/1.1 and 0.6/1.1).
df_modified = gis.manipulating_fractions(
    df,
    df_mapping={'id': 'ID', 'prefix': 'Frac_'},
    action='normalize',
    minimum_value=0.20,
)

# Print the input first and the manipulated table second for comparison
print(df)
print(df_modified)

    Frac_1  Frac_2  Frac_3  Frac_4
ID                                
10     0.0     0.5     0.6     0.0
20     0.2     0.5     0.3     0.0
30     0.0     0.0     0.0     0.0
    Frac_1    Frac_2    Frac_3  Frac_4
ID                                    
10     0.0  0.454545  0.545455     0.0
20     0.2  0.500000  0.300000     0.0
30     0.0  1.000000  0.000000     0.0


In [4]:
# Example table: one row per ID with four fraction columns sharing the
# 'Frac_' prefix.
# NOTE(review): the frame is rebuilt here rather than reused; the printed
# `df` below shows ID as the index after the call, which suggests the call
# alters the frame it is given -- confirm against the library.
data = {
    'ID': [10, 20, 30],
    'Frac_1': [0.0, 0.2, 0.0],
    'Frac_2': [0.5, 0.5, 0.0],
    'Frac_3': [0.6, 0.3, 0.0],
    'Frac_4': [0.0, 0.0, 0.0],
}

df = pd.DataFrame(data)

# 'majority': in the printed result the largest fraction of each row is set
# to 1 and every other column to 0.
df_modified = gis.manipulating_fractions(
    df,
    df_mapping={'id': 'ID', 'prefix': 'Frac_'},
    action='majority',
)

# Print the input first and the manipulated table second for comparison
print(df)
print(df_modified)

    Frac_1  Frac_2  Frac_3  Frac_4
ID                                
10     0.0     0.5     0.6     0.0
20     0.2     0.5     0.3     0.0
30     0.0     0.0     0.0     0.0
    Frac_1  Frac_2  Frac_3  Frac_4
ID                                
10     0.0     0.0     1.0     0.0
20     0.0     1.0     0.0     0.0
30     0.0     1.0     0.0     0.0


## Real case

In [5]:
# Read the land-cover and soil-class statistics and apply simple manipulation.
land_cover = pd.read_csv('./data/gis/West_stats_NA_NALCMS_landcover_2020_30m.csv')

# Soil class 0 mostly represents water, so its fraction column is dropped
# right after reading.
soil_type = pd.read_csv('./data/gis/West_stats_soil_classes.csv').drop(columns='frac_0')

# Keep only the majority soil type for each subbasin.
soil_type = gis.manipulating_fractions(
    soil_type,
    df_mapping={'prefix': 'frac_'},
    action='majority',
)

# Smooth the land cover: remove classes below 5% and renormalize.
land_cover = gis.manipulating_fractions(
    land_cover,
    df_mapping={'prefix': 'frac_'},
    action='normalize',
    minimum_value=0.05,
)

# Intersect soil and land cover; both tables are keyed by COMID and use the
# same 'frac_' column prefix, so data_name is what tells them apart in the report.
result, report = gis.intersect_df(
    soil_type,
    land_cover,
    df_mappings={
        'df1': {'id': 'COMID', 'prefix': 'frac_', 'data_name': 'soil'},
        'df2': {'id': 'COMID', 'prefix': 'frac_', 'data_name': 'LULC'},
    },
    remove_zero_combinations=True,
)

print(result)
print(report)

# Save the combination fractions and the combination legend.
result.to_csv('data/gis/HRU_frac.csv')
report.to_csv('data/gis/HRU.csv')

The indexes of all DataFrames are exactly the same with the same order.
total number of non zero combinations:  15
          comb_0001  comb_0002  comb_0003  comb_0004  comb_0005  comb_0006  \
COMID                                                                        
71022153   0.000000        0.0        0.0   0.072815   0.000000   0.000000   
71022160   0.000000        0.0        0.0   0.000000   0.000000   0.000000   
71022164   0.000000        0.0        0.0   0.000000   0.000000   0.000000   
71022165   0.108643        0.0        0.0   0.000000   0.000000   0.068153   
71022168   0.000000        0.0        0.0   0.000000   0.000000   0.000000   
...             ...        ...        ...        ...        ...        ...   
71039048   0.314622        0.0        0.0   0.000000   0.173279   0.000000   
71039109   0.264265        0.0        0.0   0.084640   0.160080   0.000000   
71039121   0.318708        0.0        0.0   0.087057   0.105298   0.000000   
71039250   0.507148        