In [1]:
# Dataset
from sklearn.datasets import load_diabetes

# Basics
import pandas as pd
import numpy as np

# Plotting
import matplotlib.pyplot as plt

# Model
from sklearn.ensemble import RandomForestRegressor

# Helpful:
from sklearn.model_selection import train_test_split

import sys
# NOTE(review): hard-coded absolute path to one user's checkout; it will most
# likely not exist on another machine, so the imports below would fail there.
# Consider a configurable project root or installing the package instead.
sys.path.append("/home/dchen/Random_Forest_Weights/")
# With the project root on sys.path, the local src_rf helper modules can be imported.
# NOTE(review): the wildcard imports hide which module provides each helper
# used later in the notebook; explicit imports would make that visible.
from src_rf.methods.calc_mean import *
from src_rf.methods.calc_weights import *
from src_rf.methods.calc_dist import *

### 1. Loading Data & Train/Test Split

In [2]:
# Fetch the scikit-learn diabetes dataset and assemble one DataFrame:
# the feature columns first, followed by the regression target as the last column.
diabetes = load_diabetes()
df_diabetes = pd.DataFrame(
    np.column_stack((diabetes["data"], diabetes["target"])),
    columns=[*diabetes["feature_names"], "target"],
)

In [3]:
# Feature matrix: every column except "target" (the last one), cast to float32.
X = df_diabetes.drop(columns="target").to_numpy(dtype="float32")
# Target vector: the "target" column as an independent 1-D array.
y = df_diabetes["target"].to_numpy(copy=True)

In [4]:
# Hold out 20% of the samples for testing. The fixed random_state makes the
# split (and every number computed from it further down) identical on each
# Restart & Run All; without it, every run shuffles the data differently.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

### 2. Random Forest

In [5]:
# Sampling settings for the bootstrapped forest: enable bootstrap sampling and
# pass 0.7 as the max-samples fraction.
bootstrap, max_sample = True, 0.7

In [6]:
# Forest that uses the bootstrap / max-samples settings defined above.
rf = RandomForestRegressor(
    random_state=42,
    max_samples=max_sample,
    bootstrap=bootstrap,
)

In [7]:
# Reference forest with bootstrapping switched off (all other settings default).
rf_no = RandomForestRegressor(
    bootstrap=False,
    random_state=42,
)

In [8]:
# Train both forests on the same training split so their outputs are comparable.
rf.fit(X=X_train, y=y_train)
rf_no.fit(X=X_train, y=y_train)

RandomForestRegressor(bootstrap=False, random_state=42)

### 3. Calculate Random Forest Weights

In [9]:
# Compute the weights for both forests. The sampling settings are read back
# from the estimators themselves (scikit-learn stores constructor arguments as
# same-named attributes), so they always match how each forest was built.
# NOTE(review): calc_weights_rf is a project helper not shown here; presumably
# it returns, per test point, weights over the training samples — confirm.
rf_weights = calc_weights_rf(rf, X_train, X_test, rf.bootstrap, rf.max_samples)
rf_no_weights = calc_weights_rf(
    rf_no, X_train, X_test, rf_no.bootstrap, rf_no.max_samples
)

100it [00:00, 309.41it/s]
100it [00:00, 251.75it/s]


### 4. Calculate Mean Predictions: Weight-Based vs. Standard `predict`

In [10]:
# Reference predictions taken directly from scikit-learn.
rf_mean_normal = rf.predict(X_test)
rf_no_mean_normal = rf_no.predict(X_test)

# Means derived from the weights and the training targets
# (presumably weighted averages of y_train — calc_mean_rf is a project helper).
rf_mean_weights = calc_mean_rf(rf_weights, y_train)
rf_no_mean_weights = calc_mean_rf(rf_no_weights, y_train)

In [11]:
# How many test predictions agree between the weight-based mean and rf.predict?
# np.isclose with an absolute tolerance compares the values directly; rounding
# both sides first can report near-identical floats as different when they
# land on opposite sides of a rounding boundary.
np.isclose(rf_mean_weights, rf_mean_normal, rtol=0.0, atol=1e-10).sum()

89

In [12]:
# How many test predictions agree between the weight-based mean and
# rf_no.predict? np.isclose with an absolute tolerance compares the values
# directly; rounding both sides first can report near-identical floats as
# different when they land on opposite sides of a rounding boundary.
np.isclose(rf_no_mean_weights, rf_no_mean_normal, rtol=0.0, atol=1e-10).sum()

89

In [13]:
# Show the test-set predictions returned by rf_no.predict.
rf_no_mean_normal

array([230.35,  73.23, 288.  , 148.34,  96.39, 163.25, 105.17,  72.07,
       225.64,  80.09, 179.2 , 189.63,  83.64, 112.14, 124.04,  62.6 ,
       169.43,  95.92, 183.67, 166.97,  90.5 , 143.19, 118.07,  74.16,
       246.9 , 257.04, 274.88, 211.64, 212.77, 162.06, 138.09, 275.  ,
       257.62, 288.  ,  75.  ,  69.26, 292.92,  93.95, 149.48,  77.3 ,
       217.14,  76.74, 215.4 , 132.  , 171.46, 138.44, 181.41,  58.14,
        54.02,  71.96, 288.  ,  97.08,  98.94, 108.35,  74.8 , 103.03,
        59.9 ,  52.4 , 233.  , 200.05, 146.28, 187.14, 141.86, 115.54,
        54.44,  57.27,  71.32, 107.15, 170.6 , 105.16, 145.93, 266.02,
        72.34, 302.  , 169.38, 142.41, 147.92, 100.82, 107.2 ,  91.  ,
       102.03, 137.61,  79.87,  95.56, 124.76,  79.62,  77.58, 152.32,
        92.74])

In [14]:
# Show the test-set means computed by calc_mean_rf from the rf_no weights,
# for side-by-side comparison with the rf_no.predict output.
rf_no_mean_weights

array([230.35,  73.23, 288.  , 148.34,  96.39, 163.25, 105.17,  72.07,
       225.64,  80.09, 179.2 , 189.63,  83.64, 112.14, 124.04,  62.6 ,
       169.43,  95.92, 183.67, 166.97,  90.5 , 143.19, 118.07,  74.16,
       246.9 , 257.04, 274.88, 211.64, 212.77, 162.06, 138.09, 275.  ,
       257.62, 288.  ,  75.  ,  69.26, 292.92,  93.95, 149.48,  77.3 ,
       217.14,  76.74, 215.4 , 132.  , 171.46, 138.44, 181.41,  58.14,
        54.02,  71.96, 288.  ,  97.08,  98.94, 108.35,  74.8 , 103.03,
        59.9 ,  52.4 , 233.  , 200.05, 146.28, 187.14, 141.86, 115.54,
        54.44,  57.27,  71.32, 107.15, 170.6 , 105.16, 145.93, 266.02,
        72.34, 302.  , 169.38, 142.41, 147.92, 100.82, 107.2 ,  91.  ,
       102.03, 137.61,  79.87,  95.56, 124.76,  79.62,  77.58, 152.32,
        92.74])

### 5. Calculate Quantiles with the Random Forest Weights

In [15]:
# Build the distributions from the bootstrapped forest's weights and the
# training targets. NOTE(review): calc_dist_rf is a project helper not shown
# here; presumably it yields one empirical CDF over y_train per test point — confirm.
rf_cdfs = calc_dist_rf(rf_weights, y_train)

In [16]:
# Number of entries in rf_cdfs.
len(rf_cdfs)

89

In [17]:
# Evaluate the 0.5 quantile (the median) from rf_cdfs.
# NOTE(review): calc_quantile_rf is a project helper not shown here; the
# argument order (cdfs, quantile level, training targets) is taken as-is.
rf_median = calc_quantile_rf(rf_cdfs, 0.5, y_train)

In [18]:
# Preview the first ten medians.
rf_median[:10]

[201.9999999999994,
 103.99609375,
 175.00000000000037,
 150.99999999999892,
 76.99999999999929,
 128.447265625,
 112.7734375,
 103.99609375,
 139.99999999999932,
 116.00000000000018]