In [24]:
import pandas as pd
import numpy as np
from scipy.stats import linregress

# Input table of devolatilisation experiments (absolute, machine-specific location).
path = r"C:\Users\demir\OneDrive\Desktop\MSc Thesis\lasstttrun\final_data_mad1_filtered.csv"
df = pd.read_csv(path)

# Ash-free mass fraction: 1 - ac/100
# (presumably 'ac' is the ash content in wt% — TODO confirm against the data dictionary).
df['af_mass0'] = df['ac'].div(100.0).rsub(1.0)
# Yield re-based on the ash-free fraction, then divided by residence time to get
# the apparent rate that the Arrhenius fit below takes the logarithm of.
df['Y_af'] = df['devol_yield'].div(df['af_mass0'])
df['k_af'] = df['Y_af'].div(df['residence_time'])

# Universal gas constant.
R = 8.314  # J/(mol·K)

# Per-fuel Arrhenius fit:  ln(k_af) = ln(A0) - E_A / (R * T)
# i.e. a straight line in (1/T, ln k) with slope -E_A/R and intercept ln(A0).
# One record per fitted fuel is collected in `params` and turned into `df_params`.
params = []
for fuel, sub in df.groupby('fuel_type'):
    # Keep only rows that can enter the regression: the rate must be finite and
    # strictly positive (log of 0 / negative / inf is unusable) and the
    # temperature must be present. Doing this BEFORE the temperature-count check
    # guarantees the fit really spans the required number of temperatures.
    usable = (
        np.isfinite(sub['k_af'])
        & sub['k_af'].gt(0)
        & sub['temperature'].notna()
    )
    sub = sub.loc[usable]

    # need ≥3 temps for a decent linear fit (counted on the usable rows only)
    if sub['temperature'].nunique() < 3:
        continue

    # x = 1/T, y = ln(k_af)
    # 273.15 is added to 'temperature', i.e. it is treated as °C — TODO confirm units.
    T_K  = sub['temperature'] + 273.15
    invT = 1.0 / T_K
    ln_k = np.log(sub['k_af'])

    # linear regression of ln(k) on 1/T
    fit = linregress(invT, ln_k)
    slope, intercept, rval = fit.slope, fit.intercept, fit.rvalue

    Ea_J   = -slope * R          # J/mol, from slope = -E_A / R
    Ea_kJ  = Ea_J / 1000.0       # kJ/mol (this is what is stored as 'E_A')
    A      = np.exp(intercept)   # pre-exponential factor, same units as k_af

    params.append({
        'fuel_type': fuel,
        'E_A': Ea_kJ,
        'A0': A,
        'R2': rval**2
    })

# Explicit columns keep the frame well-formed (and mergeable on 'fuel_type')
# even when no fuel had enough usable temperatures to be fitted.
df_params = pd.DataFrame(params, columns=['fuel_type', 'E_A', 'A0', 'R2'])

# Attach each fuel's fitted E_A / A0 to its experiment rows. The left join keeps
# every experiment; fuels that were not fitted get NaN in the two new columns.
fit_cols = df_params[['fuel_type', 'E_A', 'A0']]
df_merged = df.merge(fit_cols, how='left', on='fuel_type')

# The intermediate ash-free helper columns are not part of the exported table.
df_merged = df_merged.drop(columns=['Y_af', 'af_mass0', 'k_af'])

df_merged.to_csv(r"C:\Users\demir\OneDrive\Desktop\MSc Thesis\lasstttrun\mad1_Ea.csv", index=False)


In [25]:
# Display the per-fuel Arrhenius fit table (fuel_type, E_A, A0, R2).
# NOTE(review): in the saved output below R2 never exceeds ~0.57 and RDF1 has a
# negative E_A, so these fits should be interpreted with caution.
df_params

Unnamed: 0,fuel_type,E_A,A0,R2
0,Cellulose,23.092332,674.606854,0.566289
1,Digestate,12.097037,71.010892,0.041667
2,Digestate_PE,28.558122,459.297836,0.388732
3,Digestate_PP,19.958405,210.706807,0.213244
4,Digestate_SCP,15.098726,115.829316,0.117411
5,HTC-MSW,4.281765,36.583777,0.024738
6,Hemicellulose,23.726948,749.305691,0.531538
7,Lignin,20.547269,294.42757,0.477753
8,Lignite,16.65137,52.567873,0.139027
9,RDF1,-9.368122,9.88697,0.027657


In [26]:
# Display the experiment table with the per-fuel E_A / A0 columns attached.
# NOTE(review): the saved output shows 'Unnamed: 0.1' and 'Unnamed: 0' columns,
# which presumably are index columns written by earlier CSV exports — confirm
# whether they should be dropped when the input file is read.
df_merged

Unnamed: 0.1,Unnamed: 0,sample,wc,vm,fc,ac,c,h,o,n,...,lhv,temperature,residence_time,pressure,heat_rate,fuel_type,fuel_category,devol_yield,E_A,A0
0,1571,cel_18,5.725,87.9005,6.3745,0.00,40.305405,4.51767,54.860844,0.269662,...,16.51605,200,1.0,1.0,1000,Cellulose,Biomass,2.018886,23.092332,674.606854
1,1572,cel_19,5.725,87.9005,6.3745,0.00,40.305405,4.51767,54.860844,0.269662,...,16.51605,200,1.0,1.0,1000,Cellulose,Biomass,2.324709,23.092332,674.606854
2,1438,htc_9,7.345,48.2400,6.9000,37.52,33.085000,3.26500,16.600000,1.877000,...,13.52800,200,0.5,1.0,1000,HTC-MSW,Mix,11.277050,4.281765,36.583777
3,1439,htc_15,7.345,48.2400,6.9000,37.52,33.085000,3.26500,16.600000,1.877000,...,13.52800,200,0.5,1.0,1000,HTC-MSW,Mix,16.384181,4.281765,36.583777
4,1441,htc_152,7.345,48.2400,6.9000,37.52,33.085000,3.26500,16.600000,1.877000,...,13.52800,200,1.0,1.0,1000,HTC-MSW,Mix,2.878561,4.281765,36.583777
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1197,477,wood_17,7.600,75.4000,16.4000,0.60,50.400000,5.70000,35.300000,0.300000,...,19.84300,1200,10.0,1.0,1000,Wood,Biomass,90.828402,15.934929,99.103319
1198,478,wood_18,7.600,75.4000,16.4000,0.60,50.400000,5.70000,35.300000,0.300000,...,19.84300,1200,10.0,1.0,1000,Wood,Biomass,88.860544,15.934929,99.103319
1199,479,wood_19,7.600,75.4000,16.4000,0.60,50.400000,5.70000,35.300000,0.300000,...,19.84300,1200,10.0,1.0,1000,Wood,Biomass,89.703178,15.934929,99.103319
1200,1179,wood_16,7.600,75.4000,16.4000,0.60,50.400000,5.70000,35.300000,0.300000,...,19.84300,1200,10.0,1.0,1000,Wood,Biomass,90.514795,15.934929,99.103319


In [27]:
# Write the merged table to disk without the index column (same target file as
# the export at the end of the fitting cell, so this overwrites it).
out_csv = r"C:\Users\demir\OneDrive\Desktop\MSc Thesis\lasstttrun\mad1_Ea.csv"
df_merged.to_csv(out_csv, index=False)

In [16]:
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.preprocessing import StandardScaler

# Load the filtered experiment table used for the feature-engineering comparison.
data_path = r"C:\Users\demir\OneDrive\Desktop\MSc Thesis\Final Results\Final Data Used\feature_engineering_effect\mad_t1_final_data_filtered.csv"
df = pd.read_csv(data_path, delimiter=',')
eps = 1e-6  # not used in this cell; kept in case other cells reference it


def _ratio(numerator, denominator):
    """Return numerator / denominator element-wise, with NaN where the denominator is 0.

    Masking zero denominators up front marks undefined ratios as missing (NaN)
    instead of letting ±inf flow into the log transforms and later statistics.
    """
    return numerator / denominator.where(denominator != 0)


# Ratio features built from existing columns.
df['vm_fc'] = _ratio(df['vm'], df['fc'])
df['oh_w'] = _ratio(df['o'], df['h'])
df['ac_fc'] = _ratio(df['ac'], df['fc'])
df['t_to_T'] = _ratio(df['temperature'], df['heat_rate'])
df['cl_ac'] = _ratio(df['cl'], df['ac'])
df['n_ac'] = _ratio(df['n'], df['ac'])

#Log transforms
# The logarithm is only defined for strictly positive values; everything else is
# masked to NaN first, which avoids numpy's "divide by zero" / "invalid value"
# RuntimeWarnings and keeps -inf out of the log_* columns.
for col in ['vm_fc', 'ac_fc', 'cl_ac', 'n_ac', 't_to_T','oh_w','oc', 'hc']:
    positive_only = df[col].where(df[col] > 0)
    df[f'log_{col}'] = np.log(positive_only)

# Process-condition columns shared by several of the feature sets below.
operational_feats = ['temperature', 'heat_rate', 'residence_time', 'pressure']

# Each set is a fresh list (built with +), so none of them aliases another.
baseline_feats = ['hc', 'oc', 'vm_fc'] + operational_feats
ratio_feats = ['vm_fc', 't_to_T', 'ac_fc', 'hc', 'oc'] + operational_feats

# Log-transformed ratios plus the raw operating conditions; heat_rate is not
# listed on its own here (it only enters through log_t_to_T).
logged_cols = ['vm_fc', 'ac_fc', 't_to_T', 'oc', 'hc']
ratio_feats_log = ['log_' + base for base in logged_cols]
ratio_feats_log += ['temperature', 'residence_time', 'pressure']

# Raw composition / proximate columns plus operating conditions.
full_feats = [
    'c', 'h', 'o', 'vm', 'fc', 'ac', 'n', 's', 'cl', 'lhv', 'hc', 'oc',
    'temperature', 'pressure', 'heat_rate', 'residence_time',
]

# Name -> column list, in the order the VIF tables are computed and printed.
feature_sets = dict(
    baseline=baseline_feats,
    ratios=ratio_feats,
    full=full_feats,
    ratio_log=ratio_feats_log,
)

# Variance inflation factors, one table per feature set.
# NOTE(review): statsmodels' variance_inflation_factor is understood not to add
# an intercept itself; standardising first centres every column — confirm
# against the statsmodels documentation.
vif_results = {}
for name, features in feature_sets.items():
    # Rows containing ±inf or NaN in any selected column are excluded.
    X = df[features].replace([np.inf, -np.inf], np.nan).dropna()
    X_scaled = StandardScaler().fit_transform(X)

    n_columns = X_scaled.shape[1]
    vif_values = []
    for position in range(n_columns):
        vif_values.append(variance_inflation_factor(X_scaled, position))

    vif_df = pd.DataFrame({'Feature': features, 'VIF': vif_values})
    vif_results[name] = vif_df

# Print every VIF table under a header naming its feature set.
for name, vif_table in vif_results.items():
    print(f"\n=== {name} ===", vif_table, sep="\n")


=== baseline ===
          Feature       VIF
0              hc  1.577955
1              oc  1.775371
2           vm_fc  2.432665
3     temperature  1.117733
4       heat_rate  1.209374
5  residence_time  1.239360
6        pressure  1.101652

=== ratios ===
          Feature       VIF
0           vm_fc  2.454256
1          t_to_T  2.008149
2           ac_fc  1.191009
3              hc  1.711343
4              oc  1.788775
5     temperature  1.118764
6       heat_rate  2.259884
7  residence_time  1.242680
8        pressure  1.105751

=== full ===
           Feature         VIF
0                c  278.954433
1                h  282.710226
2                o  135.630625
3               vm  196.849010
4               fc  430.472087
5               ac  201.230667
6                n   53.767307
7                s    7.343169
8               cl    7.006977
9              lhv   47.798738
10              hc  363.735190
11              oc   52.024463
12     temperature    1.154619
13        pres

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
