In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/neurips-open-polymer-prediction-2025/sample_submission.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train.csv
/kaggle/input/neurips-open-polymer-prediction-2025/test.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset2.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset4.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset1.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset3.csv
/kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl


In [2]:
!pip install /kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl

Processing /kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl
Installing collected packages: rdkit
Successfully installed rdkit-2025.3.3


# Data Loading and Preprocessing Pipeline

In [3]:
# === Imports ===
import pandas as pd
import numpy as np
from rdkit import Chem

# === Config ===
BASE_PATH = '/kaggle/input/neurips-open-polymer-prediction-2025/'
TARGETS = ['Tg', 'FFV', 'Tc', 'Density', 'Rg']
BAD_PATTERNS = ['[R]', '[R1]', '[R2]', '[R3]', '[R4]', '[R5]',
                "[R']", '[R"]', 'R1', 'R2', 'R3', 'R4', 'R5',
                '([R])', '([R1])', '([R2])']

# === SMILES Cleaner ===
def clean_and_validate_smiles(smiles):
    if not isinstance(smiles, str) or not smiles:
        return None
    for pattern in BAD_PATTERNS:
        if pattern in smiles:
            return None
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol:
            return Chem.MolToSmiles(mol, canonical=True)
    except:
        return None
    return None

# === Load Train/Test ===
train = pd.read_csv(BASE_PATH + 'train.csv')
test = pd.read_csv(BASE_PATH + 'test.csv')

train['SMILES'] = train['SMILES'].apply(clean_and_validate_smiles)
test['SMILES'] = test['SMILES'].apply(clean_and_validate_smiles)

train.dropna(subset=['SMILES'], inplace=True)
test.dropna(subset=['SMILES'], inplace=True)

# === Load External Datasets (excluding dataset2) ===
external_datasets = []

def load_external(path, target, rename_map=None):
    try:
        df = pd.read_csv(path)
        if rename_map:
            df = df.rename(columns=rename_map)
        if 'SMILES' in df.columns and target in df.columns:
            df = df[['SMILES', target]].dropna()
            external_datasets.append((target, df))
            print(f"✅ Loaded {path} ({len(df)} entries for {target})")
        else:
            print(f"⚠️ Skipped {path}: required columns missing")
    except Exception as e:
        print(f"⚠️ Failed to load {path}: {e}")

load_external(BASE_PATH + 'train_supplement/dataset1.csv', 'Tc', rename_map={'TC_mean': 'Tc'})
load_external(BASE_PATH + 'train_supplement/dataset3.csv', 'Tg')
load_external(BASE_PATH + 'train_supplement/dataset4.csv', 'FFV')

# === Merge External Data ===
def merge_external(train_df, ext_df, target):
    ext_df['SMILES'] = ext_df['SMILES'].apply(clean_and_validate_smiles)
    ext_df = ext_df.dropna(subset=['SMILES', target])
    ext_df = ext_df.groupby('SMILES', as_index=False)[target].mean()

    # Fill missing target values in existing rows
    existing_smiles = set(train_df['SMILES'])
    to_fill = ext_df[ext_df['SMILES'].isin(existing_smiles)]
    for _, row in to_fill.iterrows():
        mask = (train_df['SMILES'] == row['SMILES']) & (train_df[target].isna())
        train_df.loc[mask, target] = row[target]

    # Add new rows
    new_smiles = set(ext_df['SMILES']) - existing_smiles
    new_rows = ext_df[ext_df['SMILES'].isin(new_smiles)].copy()
    for col in TARGETS:
        if col not in new_rows.columns:
            new_rows[col] = np.nan
    return pd.concat([train_df, new_rows[['SMILES'] + TARGETS]], ignore_index=True)

# === Apply Merges ===
train_extended = train[['SMILES'] + TARGETS].copy()
for target, ext in external_datasets:
    train_extended = merge_external(train_extended, ext, target)

# === Final Clean-Up ===
train_extended = train_extended.replace([np.inf, -np.inf], np.nan)
train_extended = train_extended.dropna(subset=TARGETS, how='all')
train_extended = train_extended.drop_duplicates(subset=['SMILES']).reset_index(drop=True)

# === Summary ===
print("\n📊 Final Summary:")
print(f"Train: {len(train)} | Extended: {len(train_extended)}")
for t in TARGETS:
    base = train[t].notna().sum()
    ext = train_extended[t].notna().sum()
    print(f"• {t:<8}: {ext} total ({ext - base:+} from supplements)")

print("\n✅ Data loading and preprocessing complete.")


✅ Loaded /kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset1.csv (874 entries for Tc)
✅ Loaded /kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset3.csv (46 entries for Tg)
✅ Loaded /kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset4.csv (862 entries for FFV)

📊 Final Summary:
Train: 7973 | Extended: 8972
• Tg      : 557 total (+46 from supplements)
• FFV     : 7892 total (+862 from supplements)
• Tc      : 866 total (+129 from supplements)
• Density : 613 total (+0 from supplements)
• Rg      : 614 total (+0 from supplements)

✅ Data loading and preprocessing complete.


In [4]:
smiles_list = train_extended['SMILES'].tolist()
# Clean SMILES column robustly
train_extended['SMILES'] = train_extended['SMILES'].apply(clean_and_validate_smiles)
train_extended.shape
train_extended

Unnamed: 0,SMILES,Tg,FFV,Tc,Density,Rg
0,*CC(*)c1ccccc1C(=O)OCCCCCC,,0.374645,0.205667,,
1,*Nc1ccc([C@H](CCC)c2ccc(C3(c4ccc([C@@H](CCC)c5...,,0.370410,,,
2,*Oc1ccc(S(=O)(=O)c2ccc(Oc3ccc(C4(c5ccc(Oc6ccc(...,,0.378860,,,
3,*Nc1ccc(-c2c(-c3ccc(C)cc3)c(-c3ccc(C)cc3)c(N*)...,,0.387324,,,
4,*Oc1ccc(OC(=O)c2cc(OCCCCCCCCCOCC3CCCN3c3ccc([N...,,0.355470,,,
...,...,...,...,...,...,...
8967,*c1cccc(OCCCCCCOc2cccc(N3C(=O)c4ccc(-c5cccc6c5...,,0.349095,,,
8968,*c1cccc(OCCCCCOc2cccc(N3C(=O)c4ccc(-c5cccc6c5C...,,0.350892,,,
8969,*c1cccc(OCCCCOc2cccc(N3C(=O)c4ccc(-c5cccc6c5C(...,,0.345386,,,
8970,*c1cccc(Oc2cccc(Oc3cccc(N4C(=O)c5ccc(Oc6ccc(Sc...,,0.362224,,,


# Feature Engineering Pipeline

In [5]:
import numpy as np
from rdkit import Chem
from rdkit.Chem import MACCSkeys, AllChem, rdMolDescriptors
from rdkit.Chem import Descriptors
import networkx as nx
from tqdm import tqdm

# === Feature category config ===
FEATURE_CATEGORIES = {
    "rdkit": "all", 
    "graph": [
        'graph_diameter', 'avg_shortest_path', 'num_cycles',
        'betweenness_mean', 'betweenness_std', 'eigenvector_mean',
        'ring_4', 'max_degree', 'closeness_mean', 'katz_centrality_std',
        'heteroatom_ratio'
    ],
    "maccs": ['MACCS_Key130', 'MACCS_Key142', 'MACCS_Key066', 'MACCS_Key153'],
    "topo_torsion": ['TopologicalTorsion_Bit0512', 'TopologicalTorsion_Bit1296'],
    "atom_pair": ['AtomPair_B512_Bit0138', 'AtomPair_B512_Bit0448', 'AtomPair_B512_Bit0408'],
    "morgan": "all",
}

USELESS_COLS = set([
    'BCUT2D_MWHI', 'BCUT2D_MWLOW', 'BCUT2D_CHGHI', 'BCUT2D_CHGLO',
    'BCUT2D_LOGPHI', 'BCUT2D_LOGPLOW', 'BCUT2D_MRHI', 'BCUT2D_MRLOW',
    'NumRadicalElectrons', 'SMR_VSA8', 'SlogP_VSA9', 'fr_barbitur',
    'fr_benzodiazepine', 'fr_dihydropyridine', 'fr_epoxide', 'fr_isothiocyan',
    'fr_lactam', 'fr_nitroso', 'fr_prisulfonamd', 'fr_thiocyan',
    'LabuteASA', 'HeavyAtomCount', 'Chi4v', 'MinAbsPartialCharge',
    'MinPartialCharge', 'MaxAbsPartialCharge', 'fr_nitrile',
    'NumAromaticCarbocycles', 'NumAromaticRings', 'fr_amide',
    'fr_Nhpyrrole', 'fr_phenol', 'fr_phenol_noOrthoHbond', 'fr_COO2',
    'fr_diazo', 'fr_nitro_arom', 'fr_phos_ester'
])

# === Core Functions ===

def compute_rdkit_descriptors(mol, allowed=None):
    descs = {}
    for name, func in Descriptors.descList:
        if name in USELESS_COLS:
            continue
        if allowed is not None and name not in allowed:
            continue
        try:
            descs[name] = func(mol)
        except:
            descs[name] = np.nan
        
    try:
        descs['LogP'] = MolLogP(mol)
    except:
        descs['LogP'] = np.nan

    try:
        descs['NumAtoms'] = mol.GetNumAtoms() if mol else np.nan
    except:
        descs['NumAtoms'] = np.nan

    try:
        descs['RotatableBonds'] = CalcNumRotatableBonds(mol)
    except:
        descs['RotatableBonds'] = np.nan
        
    return descs

def compute_graph_descriptors(mol):
    g = nx.Graph()
    g.add_nodes_from(range(mol.GetNumAtoms()))
    g.add_edges_from([(b.GetBeginAtomIdx(), b.GetEndAtomIdx()) for b in mol.GetBonds()])
    
    descriptors = {}
    try:
        descriptors['graph_diameter'] = nx.diameter(g)
        descriptors['avg_shortest_path'] = nx.average_shortest_path_length(g)
    except:
        descriptors['graph_diameter'] = 0
        descriptors['avg_shortest_path'] = 0

    descriptors['num_cycles'] = len(nx.cycle_basis(g))
    descriptors['betweenness_mean'] = np.mean(list(nx.betweenness_centrality(g).values()))
    descriptors['betweenness_std'] = np.std(list(nx.betweenness_centrality(g).values()))
    
    try:
        ec = nx.eigenvector_centrality_numpy(g)
        descriptors['eigenvector_mean'] = np.mean(list(ec.values()))
    except Exception:
        descriptors['eigenvector_mean'] = np.nan  # <-- fix here
        
    descriptors['closeness_mean'] = np.mean(list(nx.closeness_centrality(g).values()))
    descriptors['max_degree'] = max(dict(g.degree()).values())
    
    try:
        katz = nx.katz_centrality_numpy(g)
        descriptors['katz_centrality_std'] = np.std(list(katz.values()))
    except Exception:
        descriptors['katz_centrality_std'] = np.nan  # <-- fix here
        
    descriptors['ring_4'] = sum(1 for r in mol.GetRingInfo().AtomRings() if len(r) == 4)
    descriptors['heteroatom_ratio'] = sum(1 for a in mol.GetAtoms() if a.GetAtomicNum() not in [1, 6]) / mol.GetNumAtoms()
    
    return descriptors

def compute_maccs(mol):
    fp = MACCSkeys.GenMACCSKeys(mol)
    return {f'MACCS_Key{str(i).zfill(3)}': int(fp[i]) for i in range(len(fp))}

def compute_topo_torsion(mol):
    torsion_bits = ['TopologicalTorsion_Bit0512', 'TopologicalTorsion_Bit1296']
    torsion_defaults = {bit: 0 for bit in torsion_bits}

    if mol:
        try:
            fp = rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(mol, nBits=2048)
            bits = [int(x) for x in fp.ToBitString()]
            return {
                'TopologicalTorsion_Bit0512': bits[512],
                'TopologicalTorsion_Bit1296': bits[1296]
            }
        except Exception as e:
            print(f"TT fingerprint failed: {e}")
            return torsion_defaults
    else:
        return torsion_defaults

def compute_atom_pair(mol):
    fp = AllChem.GetHashedAtomPairFingerprintAsBitVect(mol, nBits=2048)
    arr = np.zeros((2048,), dtype=int)
    AllChem.DataStructs.ConvertToNumpyArray(fp, arr)
    return {
        'AtomPair_B512_Bit0138': arr[138],
        'AtomPair_B512_Bit0448': arr[448],
        'AtomPair_B512_Bit0408': arr[408],
    }

def compute_morgan(mol, nBits=2048, radius=2):
    results = {}
    try:
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=nBits)
        bit_array = [int(b) for b in fp.ToBitString()]
        for i in range(nBits):
            results[f'Morgan_Bit{i:04d}'] = bit_array[i]
    except Exception:
        for i in range(nBits):
            results[f'Morgan_Bit{i:04d}'] = np.nan  # Use NaN to indicate failure
    return results

def compute_all_features(smiles_list, verbose=True):
    feature_dict = {}
    valid_idx = []
    failed_idx = []

    for idx, smi in enumerate(tqdm(smiles_list, desc="Computing Features")):
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            failed_idx.append(idx)
            continue
        valid_idx.append(idx)

        feats = {}

        # 1. RDKit descriptors (filtered)
        feats.update(compute_rdkit_descriptors(mol))

        # 2. Graph features
        feats.update(compute_graph_descriptors(mol))

        # 3. MACCS
        feats.update(compute_maccs(mol))

        # 4. Topological Torsion
        feats.update(compute_topo_torsion(mol))

        # 5. Atom Pair
        feats.update(compute_atom_pair(mol))

        # 6. Morgan fingerprint bits
        feats.update(compute_morgan(mol)) 

        for k, v in feats.items():
            if k not in feature_dict:
                feature_dict[k] = []
            feature_dict[k].append(v)

    # Fill missing values with None for failed molecules
    for k in feature_dict:
        while len(feature_dict[k]) < len(valid_idx):
            feature_dict[k].append(None)

    # Summary
    if verbose:
        print("\n--- Feature Engineering Summary ---")
        print(f"Total SMILES: {len(smiles_list)}")
        print(f"Valid molecules: {len(valid_idx)}")
        print(f"Invalid molecules: {len(failed_idx)}")
        print(f"Number of computed features: {len(feature_dict)}")
        sample_key = next(iter(feature_dict))
        print(f"Feature vector length per molecule: {len(feature_dict[sample_key])}")
        print("-----------------------------------")

    return feature_dict, valid_idx


In [6]:
from rdkit import RDLogger

RDLogger.DisableLog('rdApp.*')  # Suppress all RDKit warnings

# For train set (you already have this)
feature_dict_train, valid_idx_train = compute_all_features(train_extended["SMILES"], verbose=True)
features_train = pd.DataFrame(feature_dict_train)
features_train = features_train.reset_index(drop=True)

# For test set (do the same)
feature_dict_test, valid_idx_test = compute_all_features(test["SMILES"], verbose=True)
features_test = pd.DataFrame(feature_dict_test)
features_test = features_test.reset_index(drop=True)

print("Train features shape:", features_train.shape)
print("Test features shape:", features_test.shape)
print("Training dataframe Shape:",train_extended.shape)
print("Test dataframe Shape:",test.shape)

Computing Features: 100%|██████████| 8972/8972 [05:17<00:00, 28.28it/s]



--- Feature Engineering Summary ---
Total SMILES: 8972
Valid molecules: 8972
Invalid molecules: 0
Number of computed features: 2414
Feature vector length per molecule: 8972
-----------------------------------


Computing Features: 100%|██████████| 3/3 [00:00<00:00, 24.78it/s]


--- Feature Engineering Summary ---
Total SMILES: 3
Valid molecules: 3
Invalid molecules: 0
Number of computed features: 2414
Feature vector length per molecule: 3
-----------------------------------
Train features shape: (8972, 2414)
Test features shape: (3, 2414)
Training dataframe Shape: (8972, 6)
Test dataframe Shape: (3, 2)





# Catboost Pipeline 

In [7]:
import pandas as pd
from catboost import CatBoostRegressor

# List of target columns
target_cols = ['Tg', 'FFV', 'Tc', 'Density', 'Rg']

# Create dictionaries to hold models and test predictions
models = {}
test_preds = pd.DataFrame(index=test.index)

# Train one model per target on full data and generate test predictions
for target in target_cols:
    print(f"\n🚧 Training model for {target}...")

    # Select rows where this target is not NaN
    mask = ~train_extended[target].isna()
    X_train_target = features_train.loc[mask].reset_index(drop=True)
    y_train_target = train_extended.loc[mask, target].reset_index(drop=True)

    model = CatBoostRegressor(
        iterations=1000,
        learning_rate=0.05,
        depth=6,
        loss_function='MAE',
        random_seed=42,
        verbose=100
    )

    model.fit(X_train_target, y_train_target)

    # Predict on test set
    preds = model.predict(features_test)
    test_preds[target] = preds
    models[target] = model

# Save submission
submission = test[['id']].copy()
submission = pd.concat([submission, test_preds], axis=1)
submission.to_csv("submission.csv", index=False)
print("\n✅ Submission file 'submission.csv' created!")



🚧 Training model for Tg...
0:	learn: 84.0554740	total: 79.9ms	remaining: 1m 19s
100:	learn: 34.1749594	total: 1.75s	remaining: 15.5s
200:	learn: 24.7353633	total: 3.38s	remaining: 13.4s
300:	learn: 19.9478458	total: 5.07s	remaining: 11.8s
400:	learn: 16.9583813	total: 6.72s	remaining: 10s
500:	learn: 14.6657455	total: 8.34s	remaining: 8.3s
600:	learn: 13.0833830	total: 9.96s	remaining: 6.61s
700:	learn: 12.0050819	total: 11.6s	remaining: 4.94s
800:	learn: 11.1326061	total: 13.2s	remaining: 3.28s
900:	learn: 10.3050614	total: 14.9s	remaining: 1.64s
999:	learn: 9.7041873	total: 16.5s	remaining: 0us

🚧 Training model for FFV...
0:	learn: 0.0199091	total: 40.4ms	remaining: 40.3s
100:	learn: 0.0087611	total: 2.87s	remaining: 25.6s
200:	learn: 0.0072915	total: 5.63s	remaining: 22.4s
300:	learn: 0.0063783	total: 8.67s	remaining: 20.1s
400:	learn: 0.0058588	total: 11.8s	remaining: 17.7s
500:	learn: 0.0054663	total: 14.6s	remaining: 14.6s
600:	learn: 0.0051420	total: 17.4s	remaining: 11.6s
700

# Cross Validate And Wmae

In [8]:
import numpy as np
import pandas as pd
from catboost import CatBoostRegressor
from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_absolute_error

# Set up
target_cols = ['Tg', 'FFV', 'Tc', 'Density', 'Rg']
n_splits = 5
random_seed = 42

# Use IDs as groups if no specific grouping is available
groups = train_extended['id'] if 'id' in train_extended.columns else np.arange(len(train_extended))

# Create OOF prediction dataframe
oof_preds = pd.DataFrame(index=train_extended.index, columns=target_cols, dtype=float)

# Competition weights calculation
N_k = train_extended[target_cols].notna().sum()  # Number of samples per target
ranges = {t: train_extended[t].max() - train_extended[t].min() for t in target_cols}
weights = {t: (np.sqrt(N_k[t])/ranges[t]) for t in target_cols}
total = sum(weights.values())
weights = {t: weights[t]/total for t in target_cols}

print("📊 Beginning GroupKFold Cross-Validation...")
for target in target_cols:
    print(f"\n🔁 Cross-validating model for {target}...")
    
    mask = ~train_extended[target].isna()
    X = features_train.loc[mask].reset_index(drop=True)
    y = train_extended.loc[mask, target].reset_index(drop=True)
    group_vals = pd.Series(groups)[mask].reset_index(drop=True)
    oof_target = np.zeros(len(y))
    
    gkf = GroupKFold(n_splits=n_splits)
    for fold, (tr_idx, va_idx) in enumerate(gkf.split(X, y, groups=group_vals)):
        print(f"  🔹 Fold {fold+1}/{n_splits}")
        X_tr, X_va = X.iloc[tr_idx], X.iloc[va_idx]
        y_tr, y_va = y.iloc[tr_idx], y.iloc[va_idx]

        model = CatBoostRegressor(
            iterations=1000,
            learning_rate=0.05,
            depth=6,
            loss_function='MAE',
            random_seed=random_seed,
            verbose=0,
            early_stopping_rounds=50
        )
        model.fit(X_tr, y_tr, eval_set=(X_va, y_va), use_best_model=True)
        oof_target[va_idx] = model.predict(X_va)

    # Store predictions in original index space
    oof_preds.loc[mask, target] = oof_target

    # Compute MAE for this target
    mae = mean_absolute_error(y, oof_target)
    weighted = mae * weights[target]
    print(f"📌 {target}: MAE = {mae:.4f}, Weight = {weights[target]:.4f}, Weighted = {weighted:.4f}")

# Final WMAE
print("\n✅ Calculating overall Weighted MAE (WMAE)...")
wmae_total = 0.0
for target in target_cols:
    mask = ~train_extended[target].isna()
    mae = mean_absolute_error(train_extended.loc[mask, target], oof_preds.loc[mask, target])
    weighted = mae * weights[target]
    wmae_total += weighted
print(f"\n🎯 Final Out-of-Fold WMAE: {wmae_total:.4f}")

📊 Beginning GroupKFold Cross-Validation...

🔁 Cross-validating model for Tg...
  🔹 Fold 1/5
  🔹 Fold 2/5
  🔹 Fold 3/5
  🔹 Fold 4/5
  🔹 Fold 5/5
📌 Tg: MAE = 48.8333, Weight = 0.0002, Weighted = 0.0091

🔁 Cross-validating model for FFV...
  🔹 Fold 1/5
  🔹 Fold 2/5
  🔹 Fold 3/5
  🔹 Fold 4/5
  🔹 Fold 5/5
📌 FFV: MAE = 0.0061, Weight = 0.7906, Weighted = 0.0048

🔁 Cross-validating model for Tc...
  🔹 Fold 1/5
  🔹 Fold 2/5
  🔹 Fold 3/5
  🔹 Fold 4/5
  🔹 Fold 5/5
📌 Tc: MAE = 0.0303, Weight = 0.0933, Weighted = 0.0028

🔁 Cross-validating model for Density...
  🔹 Fold 1/5
  🔹 Fold 2/5
  🔹 Fold 3/5
  🔹 Fold 4/5
  🔹 Fold 5/5
📌 Density: MAE = 0.0278, Weight = 0.1110, Weighted = 0.0031

🔁 Cross-validating model for Rg...
  🔹 Fold 1/5
  🔹 Fold 2/5
  🔹 Fold 3/5
  🔹 Fold 4/5
  🔹 Fold 5/5
📌 Rg: MAE = 1.4984, Weight = 0.0049, Weighted = 0.0073

✅ Calculating overall Weighted MAE (WMAE)...

🎯 Final Out-of-Fold WMAE: 0.0271


In [9]:
submission

Unnamed: 0,id,Tg,FFV,Tc,Density,Rg
0,1109053969,166.650778,0.3737,0.197476,1.179752,22.789036
1,1422188626,166.423828,0.376546,0.234134,1.0947,20.282314
2,2032016830,94.47176,0.351152,0.243154,1.156533,20.550048
