# 🚀 Self-Developed Dynamic Graph Learning Framework AdaptoFlux: Fully Automated Modeling on Titanic

Welcome to **AdaptoFlux** — an interpretable machine learning framework based on automatic function graph growth.

This notebook demonstrates how to:
- Load preprocessed data
- Train a dynamic function graph model
- Save the graph structure as JSON
- Visualize the graph (static hierarchical plot)
- Generate a Kaggle submission file

👉 All code is open-source. Feel free to Star & Fork!

In [None]:
# Fetch the AdaptoFlux sources into the current working directory.
# NOTE(review): `git clone` creates its own repository, so the preceding
# `git init` looks unnecessary — confirm and consider dropping it.
! git init
! git clone https://github.com/gugugu12138/AdaptoFlux  

In [None]:
# Install the Python dependencies listed by the cloned repository.
! pip install -r /kaggle/working/AdaptoFlux/requirements.txt


In [None]:
cd /kaggle/working/AdaptoFlux

In [None]:
import pandas as pd
import numpy as np
import json
import traceback
from ATF.core.flux import AdaptoFlux
from ATF.CollapseManager.collapse_functions import CollapseMethod
from ATF.ModelTrainer.LayerGrowTrainer.layer_grow_trainer import LayerGrowTrainer

import logging

# Configure the root logger: INFO and above, rendered as "[LEVEL] logger: message".
logging.basicConfig(
    format='[%(levelname)s] %(name)s: %(message)s',
    level=logging.INFO,
)

# Emit one record right away so the configuration can be checked in the cell output.
logging.info("✅ Logging Test: Configuration successful!")

## 🧩 Custom Method Pool (methods.py)

Ensure `methods.py` defines the basic functions (e.g., `add_values`, `multiply_values`), each decorated with `@method_profile`. An example method pool is located at `/kaggle/working/AdaptoFlux/examples/kaggle/titanic/methods.py`.

Content:

In [None]:
# This code block does not need to be executed
import math
from ATF.methods.decorators import method_profile


@method_profile(
    output_count=1,
    input_types=['scalar'],
    output_types=['scalar'],
    group="basic",  # same group as the other methods in this pool
    weight=1.0,
    vectorized=False
)
def return_value(x):
    """
    Identity method: pass the input through unchanged.

    :param x: Input value
    :return: Single-element list holding x
    """
    passthrough = x
    return [passthrough]

@method_profile(
    output_count=1,
    input_types=['scalar', 'scalar'],
    output_types=['scalar'],
    group="basic",  # same group as the other methods in this pool
    weight=1.0,
    vectorized=False
)
def add_values(x, y):
    """
    Sum two inputs.

    :param x: Left operand
    :param y: Right operand
    :return: Single-element list holding x + y
    """
    total = x + y
    return [total]

@method_profile(
    output_count=1,
    input_types=['scalar', 'scalar'],
    output_types=['scalar'],
    group="basic",  # same group as the other methods in this pool
    weight=1.0,
    vectorized=False
)
def calculate_difference(a, b):
    """
    Subtract the second input from the first.

    :param a: Value to subtract from (minuend)
    :param b: Value to subtract (subtrahend)
    :return: Single-element list holding a - b
    """
    difference = a - b
    return [difference]


@method_profile(
    output_count=1,
    input_types=['scalar', 'scalar'],
    output_types=['scalar'],
    group="basic",  # same group as the other methods in this pool
    weight=1.0,
    vectorized=False
)
def multiply_values(x, y):
    """
    Multiply two inputs.

    :param x: Left factor
    :param y: Right factor
    :return: Single-element list holding x * y
    """
    product = x * y
    return [product]

@method_profile(
    output_count=1,
    input_types=['scalar', 'scalar'],
    output_types=['scalar'],
    group="basic",
    weight=1.0,
    vectorized=False
)
def divide_values(x, y):
    """
    Divide the first input by the second, guarding against a zero divisor.

    :param x: Dividend
    :param y: Divisor
    :return: Single-element list holding x / y, or 0 when y equals 0
    """
    if y != 0:
        return [x / y]
    # Zero divisor: fall back to 0 instead of raising.
    return [0]

@method_profile(
    output_count=2,
    input_types=['scalar'],
    output_types=['scalar', 'scalar'],
    group="basic",  # same group as the other methods in this pool
    weight=1.0,
    vectorized=False
)
def return_two_values(x):
    """
    Fan the input out into two identical outputs.

    :param x: Input value
    :return: Two-element list holding x twice
    """
    first, second = x, x
    return [first, second]

# Disabled: it brought no improvement and only slowed things down while adding computation
# @method_profile(
#     output_count=3,
#     input_types=['scalar'],
#     output_types=['scalar', 'scalar', 'scalar'],
#     group="basic",           # ✅ Unified group
#     weight=1.0,
#     vectorized=False
# )
# def return_three_values(x):
#     """
#     Returns three copies of the input value
#     :param x: Input value
#     :return: Original value
#     """
#     return [x, x, x]

@method_profile(
    output_count=1,
    input_types=['scalar'],
    output_types=['scalar'],
    group="basic",  # kept in the shared "basic" group
    weight=1.0,
    vectorized=False
)
def decrement(x):
    """
    Subtract one from the input.

    :param x: Input value
    :return: Single-element list holding x - 1
    """
    lowered = x - 1
    return [lowered]


@method_profile(
    output_count=1,
    input_types=['scalar'],
    output_types=['scalar'],
    group="basic",  # kept in the shared "basic" group
    weight=1.0,
    vectorized=False
)
def increment(x):
    """
    Add one to the input.

    :param x: Input value
    :return: Single-element list holding x + 1
    """
    raised = x + 1
    return [raised]


@method_profile(
    output_count=1,
    input_types=['scalar'],
    output_types=['scalar'],
    group="basic",  # kept in the shared "basic" group
    weight=1.0,
    vectorized=False
)
def negate_value(x):
    """
    Flip the sign of the input.

    :param x: Input value
    :return: Single-element list holding -x
    """
    negated = -x
    return [negated]

## 🗃️ Loading Titanic Preprocessed Data

In [None]:
def load_titanic_for_adaptoflux(train_processed_path, methods_path=None, collapse_method=CollapseMethod.SUM):
    """
    Build an AdaptoFlux instance from a preprocessed Titanic CSV.

    :param train_processed_path: Path of the CSV; it must include a 'Survived' column
    :param methods_path: Forwarded unchanged to AdaptoFlux
    :param collapse_method: Forwarded unchanged to AdaptoFlux
    :return: AdaptoFlux instance built from the float64 feature matrix and the 'Survived' labels
    :raises ValueError: If the CSV has no 'Survived' column
    """
    frame = pd.read_csv(train_processed_path)
    if 'Survived' not in frame.columns:
        raise ValueError("train_processed.csv must contain 'Survived' column")

    # Labels come from the 'Survived' column; every other column is a feature.
    target = frame['Survived'].values
    feature_frame = frame.drop(columns=['Survived'])
    features = np.array(feature_frame.values, dtype=np.float64)

    flux = AdaptoFlux(
        values=features,
        labels=target,
        methods_path=methods_path,
        collapse_method=collapse_method
    )
    return flux

# Load model
# Builds the AdaptoFlux instance from the preprocessed training CSV and the
# example method pool. Both paths are relative, so they resolve against the
# notebook's current working directory.
model = load_titanic_for_adaptoflux(
    train_processed_path='examples/kaggle/titanic/output/train_processed.csv',
    methods_path='examples/kaggle/titanic/methods.py',
    collapse_method=CollapseMethod.SUM
)

# Report the number of rows and columns of the feature matrix held by the model.
print(f"✅ Data loaded: {model.values.shape[0]} samples, {model.values.shape[1]} features")

## 🌱 Adding Custom Collapse Function

In [None]:
def collapse_sum_positive(values):
    """
    Collapse a collection of values into a binary class.

    :param values: Values to sum (anything np.sum accepts)
    :return: 1 when the sum of all values is strictly positive, otherwise 0
    """
    is_positive = np.sum(values) > 0
    return int(is_positive)

# Hand the custom collapse function to the model.
# NOTE(review): presumably this replaces the CollapseMethod.SUM chosen at
# construction time — confirm against AdaptoFlux.add_collapse_method.
model.add_collapse_method(collapse_sum_positive)

## 🏗️ Model Training (Layer-by-Layer Growth)

In [None]:
# Wrap the model in a layer-growing trainer.
# NOTE(review): the meaning of max_attempts and decision_threshold is defined
# by LayerGrowTrainer, which is not visible in this notebook — confirm against
# the library before tuning them.
trainer = LayerGrowTrainer(
    adaptoflux_instance=model,
    max_attempts=10,
    decision_threshold=0.0,
    verbose=True
)

# Train on the same feature matrix and labels the model was built with.
# NOTE(review): save_model=True presumably writes the result under models/
# (later cells read models/best) — confirm. The rollback settings
# (on_retry_exhausted, rollback_layers, max_total_attempts) are likewise
# interpreted by the trainer.
trainer.train(
    input_data=model.values,
    target=model.labels,
    max_layers=15,
    save_model=True,
    on_retry_exhausted="rollback",
    rollback_layers=2,
    max_total_attempts=2000
)

## 🎨 Visualizing Graph Structure

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Read the trained graph exported as GEXF (absolute path inside the cloned repo).
G = nx.read_gexf("/kaggle/working/AdaptoFlux/models/best/graph.gexf")

# Choose the root of the layered layout. "root" is only a placeholder name;
# when the graph has no such node, fall back to the highest-degree node.
root = "root"  # Replace with your actual root node name
if root not in G:
    # Build the degree table once instead of once per candidate node.
    degrees = dict(G.degree())
    if not degrees:
        raise ValueError("Graph has no nodes; nothing to visualize")
    root = max(degrees, key=degrees.get)
    print(f"Using highest-degree node as root: {root}")

# Layer of a node = BFS (hop) distance from root, ignoring edge direction.
undirected = G.to_undirected() if G.is_directed() else G
bfs_dist = nx.shortest_path_length(undirected, source=root)

# Single-source shortest_path_length reports only the nodes it can reach and
# does not raise for a disconnected graph, so detect that case by count and
# keep only the component containing root. Otherwise the unreachable nodes
# would have neither a position nor a colour below.
if len(bfs_dist) < G.number_of_nodes():
    print("Graph is disconnected, only considering connected component containing root")
    G = G.subgraph(list(bfs_dist))

# Group nodes by distance (layer)
layers = {}
for node, dist in bfs_dist.items():
    layers.setdefault(dist, []).append(node)

# Manual layout: x is the node's index within its layer, y = -layer so the
# root ends up at the top.
pos = {}
for layer, nodes in layers.items():
    pos.update({node: (i, -layer) for i, node in enumerate(nodes)})

# Colour each node by its layer, in the same order nx.draw iterates the nodes.
node_colors = [bfs_dist[node] for node in G.nodes]

# Plot
plt.figure(figsize=(12, 8))
nx.draw(
    G, pos,
    with_labels=True,
    node_color=node_colors,
    cmap='viridis',
    node_size=600,
    font_size=6,
    font_color='black',
    edge_color='gray',
    arrows=G.is_directed(),
    width=1.0,
    alpha=0.9
)

plt.title(f"Hierarchical Layout from Root: {root}")
plt.axis('off')
plt.tight_layout()
plt.show()

## 📊 Model Evaluation

In [None]:
def _evaluate_accuracy(output: np.ndarray, target: np.ndarray) -> float:
    try:
        if len(output.shape) == 1 or output.shape[1] == 1:
            pred_classes = (output >= 0.5).astype(int).flatten()
        else:
            pred_classes = np.argmax(output, axis=1)
        true_labels = np.array(target).flatten()
        return np.mean(pred_classes == true_labels)
    except Exception as e:
        print(f"Evaluation failed: {e}")
        return 0.0

# Run the trained graph on the feature matrix the model holds and score the
# result against that same model's labels.
# NOTE(review): if `model` is still the instance built from
# train_processed.csv, this is training-set accuracy, not a held-out estimate.
pred = model.infer_with_graph(model.values)
acc = _evaluate_accuracy(pred, model.labels)
print(f"🎯 Final accuracy: {acc:.4f}")

## 📤 Generating Kaggle Submission File

In [None]:
import pandas as pd
import os

def load_titanic_for_adaptoflux(train_processed_path, methods_path=None, collapse_method=CollapseMethod.SUM):
    """
    Turn a preprocessed Titanic CSV into an AdaptoFlux instance.

    The 'Survived' column becomes the label vector; all remaining columns are
    converted to a float64 feature matrix.

    :param train_processed_path: Path to the preprocessed CSV (must contain 'Survived')
    :param methods_path: Method path (passed to AdaptoFlux)
    :param collapse_method: Collapse method (passed to AdaptoFlux)
    :return: AdaptoFlux instance
    :raises ValueError: If the 'Survived' column is missing
    """
    label_column = 'Survived'
    table = pd.read_csv(train_processed_path)

    # Guard clause: the label column is mandatory.
    if label_column not in table.columns:
        raise ValueError("train_processed.csv must contain 'Survived' column as label")

    # Cast explicitly to float64 so integer and float columns share one dtype.
    return AdaptoFlux(
        values=np.array(table.drop(columns=[label_column]).values, dtype=np.float64),
        labels=table[label_column].values,
        methods_path=methods_path,
        collapse_method=collapse_method
    )

# Rebuild the model around the preprocessed test features.
# NOTE(review): load_titanic_for_adaptoflux raises ValueError unless the CSV
# has a 'Survived' column, so test_processed.csv is assumed to carry one
# (presumably a placeholder) — confirm against the preprocessing step.
model = load_titanic_for_adaptoflux(train_processed_path='examples/kaggle/titanic/output/test_processed.csv',
                                    methods_path='examples/kaggle/titanic/methods.py')

# Register the same custom collapse function used in training, then restore
# the trained graph from disk (path relative to the working directory).
model.add_collapse_method(collapse_sum_positive)

model.load_model(folder='models/best')

pred = model.infer_with_graph(model.values)

# Coerce predictions to a flat vector of integer 0/1 labels, using the same
# 0.5 threshold as _evaluate_accuracy. This leaves 0/1 integer output
# unchanged and also copes with float or column-shaped (n, 1) output, which
# would otherwise break or corrupt the 'Survived' column.
survived = (np.asarray(pred).reshape(-1) >= 0.5).astype(int)

# Generate corresponding PassengerId starting from 892
# NOTE(review): assumes the rows of test_processed.csv keep the original
# Kaggle test order, whose ids start at 892 — confirm.
passenger_ids = range(892, 892 + len(survived))

# Build DataFrame
submission = pd.DataFrame({
    'PassengerId': passenger_ids,
    'Survived': survived
})

submission.to_csv('/kaggle/working/submission.csv', index=False)

print("✅ Submission file generated: /kaggle/working/submission.csv")

print("📁 Files in /kaggle/working:")
print(os.listdir("/kaggle/working"))

# `display` is provided by the notebook environment (IPython).
display(submission.head())

## 🙏 Acknowledgments

Thanks to Kaggle for providing the Titanic dataset.

If you like this project, please:
- ⭐ Star the repository
- 💬 Share your thoughts in the comments!