# Structure Learning with GES

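This notebook demonstrates running GES (Greedy Equivalence Search) on League of Legends match data to learn a causal structure (a CPDAG), estimate its conditional probability tables (CPTs), and query the resulting model for win probabilities.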


In [None]:
# Imports
# Put the parent directory first on the module search path so that the
# `src` package imported below can be resolved (presumably the notebook
# lives one level below the project root — confirm).
import sys
sys.path.insert(0, '..')

# Third-party libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Project modules
from src import config, preprocessing, ges, parameters, queries, visualize

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension; mode 2 reloads modules before each
# cell executes, so edits to the project code do not need a kernel restart.
%load_ext autoreload
%autoreload 2

print("Loaded modules successfully!")


## 1. Preprocess Data for a Specific Rank

Let's start with the Diamond rank as an example.


In [None]:
# Rank whose matches are analysed; later cells reuse RANK for titles and queries
RANK = "Diamond"

# Preprocess a reduced sample so the demo stays quick
print(f"Preprocessing data for {RANK}...")
data = preprocessing.preprocess_for_rank(RANK, sample_size=5000)

# Summarise the preprocessed frame: its shape, then its column names
shape_line = f"\nDataset shape: {data.shape}"
columns_line = f"Variables: {list(data.columns)}"
print(shape_line, columns_line, "\nFirst few rows:", sep="\n")

# Last expression of the cell, so the notebook renders it as a table
data.head()


## 2. Run GES Structure Learning


In [None]:
# Learn the graph structure from the preprocessed data with GES
print("Running GES structure learning...")
print("This may take a minute...")

result = ges.fit_ges(data, use_constraints=True)

# Report the size of the learned graph
variable_count = len(result['variables'])
print("\n✓ Structure learning complete!")
print(f"  - Number of variables: {variable_count}")
print(f"  - Number of edges: {result['n_edges']}")

# List every edge: an arrow for "directed" edges, a dash for any other type
print("\nLearned edges:")
for source, target, kind in result['edges']:
    connector = "—" if kind != "directed" else "→"
    print(f"  {source} {connector} {target}")


## 3. Visualize the Learned Structure


In [None]:
# Draw the learned CPDAG; the title carries the rank and the edge count
cpdag_title = f"Learned CPDAG - {RANK} ({result['n_edges']} edges)"
learned_edges, learned_variables = result['edges'], result['variables']

fig = visualize.plot_cpdag(
    learned_edges, learned_variables, title=cpdag_title, layout="hierarchical"
)
plt.show()


## 4. Learn Parameters (CPTs)


In [None]:
# Fit conditional probability tables for the learned structure
print("Learning conditional probability tables...")

model = parameters.learn_parameters_from_ges(result, data)

# Report how many nodes, edges and CPTs the fitted model holds
print("✓ Parameter learning complete!")
for label, getter in (
    ("Nodes", model.nodes),
    ("Edges", model.edges),
    ("CPTs", model.get_cpds),
):
    print(f"  - {label}: {len(getter())}")

# Run the project's CPT validation and surface any warnings it reports
validation = parameters.validate_cpts(model)
if not validation["valid"]:
    print(f"  - ⚠ Validation warnings: {validation['warnings']}")
else:
    print("  - ✓ CPTs are valid")


## 5. Inspect CPTs


In [None]:
# Win is the outcome of interest, so print its CPT
win_cpd = model.get_cpds('Win')
print("CPT for Win variable:", win_cpd, sep="\n")


## 6. Run Probabilistic Queries


In [None]:
# Evaluate the project's predefined example queries against the fitted model
print("Running example queries...", end="\n\n")

query_results = queries.run_example_queries(model, RANK)


In [None]:
# Visualize query results as a horizontal bar chart of P(Win=1) per query
fig, ax = plt.subplots(figsize=(10, 6))

# Build the display labels in a local Series instead of writing a helper
# column back into query_results, so the frame produced by the previous cell
# is left untouched. The ellipsis is added only to labels that were actually
# cut; shorter labels are shown unchanged.
MAX_LABEL_CHARS = 30
full_labels = query_results['Query']
short_labels = full_labels.where(
    full_labels.str.len() <= MAX_LABEL_CHARS,
    full_labels.str[:MAX_LABEL_CHARS] + '...',
)

# Colour-code each bar by win probability: > 0.6 green, > 0.4 orange, else red
win_probs = query_results['P(Win=1)']
colors = ['green' if p > 0.6 else 'orange' if p > 0.4 else 'red'
          for p in win_probs]

# Plot against integer positions and attach the labels as tick text.
# Passing the strings directly to barh would merge queries whose truncated
# labels are identical into a single bar.
positions = list(range(len(short_labels)))
ax.barh(positions, win_probs, color=colors, alpha=0.7)
ax.set_yticks(positions)
ax.set_yticklabels(short_labels)

ax.set_xlabel('P(Win=1 | Evidence)', fontsize=12)
ax.set_ylabel('Query', fontsize=12)
ax.set_title(f'Win Probability for Different Game States - {RANK}',
             fontsize=14, fontweight='bold')
ax.set_xlim([0, 1])
# Dashed reference line at the 50% win probability mark
ax.axvline(x=0.5, color='black', linestyle='--', linewidth=1, alpha=0.5)
ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.show()


## 7. Custom Queries


In [None]:
# Custom questions, written as (display name, evidence) pairs.
# NOTE(review): FB/FT evidence uses integer states while Baron/Drakes use
# string states ("1", "3") — confirm each matches the state names the
# fitted model uses for that variable.
evidence_by_name = [
    ("First Blood only", {"FB": 1}),
    ("First Tower only", {"FT": 1}),
    ("FB + FT", {"FB": 1, "FT": 1}),
    ("High gold @20", {"Gold20": "high"}),
    ("Low gold @20", {"Gold20": "low"}),
    ("Baron + High Gold", {"Baron": "1", "Gold20": "high"}),
    ("Multiple Drakes", {"Drakes": "3"}),
]
custom_queries = [
    {"name": query_name, "evidence": query_evidence}
    for query_name, query_evidence in evidence_by_name
]

# Evaluate all queries in one call and print the resulting table without its index
custom_results = queries.query_multiple(custom_queries, model)
print("\nCustom Query Results:", custom_results.to_string(index=False), sep="\n")


## 8. Analyze Variable Influence on Win

Which variables have the strongest impact on winning?


In [None]:
# Compute the influence of each variable state on Win
influence = queries.analyze_variable_influence("Win", model, data)

# Print the first and last ten rows of the influence table
# (presumably sorted by descending influence — confirm in queries)
sections = (
    ("\nTop 10 Most Influential Variable States:", influence.head(10)),
    ("\nBottom 10 (Most Negative Impact):", influence.tail(10)),
)
for heading, rows in sections:
    print(heading)
    print(rows.to_string(index=False))


In [None]:
# Visualize influence: the first and last N_EXTREMES rows of the influence table
N_EXTREMES = 10

# head() and tail() overlap when the table has at most 2 * N_EXTREMES rows;
# concatenating them would then plot some rows twice, so use the whole
# table in that case.
if len(influence) > 2 * N_EXTREMES:
    top_influences = pd.concat(
        [influence.head(N_EXTREMES), influence.tail(N_EXTREMES)]
    )
else:
    top_influences = influence

fig, ax = plt.subplots(figsize=(10, 8))

# Positive influence is drawn in green, zero or negative in red
colors = ['green' if x > 0 else 'red' for x in top_influences['Influence']]
# One "Variable=Value" label per row, built from the two columns directly
y_labels = [
    f"{variable}={value}"
    for variable, value in zip(top_influences['Variable'], top_influences['Value'])
]

ax.barh(y_labels, top_influences['Influence'], color=colors, alpha=0.7)
ax.set_xlabel('Influence on P(Win=1)', fontsize=12)
ax.set_ylabel('Variable State', fontsize=12)
ax.set_title(f'Variable Influence on Win Probability - {RANK}',
             fontsize=14, fontweight='bold')
# Solid reference line at zero separates positive from negative influence
ax.axvline(x=0, color='black', linestyle='-', linewidth=1)
ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.show()


## Conclusion

Successfully demonstrated:
1. ✓ Data preprocessing and discretization
2. ✓ GES structure learning with domain constraints
3. ✓ Parameter estimation (CPT learning)
4. ✓ Probabilistic inference and queries
5. ✓ Variable influence analysis

Next steps:
- Run for all ranks
- Compare structures across ranks
- Generate comprehensive report

Use the CLI to run the full pipeline:
```bash
python -m src.cli full --ranks Diamond Platinum Master Elite
```
