# Example 4: Environmental Health Connections

This notebook demonstrates:
1. Finding chemical exposures in specific locations
2. Identifying genes affected by chemicals (PrimeKG)
3. Comparing chemical-induced gene changes to space biology
4. Discovering similar biological signatures

In [None]:
from mcp_space_life_sciences import IntegratedKGClient
import matplotlib.pyplot as plt
import pandas as pd

# Single client instance reused by every query cell in this notebook.
client = IntegratedKGClient()

## Step 1: Query Environmental Chemical Exposures

In [None]:
# Get chemical exposures in California, restricted to the "air" medium, and
# load the returned records into a DataFrame for ranking.
# NOTE(review): the columns used below ('chemical_name', 'value', 'unit',
# 'year') are assumed to be present in every record -- confirm against the client.
ca_exposures = client.get_chemical_exposures_by_location(
    location="California",
    media="air"  # Focus on air pollution
)

df_exposures = pd.DataFrame(ca_exposures)
print(f"Found {len(df_exposures)} chemical exposures in California")

if df_exposures.empty:
    # An empty result produces a frame with no columns, so ranking on 'value'
    # would raise KeyError; report the situation instead.
    print("\nNo exposure records returned; nothing to rank.")
else:
    print("\nTop chemicals by concentration:")
    # NOTE(review): rows are ranked on the raw 'value' column; if records use
    # different units the ranking is not directly comparable -- verify.
    print(df_exposures.nlargest(10, 'value')[['chemical_name', 'value', 'unit', 'year']])

## Step 2: Find Genes Affected by Top Chemicals

In [None]:
# Get the top 5 *distinct* chemicals, ranked by each chemical's highest value.
# Ranking the raw rows instead can return the same chemical more than once
# when it has several exposure records, which silently yields fewer than five
# chemicals and repeats identical client queries.
top_chemicals = (
    df_exposures
    .groupby('chemical_name')['value']
    .max()
    .nlargest(5)
    .index
    .tolist()
)

# For each chemical, find genes it affects
chemical_gene_effects = {}
for chemical in top_chemicals:
    # Find genes upregulated by chemical
    upregulated = client.find_genes_upregulated_by_compound(
        compound_name=chemical
    )

    # Find genes downregulated by chemical
    downregulated = client.find_genes_downregulated_by_compound(
        compound_name=chemical
    )

    chemical_gene_effects[chemical] = {
        'upregulated': upregulated,
        'downregulated': downregulated
    }

# Print summary: gene counts per direction plus a short preview of the
# upregulated list for each chemical.
for chemical, effects in chemical_gene_effects.items():
    print(f"\n{chemical}:")
    print(f"  Upregulated genes: {len(effects['upregulated'])}")
    print(f"  Downregulated genes: {len(effects['downregulated'])}")
    print(f"  Example upregulated: {effects['upregulated'][:5]}")

## Step 3: Compare to Space Biology Gene Expression

In [None]:
# Fetch the differentially expressed genes for one spaceflight assay.
# The result is indexed by 'upregulated' and 'downregulated' below.
space_de = client.get_genelab_de_genes(
    log2fc_threshold=1.0,
    assay_id="OSD-48-EXAMPLE",  # Example microgravity study
)

# Report how many genes were returned in each direction.
for label, direction in (("Upregulated", "upregulated"),
                         ("Downregulated", "downregulated")):
    print(f"Space biology - {label}: {len(space_de[direction])}")

## Step 4: Find Overlapping Gene Signatures

In [None]:
# For each chemical, find overlap with space biology.
# The spaceflight gene sets do not change per chemical, so build them once.
space_up = set(space_de['upregulated'])
space_down = set(space_de['downregulated'])

overlaps = {}
for chemical, effects in chemical_gene_effects.items():
    # Find genes upregulated in both
    up_overlap = space_up.intersection(effects['upregulated'])

    # Find genes downregulated in both
    down_overlap = space_down.intersection(effects['downregulated'])

    # Store sorted lists: set iteration order can differ between kernel
    # sessions, which would make the previews below and any later slicing of
    # these lists non-reproducible.
    # NOTE(review): sorting assumes gene identifiers are mutually comparable
    # (e.g. all strings) -- confirm against the client's return type.
    overlaps[chemical] = {
        'upregulated_overlap': sorted(up_overlap),
        'downregulated_overlap': sorted(down_overlap),
        'total_overlap': len(up_overlap) + len(down_overlap)
    }

# Print chemicals with highest overlap first
print("\nChemicals with similar gene expression to spaceflight:")
for chemical, overlap in sorted(overlaps.items(),
                                key=lambda item: item[1]['total_overlap'],
                                reverse=True):
    print(f"\n{chemical}: {overlap['total_overlap']} overlapping genes")
    if overlap['upregulated_overlap']:
        print(f"  Common upregulated: {overlap['upregulated_overlap'][:5]}")

## Step 5: Analyze Biological Pathways

In [None]:
# Pick the chemical whose gene changes overlap most with spaceflight and
# collect its shared genes (upregulated first, then downregulated).
top_chemical = max(overlaps, key=lambda name: overlaps[name]['total_overlap'])
top_overlap = overlaps[top_chemical]
overlap_genes = top_overlap['upregulated_overlap'] + top_overlap['downregulated_overlap']

# Ask the client which pathways these shared genes have in common.
pathway_enrichment = client.find_shared_pathways(gene_names=overlap_genes)

# Show at most the first ten pathways; 'pvalue' is optional in each record.
print(f"\nPathways affected by both {top_chemical} and spaceflight:")
top_pathways = pathway_enrichment[:10]
for pathway in top_pathways:
    print(f"  - {pathway['name']}: {pathway['gene_count']} genes, p={pathway.get('pvalue', 'N/A')}")

## Step 6: Visualize Chemical-Space Biology Connection

In [None]:
# Venn diagram: genes upregulated by the top chemical vs. by spaceflight.
from matplotlib_venn import venn2

fig, ax = plt.subplots(figsize=(10, 8))

# Name the two gene sets so the venn2 call reads as "chemical vs spaceflight".
chemical_up_genes = set(chemical_gene_effects[top_chemical]['upregulated'])
spaceflight_up_genes = set(space_de['upregulated'])

venn2(
    [chemical_up_genes, spaceflight_up_genes],
    set_labels=(f'{top_chemical} exposure', 'Spaceflight'),
    ax=ax,
)

# Title the same axes the diagram was drawn on.
ax.set_title(f'Upregulated Genes: {top_chemical} vs Spaceflight')
plt.show()

## Step 7: Health Implications

In [None]:
# Find diseases associated with overlapping genes and count, for each
# disease, how many of the checked genes are linked to it.
from collections import Counter

diseases = []
disease_counts = Counter()
for gene in overlap_genes[:20]:  # Check first 20 genes
    gene_diseases = client.find_gene_diseases(
        gene_name=gene
    )
    diseases.extend(gene_diseases)

    # Count each disease at most once per gene so the "N genes" label printed
    # below is accurate even if the client returns the same disease more than
    # once for a gene. dict.fromkeys de-duplicates while keeping first-seen
    # order, which keeps tie ordering in most_common() stable.
    names_for_gene = dict.fromkeys(d['disease_name'] for d in gene_diseases)
    disease_counts.update(names_for_gene.keys())

print("\nDiseases associated with chemical-spaceflight gene overlap:")
for disease, count in disease_counts.most_common(10):
    print(f"  {disease}: {count} genes")

## Summary

This notebook demonstrated:
- ✅ Querying environmental chemical exposures by location (SPOKE-OKN)
- ✅ Finding genes affected by chemicals (PrimeKG + SPOKE-OKN)
- ✅ Comparing chemical-induced changes to spaceflight effects (GeneLab)
- ✅ Identifying overlapping gene signatures
- ✅ Discovering shared biological pathways
- ✅ Understanding health implications

**Key Insight**: Environmental exposures on Earth can produce gene-expression
changes similar to those seen in spaceflight. Such overlaps suggest shared risk
factors and potential countermeasures worth investigating.