# Association Network Analysis

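This notebook explores the associations database: the distribution of association types, cities, and member roles; names that appear in more than one membership record; and the number of associations per year.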

In [None]:
import sys
# Prepend ../src to the module search path so modules located there take
# precedence on import. The relative path is resolved against the kernel's
# current working directory -- assumes the notebook is run from a directory
# that is a sibling of src/ (TODO confirm).
sys.path.insert(0, '../src')

# Data access and analysis.
import sqlite3
import pandas as pd
# Plotting.
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'whitegrid' style to the figures drawn below.
sns.set_style('whitegrid')

## Load Data from Database

In [None]:
# Load the `associations` and `members` tables from the processed SQLite
# database into DataFrames.
#
# NOTE: sqlite3.connect() does not verify that the file already exists; if
# db_path is wrong but its directory exists, an empty database is created and
# the first query below fails with a missing-table error.
db_path = "../data/processed/associations.sqlite"
conn = sqlite3.connect(db_path)
try:
    # Load associations
    associations_df = pd.read_sql("SELECT * FROM associations", conn)
    print(f"Loaded {len(associations_df)} associations")

    # Load members
    members_df = pd.read_sql("SELECT * FROM members", conn)
    print(f"Loaded {len(members_df)} members")
finally:
    # Always release the connection, even when a query raises; otherwise the
    # handle stays open in the kernel until it is restarted.
    conn.close()

## Association Types Distribution

In [None]:
# Bar chart: number of associations for each association type.
# value_counts() orders the types from most to least frequent.
type_counts = associations_df['association_type'].value_counts()

# Draw on an explicit Axes instead of pyplot's implicit "current" figure.
fig, ax = plt.subplots(figsize=(12, 6))
type_counts.plot.bar(ax=ax)
ax.set_title('Distribution of Association Types')
ax.set_xlabel('Association Type')
ax.set_ylabel('Count')
# Slant and right-align the category labels so long names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()

## Geographic Distribution

In [None]:
# Horizontal bar chart of the 20 cities with the most associations.
# value_counts() sorts descending, so head(20) keeps the 20 largest.
city_counts = associations_df['city'].value_counts().head(20)

fig, ax = plt.subplots(figsize=(12, 6))
city_counts.plot(kind='barh', ax=ax)
# barh places the first row at the bottom of the axis; flip the y-axis so the
# city with the highest count is shown at the top of the ranking.
ax.invert_yaxis()
ax.set_title('Top 20 Cities by Association Count')
ax.set_xlabel('Count')
ax.set_ylabel('City')
fig.tight_layout()
plt.show()

## Member Role Analysis

In [None]:
# Bar chart of the 15 most frequent values in the members' `role` column.
# The counts are already sorted descending, so the first 15 rows are the top 15.
role_counts = members_df['role'].value_counts().iloc[:15]

fig, ax = plt.subplots(figsize=(12, 6))
role_counts.plot.bar(ax=ax)
ax.set_title('Top 15 Member Roles')
ax.set_xlabel('Role')
ax.set_ylabel('Count')
# Slant and right-align the role labels so they stay legible.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()

## Network Analysis

In [None]:
# First step toward a member-association network: find names that occur in
# more than one row of the members table. (This cell does not yet build the
# network itself or compute any centrality/influence measure.)
#
# NOTE(review): rows are matched purely on `full_name`. This presumes one row
# per (member, association) pair and that a name identifies a single person --
# confirm against the table schema.

# Number of member rows per name, most frequent first.
member_counts = members_df['full_name'].value_counts()
# Keep only the names that appear more than once.
multi_association_members = member_counts.loc[member_counts.gt(1)]

print(f"Members in multiple associations: {multi_association_members.size}")
print("\nTop 10:")
print(multi_association_members.iloc[:10])

## Temporal Analysis

In [None]:
# Line chart of how many association rows fall in each `year` value.
# groupby(...).size() yields one count per distinct year, indexed by year.
year_counts = associations_df.groupby('year').size()

fig, ax = plt.subplots(figsize=(12, 6))
# A line plot is the default kind; the markers make each year's point visible.
year_counts.plot.line(ax=ax, marker='o')
ax.set_title('Associations by Year')
ax.set_xlabel('Year')
ax.set_ylabel('Count')
ax.grid(True)
fig.tight_layout()
plt.show()