# Data Visualization 5: Socioeconomic Correlation Explorer

### Type: Scatter Plot Matrix

### Tool: Altair

### Interactivity: Select different variables

### Customization: Colored points by district

### Description: Shows correlations (e.g., crime vs. income, education, or population density) and lets the user explore the relationships.

In [2]:
import pandas as pd
import altair as alt

# Simulated socioeconomic data by Boston district.
# One tuple per district, in column order:
# (DISTRICT, MEDIAN_INCOME, EDUCATION_RATE, POP_DENSITY).
# These are illustrative figures, not measured values.
socioeconomic_data = pd.DataFrame(
    [
        ('A1', 75000, 0.91, 10500),
        ('A7', 62000, 0.87, 9800),
        ('B2', 41000, 0.68, 12500),
        ('B3', 43000, 0.72, 11200),
        ('C6', 56000, 0.84, 9700),
        ('C11', 49000, 0.77, 10800),
        ('D4', 70000, 0.90, 10200),
        ('D14', 67000, 0.88, 9600),
        ('E5', 72000, 0.93, 8800),
        ('E13', 60000, 0.86, 9500),
        ('E18', 58000, 0.82, 9100),
    ],
    columns=['DISTRICT', 'MEDIAN_INCOME', 'EDUCATION_RATE', 'POP_DENSITY'],
)

# Load crime data from all years.
# The keys label each file by year; only the path values are read below.
# Paths are relative, so they resolve against the current working directory.
paths = {
    "2019": "2019.csv",
    "2020": "2020.csv",
    "2021": "2021.csv",
    "2022": "crime_reports_2022.csv",
    "2023": "2023_to_present.csv"
}
# Only the DISTRICT column is used downstream, so ask the parser to skip every
# other column (usecols) instead of loading full files and discarding the rest.
dfs = [
    pd.read_csv(path, usecols=['DISTRICT'], low_memory=False)
    for path in paths.values()
]
# Stack all files into one frame and drop rows that have no district recorded.
crime_df = pd.concat(dfs, ignore_index=True).dropna(subset=['DISTRICT'])

# Aggregate crime counts per district: one output row per distinct DISTRICT
# value, with CRIME_COUNT holding the number of input rows for that district.
crime_by_district = crime_df.groupby('DISTRICT').size().reset_index(name='CRIME_COUNT')

# Join the per-district crime counts onto the socioeconomic table.
# The inner join keeps only districts that are present in both frames.
merged_data = socioeconomic_data.merge(crime_by_district, how='inner', on='DISTRICT')

# Reshape to long form for the dynamic Altair selection: each district yields
# one row per socioeconomic variable, with the variable name in SOCIO_VAR and
# its value in VALUE, while DISTRICT and CRIME_COUNT repeat on every row.
melted = pd.melt(
    merged_data,
    id_vars=['DISTRICT', 'CRIME_COUNT'],
    value_vars=['MEDIAN_INCOME', 'EDUCATION_RATE', 'POP_DENSITY'],
    var_name='SOCIO_VAR',
    value_name='VALUE',
)

# Dropdown widget listing the socioeconomic variables that can go on the x-axis.
selector = alt.binding_select(
    name='Variable: ',
    options=['MEDIAN_INCOME', 'EDUCATION_RATE', 'POP_DENSITY'],
)
# Point selection on SOCIO_VAR, driven by the dropdown and starting on
# MEDIAN_INCOME.
selection = alt.selection_point(
    value='MEDIAN_INCOME',
    bind=selector,
    fields=['SOCIO_VAR'],
)

# Scatter plot of crime count against the currently selected variable.
# The filter keeps only the long-form rows whose SOCIO_VAR matches the
# selection, so each district is drawn once, colored by DISTRICT.
chart = (
    alt.Chart(melted)
    .mark_circle(size=100)
    .encode(
        x=alt.X('VALUE:Q', title='Socioeconomic Variable'),
        y=alt.Y('CRIME_COUNT:Q', title='Crime Count'),
        color=alt.Color('DISTRICT:N'),
        tooltip=['DISTRICT', 'VALUE', 'CRIME_COUNT'],
    )
    .add_params(selection)
    .transform_filter(selection)
    .properties(
        title='Crime vs. Socioeconomic Indicators by District (Interactive)',
        width=700,
        height=400,
    )
)

chart.display()