# IS 362 - Project 2: Religious Affiliation Data
## Dataset 2

Source: Pew Research Center Religious Landscape Study

## Loading the Data

This data is in wide format where religions are columns and demographic groups are rows.

In [None]:
import pandas as pd
import numpy as np

# Hand-entered sample of religious affiliation percentages.
# The table is written row-wise (one demographic group per line) so that a
# group's five shares can be read straight across; it is then transposed into
# the column-oriented dict that feeds the DataFrame (wide format: one column
# per religion, one row per demographic group).

affiliation_columns = ['Demographic', 'Catholic', 'Evangelical', 'Mainline', 'Unaffiliated', 'Other']
affiliation_rows = [
    # Age brackets
    ('18-24', 18, 23, 12, 28, 19),
    ('25-34', 22, 21, 14, 26, 17),
    ('35-44', 24, 22, 15, 23, 16),
    ('45-54', 23, 25, 16, 21, 15),
    ('55-64', 25, 26, 18, 19, 12),
    ('65+', 27, 24, 19, 15, 15),
    # Education levels
    ('High School', 24, 28, 15, 18, 15),
    ('Some College', 22, 23, 16, 24, 15),
    ('College Grad', 23, 19, 17, 28, 13),
    ('Postgrad', 25, 15, 18, 32, 10),
    # Income brackets
    ('<$30k', 22, 26, 14, 22, 16),
    ('$30-50k', 23, 24, 15, 23, 15),
    ('$50-75k', 25, 22, 18, 21, 14),
    ('$75k+', 27, 18, 20, 20, 15),
]

# Transpose rows -> {column name: list of values}, keeping the row order above.
religion_data = {
    column: [row[position] for row in affiliation_rows]
    for position, column in enumerate(affiliation_columns)
}

df = pd.DataFrame(religion_data)
print(f"Shape: {df.shape}")
print("\nData:")
print(df)

## Cleaning the Data

The data has religions as columns and demographics as rows (wide format). I need to identify what type each demographic is and convert to long format.

In [None]:
# Work on a copy so the raw frame loaded above stays untouched.
df_clean = df.copy()

# Tag every demographic label with the kind of breakdown it belongs to.
# An explicit lookup is used instead of substring tests: a bare pattern such
# as '65' would also match an income bracket like '$65k+', and a catch-all
# branch would silently file any unknown label under 'Income'.
labels_by_type = {
    'Age': ['18-24', '25-34', '35-44', '45-54', '55-64', '65+'],
    'Education': ['High School', 'Some College', 'College Grad', 'Postgrad'],
    'Income': ['<$30k', '$30-50k', '$50-75k', '$75k+'],
}
type_by_label = {
    label: demo_type
    for demo_type, labels in labels_by_type.items()
    for label in labels
}

demo_type_lookup = df_clean['Demographic'].astype(str).map(type_by_label)

# Fail loudly on labels the lookup does not know rather than guessing a type.
unrecognized = df_clean.loc[demo_type_lookup.isna(), 'Demographic'].tolist()
if unrecognized:
    raise ValueError(f"Unrecognized demographic labels: {unrecognized}")

demographic_types = demo_type_lookup.tolist()
df_clean['Demo_Type'] = demographic_types

# Check that percentages add up to ~100
religion_cols = ['Catholic', 'Evangelical', 'Mainline', 'Unaffiliated', 'Other']
df_clean['Total'] = df_clean[religion_cols].sum(axis=1)

# Enforce the check instead of only printing it. Five shares rounded to whole
# numbers can drift from 100 by a couple of points, hence the tolerance.
assert (df_clean['Total'] - 100).abs().le(2).all(), \
    "Row percentages do not sum to ~100; check the input data"

print("Demographic types:")
print(df_clean['Demo_Type'].value_counts())

print("\nTotal percentages (should be ~100):")
print(df_clean[['Demographic', 'Total']])

## Converting to Long Format

In [None]:
# Reshape wide -> long: one row per (demographic group, religion) pair.
# Only the identifier columns and the religion columns are carried over, so
# the helper 'Total' column does not appear in the long frame.
id_columns = ['Demographic', 'Demo_Type']
df_long = df_clean.melt(
    id_vars=id_columns,
    value_vars=religion_cols,
    var_name='Religion',
    value_name='Percentage',
)

print(f"New shape: {df_long.shape}")
print("\nFirst 15 rows:")
print(df_long.head(15))

## Analysis

In [None]:
# Unweighted mean of each religion's percentage over every demographic row
# (age, education and income groups together), listed from highest to lowest.
mean_by_religion = df_long.groupby('Religion')['Percentage'].mean()
print("Average affiliation by religion:")
print(mean_by_religion.sort_values(ascending=False))

In [None]:
# Age breakdown: keep only the age-bracket rows, then spread religions back
# out into columns so each bracket reads as a single line.
print("Affiliation by age group:")
is_age_row = df_long['Demo_Type'].eq('Age')
age_data = df_long[is_age_row]
age_pivot = age_data.pivot_table(
    index='Demographic',
    columns='Religion',
    values='Percentage',
)
print(age_pivot)

In [None]:
# Education breakdown: keep only the education rows, then spread religions
# back out into columns so each level reads as a single line.
print("Affiliation by education level:")
edu_data = df_long[df_long['Demo_Type'] == 'Education']

# pivot_table sorts its index alphabetically, which would scramble the
# education levels (College Grad, High School, Postgrad, Some College).
# Reindex to the order in which the labels appear in the data so the table
# reads from lowest to highest level and the trend is visible.
education_order = edu_data['Demographic'].unique()
edu_pivot = (
    edu_data
    .pivot_table(values='Percentage', index='Demographic', columns='Religion')
    .reindex(education_order)
)
print(edu_pivot)

In [None]:
# Contrast the 'Unaffiliated' share of the youngest and oldest age brackets.
# The closing sentence is fixed text; it is not computed from the values.
unaffiliated_by_age = age_data[age_data['Religion'] == 'Unaffiliated']

print("\nKey observation:")
for age_label in ('18-24', '65+'):
    bracket_rows = unaffiliated_by_age['Demographic'] == age_label
    share = unaffiliated_by_age.loc[bracket_rows, 'Percentage'].iloc[0]
    print(f"Unaffiliated in {age_label}: {share}%")
print("\nYounger people are less religious")

## Summary

**Transformations made:**
- Converted from wide format (religions as columns) to long format (religions as rows)
- Added demographic type categorization (Age, Education, Income)
- Verified percentages sum to approximately 100%

**Findings:**
- "Unaffiliated" is highest among young adults and increases with education
- Evangelical affiliation decreases with more education
- Catholic affiliation rises with age, from 18% among 18-24 year-olds to 27% among those 65+
- Overall, affiliation varies systematically with demographics; for example, Mainline affiliation rises with age, education, and income

**Notes:**
- Data represents % of each demographic group
- All values are reasonable and within expected ranges
- Values are sample figures typed into the notebook, based on Pew Research estimates, rather than loaded from the published dataset