In [12]:
import numpy as np

# Example data matrix: rows = communities, columns = indicators
raw = np.genfromtxt('entropy_in.csv', delimiter=',')
# Drop the first parsed row (presumably the CSV header, which genfromtxt
# cannot parse as numbers and turns into NaN -- confirm against the file).
body = raw[1:]
# Column 0 is the ZIP identifier. genfromtxt parses everything as float, so
# cast to int; otherwise the codes are carried and exported as e.g. 38103.0.
zips = body[:, 0].astype(int)
# Remaining columns are the indicator values used by the weighting step.
data = body[:, 1:]
data

array([[8.1000e-02, 7.0000e-02, 9.3000e-02, 5.2863e+03, 1.3690e-06],
       [4.3000e-02, 2.8000e-01, 1.4300e-01, 3.7720e+02, 1.6660e-06],
       [2.8000e-02, 2.6600e-01, 1.4800e-01, 6.8850e+02, 1.4310e-06],
       [9.9000e-02, 2.1500e-01, 1.3500e-01, 2.4507e+03, 2.4910e-06],
       [7.2000e-02, 2.4400e-01, 1.3100e-01, 2.0558e+03, 2.0280e-06],
       [5.3000e-02, 2.0300e-01, 2.1300e-01, 1.3020e+02, 1.9800e-06],
       [7.3000e-02, 1.9300e-01, 1.9900e-01, 2.7530e+02, 3.0090e-06],
       [8.0000e-02, 1.5400e-01, 2.4700e-01, 6.4400e+01, 3.8500e-06],
       [1.4100e-01, 1.3600e-01, 1.5100e-01, 4.6295e+03, 1.9000e-06],
       [3.1400e-01, 1.2400e-01, 1.5800e-01, 3.0474e+03, 2.4380e-06],
       [4.1500e-01, 2.3600e-01, 1.8200e-01, 2.0991e+03, 1.7340e-06],
       [2.8600e-01, 2.1700e-01, 1.4400e-01, 2.6159e+03, 1.3260e-06],
       [2.8000e-01, 3.0600e-01, 1.3800e-01, 2.4358e+03, 4.9000e-06],
       [3.1600e-01, 2.4600e-01, 1.9200e-01, 7.3290e+02, 2.5380e-06],
       [2.3500e-01, 2.2000e-01, 1.

In [13]:
# Entropy weight method: an indicator whose normalized values are spread
# more unevenly across communities gets lower entropy and a larger weight.

# Step 1: Normalize using min-max normalization for each column
min_vals = data.min(axis=0)
max_vals = data.max(axis=0)
col_range = max_vals - min_vals
# A constant column has zero range; dividing by it would yield 0/0 = NaN and
# that NaN would spread to every weight through diversity.sum(). Divide by 1
# instead (the numerator is already 0 for such a column).
constant = col_range == 0
norm_data = (data - min_vals) / np.where(constant, 1.0, col_range)

# Step 2: Calculate proportions p_ij
# Sum each column (for each factor)
col_sums = norm_data.sum(axis=0)
# After min-max scaling only a constant column can sum to 0, so the same
# mask protects this division too.
p = norm_data / np.where(constant, 1.0, col_sums)

# Replace zeros (if any) to avoid log(0); 1e-12 * log(1e-12) is negligible
p[p == 0] = 1e-12

# Step 3: Compute entropy for each factor
n = data.shape[0]
assert n > 1, "entropy weighting needs at least two rows (log(1) == 0)"
k = 1 / np.log(n)  # scales entropy into [0, 1]
entropy = -k * (p * np.log(p)).sum(axis=0)

# Step 4: Diversification degree
diversity = 1 - entropy
# A constant indicator cannot distinguish communities: give it no weight.
diversity[constant] = 0.0

# Step 5: Compute weights (normalized so they sum to 1)
weights = diversity / diversity.sum()

# Display the weights for each factor
factors = ["% of population below poverty line","% of population below 18","% of population above 65","Population dens","% pregnant"]
# zip() would silently drop entries if the label list and the data columns disagree
assert len(factors) == len(weights), "one label is required per indicator column"
for factor, weight in zip(factors, weights):
    print(f"{factor}: {weight:.3f}")

% of population below poverty line: 0.271
% of population below 18: 0.074
% of population above 65: 0.150
Population dens: 0.195
% pregnant: 0.310


In [19]:
# Composite vulnerability score per community: the weighted sum of its
# normalized indicators. A matrix-vector product keeps the indicator count
# implicit (no hard-coded 5) and avoids rebinding the builtin `sum`, which
# would otherwise stay shadowed for the rest of the kernel session.
vulnerabilities = list(norm_data @ weights)


In [20]:
import pandas as pd

# Pair each ZIP code with its vulnerability score in a two-column table
df = pd.DataFrame(
    {
        'Zip Codes': zips,
        'Vulnerability Score': vulnerabilities,
    }
)

# Write the table to disk without the row index, then echo it as plain text
df.to_csv('entropy_out.csv', index=False)
print(df)

    Zip Codes  Vulnerability Score
0     38103.0             0.255413
1     38002.0             0.175345
2     38017.0             0.157359
3     38016.0             0.333220
4     38018.0             0.267101
5     38028.0             0.232792
6     38060.0             0.325518
7     38066.0             0.421641
8     38104.0             0.378861
9     38105.0             0.474253
10    38106.0             0.491216
11    38107.0             0.360556
12    38108.0             0.680754
13    38109.0             0.460179
14    38111.0             0.522919
15    38112.0             0.555350
16    38117.0             0.436591
17    38125.0             0.219504
18    38126.0             0.508965
19    38127.0             0.481831
20    38128.0             0.386392
21    38133.0             0.265998
22    38134.0             0.334403
23    38135.0             0.253134
24    38138.0             0.598084
25    38139.0             0.257301
26    38141.0             0.316365
