# Iterative Proportional Fitting Code Demo


Code from https://github.com/Dirguis/ipfn (IPFN package for Python).

In [6]:
from ipfn import ipfn
import numpy as np
import pandas as pd

### Input Description

Variable      | Description   | Type
------------- | ------------- | -------------
Original  | Matrix on which to perform IPF, initialize all cells to 1  | Numpy matrix (??? dimensions ???)
Marginals  | Target sums when aggreating along specified axis (multiple axes???) | List of Numpy arrays
Dimensions  | Axes along which to sum to get marginals | List of lists of integers

### Import Census Data

** Look over all columns: not sure if these are correct!!!!!!!

In [26]:
census_data_pd = pd.read_excel('planning_database_NYC_tracts.xlsx'

In [154]:
all_columns = census_data_pd.columns
chosen_columns = list(filter(lambda x: ('CEN_2010' in x) & ('pct' not in x) , census_data_pd.columns))
census_data_pd_reduced = census_data_pd[chosen_columns]
census_data = np.array(census_data_pd_reduced)
census_data[np.isnan(census_data)] = 0

### Initialize Original Matrix

In [238]:
# number of variables
v = len(chosen_columns)
# number of tracts
t = len(census_data_pd) - 1

In [239]:
# initialze all matrix cells to 1
original = np.ones((t, v, v))

for tract in range(len(original)):
    for i in range(len(original[0])):
        original[tract, i, i] = 0

### Define Marginals and Corresponding Dimensions

In [240]:
tract_sums = np.array(2 * census_data_pd['Tot_Population_CEN_2010'])

In [241]:
chosen_variable_sums = np.array([census_data[i,:] for i in range(1,len(census_data))])

In [242]:
marginals = [tract_sums, chosen_variable_sums, chosen_variable_sums]

In [243]:
dimensions = [[0], [0, 1], [0, 2]]

[0,1] dimension corresponds to sum of each row, each tract,
m[i,j,:] where i is tract and j is row number
so [t1r1, t1r2, ...],[t2r1, t2r2, ...]...

***ADD sums for columns***

### Run IPF Algorithm

In [244]:
IPF = ipfn(original, marginals, dimensions)

In [245]:
original = IPF.iteration()

ipfn converged: convergence_rate not updating or below rate_tolerance


### Check Sums

In [222]:
count_wrong = 0
count_total = 0
tolerance = 0.1

for tract in range(len(original)):
    for var in range(len(original[0])):
        count_total += 1
        if abs(original[tract, var, :].sum() - chosen_variable_sums[tract, var]) > tolerance:
            count_wrong += 1        
        if abs(original[tract, :, var].sum() - chosen_variable_sums[tract, var]) > tolerance:
            count_wrong += 1

In [223]:
print(count_total)

108400


In [224]:
print(count_wrong)

2138


# only about 1.9% are off

In [246]:
original[0].sum()

42599.3

In [247]:
tract_sums[0]

22182.0

In [258]:
no = 0
tot = 0
for tract in range(len(original)):
    for var in range(len(original[0])):
        tot += 1
        if original[tract, var, var] != 0.0:
            no += 1

In [259]:
no

50

In [260]:
tot

108400

# PROBLEMS!!!!!