In [1]:
##############################################################################################################################
##############################################################################################################################
##PYTHON CODE (notebook, .ipynb file using https://jupyter.org/ for https://www.python.org/) 
##     TO APPLY THE ipfn PACKAGE FOR PYTHON (https://github.com/Dirguis/ipfn, LARGELY FOLLOWING ITS README FILE) 
##     AND COMPARE TO RESULTS FROM R CODE AT https://github.com/AppliedDemogToolbox/IPF_R
##
##JANUARY 2023
##edyhsgr@gmail.com
##
##THERE IS NO WARRANTY FOR THIS CODE
##
##ADDITIONAL INFORMATION ON ITERATIVE PROPORTIONAL FITTING (IPF): https://edyhsgr.github.io/datafitting.html
##
##USEFUL PYTHON INFO BY UC DAVIS PROF NORM MATLOFF: https://web.cs.ucdavis.edu/~matloff/matloff/public_html/python.html 
##AND YOUTUBER PROGRAMMING WITH MOSH: https://www.youtube.com/watch?v=kqtD5dpn9C8&list=PLTjRvDozrdlxj5wgH4qkvwSOdHLOCx10f&index=18
##############################################################################################################################

##############################################################################################################################
#2D IPF, STEP 1: Read in row, column, and seed data, along with results from R code to compare against
##############################################################################################################################

import numpy as np
import pandas as pd
from ipfn import ipfn

#Single location for the 2D practice data, so the source (for example a pinned commit or a local copy)
#  can be changed in one place
DATA_URL_2D = "https://raw.githubusercontent.com/AppliedDemogToolbox/IPF_R/master/IPF/HowTo/PracticeData/2DExample/"

#Row, column, and seed files are read with header=None (their first line is treated as data)
row2D = pd.read_csv(DATA_URL_2D + "Row.csv", header=None)
col2D = pd.read_csv(DATA_URL_2D + "Col.csv", header=None)
seed2D = pd.read_csv(DATA_URL_2D + "Seed.csv", header=None)

#Results from the R code, read with the file's first line used as column names
output2D_Comparison = pd.read_csv(DATA_URL_2D + "Output.csv")


In [2]:
##############################################################################################################################
#2D IPF, STEP 2: Set seed (m, the starting values), row aggregate (xip, where i is the row, p is summed dimension), 
#  and column aggregate (xpj, where j is the column, p is summed dimension), 
#  and dimensions, and apply the ipfn function
##############################################################################################################################

#Seed table and target margins as NumPy arrays
m = np.array(seed2D)
xip = np.array(row2D)  #Row targets, paired with dimension 0 below
xpj = np.array(col2D)  #Column targets, paired with dimension 1 below

#aggregates[n] holds the targets for the table dimension(s) listed in dimensions[n]
aggregates = [xip, xpj]
dimensions = [[0], [1]]

IPF = ipfn.ipfn(m, aggregates, dimensions, convergence_rate=1e-6)
m = IPF.iteration()  #m now holds the fitted table instead of the seed

output2D_ipynb = pd.DataFrame(m)

display(output2D_ipynb)  #To view results
display(output2D_Comparison)  #To view results

#Numeric summary of the comparison, so agreement with the R results does not rely on eyeballing the tables
r_values2D = output2D_Comparison.to_numpy()
if r_values2D.shape == output2D_ipynb.shape:
    print("Largest absolute difference from R results:", np.abs(output2D_ipynb.to_numpy() - r_values2D).max())
else:
    print("Shapes differ, so no numeric comparison was made:", output2D_ipynb.shape, "vs", r_values2D.shape)

#output2D_ipynb.to_csv("output2D_ipynb.csv")  #To save results
#output2D_Comparison.to_csv("output2D_Comparison.csv")  #To save results


Unnamed: 0,0,1,2,3,4,5
0,937.173699,980.586385,49.220812,671.373671,8.663102,65.981764
1,2609.428672,1012.668541,193.473893,1330.301021,34.560587,147.567058
2,203427.935597,22431.385242,15807.924484,15208.004847,2491.861978,13656.907915
3,2393.445161,13471.190489,84.240138,179.818515,14.48583,604.809384
4,588.439092,470.397517,9.599928,5.909833,4.787296,23.866052
5,1654.809172,106.913685,30.490649,33.239281,6.652227,83.895179
6,1171.157842,3361.52498,28.983497,42.62398,13.489935,285.217247
7,65130.031038,6458.872976,4763.552088,1779.501671,246.841298,3781.207492
8,1906.641141,269.871004,10.486392,23.474734,2.852377,104.674448
9,23771.343005,3748.070095,263.548089,1447.062536,113.331348,1942.645982


Unnamed: 0,V1,V2,V3,V4,V5,V6
1,937.17241,980.58911,49.220734,671.372943,8.66309,65.981712
2,2609.426509,1012.671909,193.473695,1330.300304,34.560561,147.567023
3,203427.844184,22431.468345,15807.914254,15208.002423,2491.861011,13656.909783
4,2393.438606,13471.209555,84.239891,179.818075,14.485791,604.808082
5,588.438193,470.398752,9.599911,5.909826,4.787289,23.86603
6,1654.808576,106.91409,30.490632,33.239279,6.652225,83.895198
7,1171.155066,3361.530977,28.983423,42.623892,13.489904,285.216739
8,65130.003135,6458.89704,4763.549105,1779.501424,246.841208,3781.208089
9,1906.640085,269.871975,10.486385,23.474728,2.852376,104.674451
10,23771.329224,3748.083492,263.547884,1447.062116,113.331289,1942.645994


In [3]:
##############################################################################################################################
#3D IPF, STEP 1: Read in row, column, stack, and seed data, along with results from R code to compare against
##############################################################################################################################

#Single location for the 3D practice data, so the source (for example a pinned commit or a local copy)
#  can be changed in one place
DATA_URL_3D = "https://raw.githubusercontent.com/AppliedDemogToolbox/IPF_R/master/IPF/HowTo/PracticeData/3DExample/"

#Row, column, stack, and seed files are read with header=None (their first line is treated as data)
rows3D = pd.read_csv(DATA_URL_3D + "Row.csv", header=None)
cols3D = pd.read_csv(DATA_URL_3D + "Col.csv", header=None)
stacks3D = pd.read_csv(DATA_URL_3D + "Stack.csv", header=None)
seed3D = pd.read_csv(DATA_URL_3D + "Seed.csv", header=None)

#Results from the R code; only the columns named V1 through V5 are kept
output3D_Comparison = pd.read_csv(DATA_URL_3D + "Output.csv", usecols=["V1", "V2", "V3", "V4", "V5"])


In [6]:
##############################################################################################################################
#3D IPF, STEP 2: Set and shape seed (m, the starting values), rows aggregate (xijp), 
#  columns aggregate (xipk), stacks aggregate (xpjk), along with their sums (xipp, xpjp, xppk),  
#  and dimensions, and apply the ipfn function
##############################################################################################################################

#Seed as a float array; np.array copies the data, so enabling the zero "tweak" below would not alter seed3D
m = np.array(seed3D, dtype=float)
#m[m==0] = .001 #Option to "tweak" zeroes in the seed, doesn't appear needed with this package
#Reshape the seed so that m has one axis per dimension: 0 (i), 1 (j), and 2 (k)
m = np.reshape(m, (rows3D.shape[0],rows3D.shape[1],cols3D.shape[1]))

#display(m)

#Two-way targets as NumPy arrays, the same input form used in the 2D example
xijp = np.array(rows3D, dtype=float)  #Paired with dimensions [0, 1] below
xipk = np.array(cols3D, dtype=float)  #Paired with dimensions [0, 2] below
xpjk = np.array(stacks3D, dtype=float)  #Paired with dimensions [1, 2] below

#display(xijp) #or np.shape(xijp) to just get dimensions
#display(xpjk)
#display(xipk)

#One-way targets, obtained by summing the two-way targets over one of their axes
xipp = np.sum(xijp, axis=1) #axis refers to the array dimension, starting with 0
xpjp = np.sum(xijp, axis=0)
xppk = np.sum(xpjk, axis=0)

#display(xipp)
#display(xpjp)
#display(xppk)

#aggregates[n] holds the targets for the table dimension(s) listed in dimensions[n]
aggregates = [xipp, xpjp, xppk, xijp, xipk, xpjk]
dimensions = [[0], [1], [2], [0, 1], [0, 2], [1, 2]]

IPF = ipfn.ipfn(m, aggregates, dimensions, convergence_rate=1e-6)
m = IPF.iteration()  #m now holds the fitted table instead of the seed

#display(m)

#Flatten the fitted table back to the two-dimensional layout the seed was read in
outputdataframelength = rows3D.shape[0]*rows3D.shape[1]
outputdataframewidth = cols3D.shape[1]
output3D_ipynb = pd.DataFrame(np.reshape(m,(outputdataframelength,outputdataframewidth)))

display(output3D_ipynb)
display(output3D_Comparison)

#Numeric summary of the comparison, so agreement with the R results does not rely on eyeballing the tables
#NOTE(review): the R results show small positive values where this fit gives zeros,
#  presumably from a zero "tweak" in the R run - confirm before reading the difference as an error
r_values3D = output3D_Comparison.to_numpy()
if r_values3D.shape == output3D_ipynb.shape:
    print("Largest absolute difference from R results:", np.abs(output3D_ipynb.to_numpy() - r_values3D).max())
else:
    print("Shapes differ, so no numeric comparison was made:", output3D_ipynb.shape, "vs", r_values3D.shape)

#output3D_ipynb.to_csv("output3D_ipynb.csv")
#output3D_Comparison.to_csv("output3D_Comparison.csv")


Unnamed: 0,0,1,2,3,4
0,21.931495,63.971734,0.000000,5.096771,0.000000
1,28.940731,53.110688,0.000000,1.948580,0.000000
2,26.607397,83.211942,1.063359,3.117302,0.000000
3,30.795816,98.503218,0.000000,13.700966,0.000000
4,55.092509,90.823005,4.446720,64.685325,1.952442
...,...,...,...,...,...
508,41.221098,104.778903,0.000000,0.000000,0.000000
509,15.845826,94.154177,0.000000,0.000000,0.000000
510,14.222594,47.777406,0.000000,0.000000,0.000000
511,7.516897,23.483114,0.000000,0.000000,0.000000


Unnamed: 0,V1,V2,V3,V4,V5
0,21.930848,63.970527,0.001093,5.096530,0.001002
1,28.939749,53.109389,0.001138,1.948482,0.001242
2,26.607058,83.211590,1.063124,3.117211,0.001017
3,30.795242,98.502246,0.001093,13.700500,0.000919
4,55.093497,90.825462,4.445856,64.685340,1.949846
...,...,...,...,...,...
508,41.219480,104.777222,0.001006,0.000970,0.001321
509,15.844753,94.151928,0.001059,0.000982,0.001281
510,14.220860,47.775599,0.001210,0.001087,0.001245
511,7.514841,23.481747,0.000995,0.000950,0.001466
