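# USPS inbound, outbound, and net moves by Phoenix ZIP code

Combines the yearly USPS ZIP-code CSVs, keeps the ZIP codes listed in the Phoenix ZIP table, computes net (inbound minus outbound) counts, and exports month-by-ZIP pivot tables to Excel.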

In [1]:
import pandas as pd
import numpy as np
import math
#import glob

In [None]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

from shapely.geometry import Point, Polygon
import geopandas as gpd

In [2]:
#pull in the monthly zip code files, one CSV per year
#(range end is exclusive, so this reads Y2018.csv through Y2023.csv)
#every file is read first and concatenated once: calling pd.concat inside the
#loop re-copies all previously loaded rows on each pass
#each file keeps its own row labels, so index values repeat across years
df = pd.concat([pd.read_csv(f'../data/usps/Y{y}.csv') for y in range(2018,2024)])

In [5]:
#preview the first five rows of the combined frame
#NOTE(review): the saved output already lists the zip and year columns, which are
#only created in the next cell -- the cells appear to have been run out of order
df.head()

Unnamed: 0,YYYYMM,ZIPCODE,CITY,STATE,TOTAL FROM ZIP,TOTAL BUSINESS,TOTAL FAMILY,TOTAL INDIVIDUAL,TOTAL PERM,TOTAL TEMP,TOTAL TO ZIP,TOTAL BUSINESS.1,TOTAL FAMILY.1,TOTAL INDIVIDUAL.1,TOTAL PERM.1,TOTAL TEMP.1,zip,year
0,201812,"=""00601""",ADJUNTAS,PR,0,0,0,0,0,0,15,0,0,12,13,0,601,2018
1,201812,"=""00602""",AGUADA,PR,16,0,0,0,15,0,30,0,12,18,26,0,602,2018
2,201812,"=""00603""",AGUADILLA,PR,20,0,0,13,15,0,45,0,14,30,34,11,603,2018
3,201812,"=""00605""",AGUADILLA,PR,0,0,0,0,0,0,11,0,0,0,0,0,605,2018
4,201812,"=""00610""",ANASCO,PR,0,0,0,0,0,0,28,0,0,19,17,11,610,2018


In [3]:
#ZIPCODE is stored as text like ="00601": take the five characters after the
#leading =" and cast them to int (leading zeros are lost, e.g. 00601 -> 601)
df['zip'] = df['ZIPCODE'].str[2:7].astype(int)

#year is the first four characters of YYYYMM, kept as a string
df['year'] = df['YYYYMM'].astype(str).str[:4]

In [4]:
#read the Phoenix zip / county-subdivision table and keep the distinct zips as a list
zips = pd.read_csv('../data/geo/phx_zip-cousub.csv')
phx_zips = zips['zip'].drop_duplicates().tolist()

Unnamed: 0,zip,county_sub,usps_zip_pref_city,usps_zip_pref_state,res_ratio,bus_ratio,oth_ratio,tot_ratio
0,85013,401392601,PHOENIX,AZ,1.0,1.0,1.0,1.0
1,85060,401392601,PHOENIX,AZ,1.0,1.0,1.0,1.0
2,85050,401390867,PHOENIX,AZ,0.06948,0.009043,0.031621,0.06405
3,85050,401392601,PHOENIX,AZ,0.93052,0.990957,0.968379,0.93595
4,85069,401392601,PHOENIX,AZ,1.0,1.0,1.0,1.0


In [9]:
#keep only the rows whose zip is in the Phoenix list; copy so that later
#column edits on dff do not touch df
in_phx = df['zip'].isin(phx_zips)
dff = df.loc[in_phx].copy()

In [10]:
#relabel every column by position: four id fields, six "from zip" counts (o_),
#six "to zip" counts (i_), then the derived zip and year columns
id_cols = ['YYYYMM','ZIPCODE','CITY','STATE']
measures = ['tot','bus','fam','ind','per','tem']
dff.columns = (id_cols
               + [f'o_{m}' for m in measures]
               + [f'i_{m}' for m in measures]
               + ['zip','year'])

In [11]:
#net change = inbound (i_) minus outbound (o_), one n_ column per category
cols = ['tot','bus','fam','ind','tem','per']
for kind in cols:
    inbound, outbound = dff[f'i_{kind}'], dff[f'o_{kind}']
    dff[f'n_{kind}'] = inbound.sub(outbound)

In [12]:
#keep the zip id & date fields first, then the out (o_), in (i_) and net (n_)
#counts; every other column is dropped
measure_order = ('tot','bus','fam','ind','per','tem')
keep = ['zip','year','YYYYMM'] + [f'{side}_{m}' for side in 'oin' for m in measure_order]
clean = dff.loc[:, keep].copy()

In [24]:
#preview the first five rows of the trimmed, reordered table
clean.head()

Unnamed: 0,zip,year,YYYYMM,o_tot,o_bus,o_fam,o_ind,o_per,o_tem,i_tot,...,i_fam,i_ind,i_per,i_tem,n_tot,n_bus,n_fam,n_ind,n_per,n_tem
20059,85001,2018,201812,21,0,0,12,19,0,25,...,0,17,23,0,4,0,0,5,4,0
20060,85003,2018,201812,116,0,16,90,110,0,127,...,34,88,116,11,11,0,18,-2,6,11
20061,85004,2018,201812,150,15,26,109,138,12,168,...,17,131,152,16,18,5,-9,22,14,4
20062,85005,2018,201812,0,0,0,0,0,0,36,...,21,14,35,0,36,0,21,14,35,0
20063,85006,2018,201812,205,0,42,158,194,11,182,...,33,146,168,14,-23,0,-9,-12,-26,3


##### Make lists of the in-, out-, and net-migration columns to build separate tables

In [17]:
#col_list holds three references to the unprefixed cols list (it is filled in
#before the prefixes are applied) and is not used by the cells shown here
col_list = [cols] * 3

#prefixed column names for the net (n_), inbound (i_) and outbound (o_) tables
n_cols = [f'n_{sub}' for sub in cols]
i_cols = [f'i_{sub}' for sub in cols]
o_cols = [f'o_{sub}' for sub in cols]

#### pivot for net, in, and out

In [18]:
#one row per zip, one column per (net measure, month), values summed
#fill_value=0 fills zip/month combinations that have no rows; margins=True adds the "All" totals
#aggfunc is the string 'sum' because passing np.sum is deprecated in recent pandas
net = clean.pivot_table(values=n_cols,index='zip',columns='YYYYMM',
                        aggfunc='sum',fill_value=0,margins=True).reset_index()

In [19]:
#one row per zip, one column per (inbound measure, month), values summed
#fill_value=0 fills zip/month combinations that have no rows; margins=True adds the "All" totals
#aggfunc is the string 'sum' because passing np.sum is deprecated in recent pandas
in_mig = clean.pivot_table(values=i_cols,index='zip',columns='YYYYMM',
                           aggfunc='sum',fill_value=0,margins=True).reset_index()

In [20]:
#one row per zip, one column per (outbound measure, month), values summed
#fill_value=0 fills zip/month combinations that have no rows; margins=True adds the "All" totals
#aggfunc is the string 'sum' because passing np.sum is deprecated in recent pandas
out_mig = clean.pivot_table(values=o_cols,index='zip',columns='YYYYMM',
                            aggfunc='sum',fill_value=0,margins=True).reset_index()

In [22]:
#write the three pivots into one workbook, one sheet per table
sheets = {"net": net, "in": in_mig, "out": out_mig}
with pd.ExcelWriter('output/usps_in-out-net.xlsx') as writer:
    for sheet_name, table in sheets.items():
        table.to_excel(writer, sheet_name=sheet_name)

# visualization

In [26]:
#inspect the dtype of each column in clean
clean.dtypes

zip        int64
year      object
YYYYMM     int64
o_tot      int64
o_bus      int64
o_fam      int64
o_ind      int64
o_per      int64
o_tem      int64
i_tot      int64
i_bus      int64
i_fam      int64
i_ind      int64
i_per      int64
i_tem      int64
n_tot      int64
n_bus      int64
n_fam      int64
n_ind      int64
n_per      int64
n_tem      int64
dtype: object

In [None]:
ann = 