In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
plt.style.use('ggplot')
plt.rcParams.update({'font.size': 20})

In [2]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# County Population By Year

### Census data (2021 from Jungu)

In [3]:
new_pop_temp = pd.read_csv('../data/Iowa county population 2021.csv')

In [4]:
new_pop_temp.head()

Unnamed: 0,STNAME,CTYNAME,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Iowa,Adair County,7679,7546,7466,7385,7365,7142,7006,7047,7028,7102,7059,6888
1,Iowa,Adams County,4023,3994,3910,3890,3874,3752,3690,3655,3628,3602,3588,3496
2,Iowa,Allamakee County,14377,14224,14150,14074,14067,13881,13847,13812,13832,13671,13642,13548
3,Iowa,Appanoose County,12861,12853,12713,12659,12678,12587,12515,12367,12467,12525,12430,12335
4,Iowa,Audubon County,6094,6000,5861,5859,5768,5707,5625,5550,5482,5491,5481,5338


In [5]:
new_pop = new_pop_temp.drop(['STNAME'],axis=1)

In [6]:
new_pop.head()

Unnamed: 0,CTYNAME,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Adair County,7679,7546,7466,7385,7365,7142,7006,7047,7028,7102,7059,6888
1,Adams County,4023,3994,3910,3890,3874,3752,3690,3655,3628,3602,3588,3496
2,Allamakee County,14377,14224,14150,14074,14067,13881,13847,13812,13832,13671,13642,13548
3,Appanoose County,12861,12853,12713,12659,12678,12587,12515,12367,12467,12525,12430,12335
4,Audubon County,6094,6000,5861,5859,5768,5707,5625,5550,5482,5491,5481,5338


In [7]:
melt_df = pd.melt(
    new_pop, 
    id_vars=['CTYNAME'], 
    value_vars=('2010','2011','2012','2013','2014','2015',
                '2016','2017','2018','2019','2020','2021'), 
    var_name='Year', 
    value_name='Population', 
    col_level=None
)

In [8]:
melt_df.head()

Unnamed: 0,CTYNAME,Year,Population
0,Adair County,2010,7679
1,Adams County,2010,4023
2,Allamakee County,2010,14377
3,Appanoose County,2010,12861
4,Audubon County,2010,6094


In [9]:
ctemp = melt_df['CTYNAME']
ctemp_split0 = ctemp.str.split(' ', n=- 1, expand=True)
ctemp_split = ctemp_split0.drop(ctemp_split0.columns[[1]], axis=1)
melt_df["County"] = ctemp_split[0]
melt_df["County"] = melt_df["County"].str.upper()

melt_df.loc[melt_df['County']=="BLACK",'County']='BLACK HAWK'
melt_df.loc[melt_df['County']=="BUENA",'County']='BUENA VISTA'
melt_df.loc[melt_df['County']=="CERRO",'County']='CERRO GORDO'
melt_df.loc[melt_df['County']=="DES",'County']='DES MOINES'
melt_df.loc[melt_df['County']=="PALO",'County']='PALO ALTO'
melt_df.loc[melt_df['County']=="VAN",'County']='VAN BUREN'
melt_df['County'] = melt_df['County'].str.lower()

iowa_pop_short = melt_df[['County','Year','Population']]

iowa_pop_short.head()

Unnamed: 0,County,Year,Population
0,adair,2010,7679
1,adams,2010,4023
2,allamakee,2010,14377
3,appanoose,2010,12861
4,audubon,2010,6094


In [10]:
iowa_pop_short['Year'].unique()

array(['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019', '2020', '2021'], dtype=object)

# County Income By Year

In [11]:
iowa_income_temp = pd.read_csv('../data/Annual_Personal_Income_for_State_of_Iowa_by_County.csv') 

In [12]:
iowa_income_temp.head()

Unnamed: 0,Row ID,Geography ID,Name,Variable Code,Variable,Value,Variable Unit,Date,Location
0,19001_2020_CAINC1-1_annual,19001,"Adair, IA",CAINC1-1,Personal income,398280,Thousands of dollars,12/31/2020,POINT (-94.478164 41.328528)
1,19003_2020_CAINC1-1_annual,19003,"Adams, IA",CAINC1-1,Personal income,213735,Thousands of dollars,12/31/2020,POINT (-94.696906 41.021656)
2,19005_2020_CAINC1-1_annual,19005,"Allamakee, IA",CAINC1-1,Personal income,697196,Thousands of dollars,12/31/2020,POINT (-91.382751 43.274964)
3,19007_2020_CAINC1-1_annual,19007,"Appanoose, IA",CAINC1-1,Personal income,523611,Thousands of dollars,12/31/2020,POINT (-92.873061 40.744296)
4,19011_2020_CAINC1-1_annual,19011,"Benton, IA",CAINC1-1,Personal income,1455638,Thousands of dollars,12/31/2020,POINT (-92.05763 42.092547)


In [13]:
temp = iowa_income_temp['Date']
temp_split0 = temp.str.split('/', n=- 1, expand=True)
temp_split = temp_split0.drop(temp_split0.columns[[1]], axis=1)

iowa_income_temp["Year"] = temp_split[2]

In [14]:
temp2 = iowa_income_temp['Name']
temp2_split0 = temp2.str.split(',', n=- 1, expand=True)
temp2_split = temp2_split0.drop(temp2_split0.columns[[1]], axis=1)

iowa_income_temp["County"] = temp2_split[0]
iowa_income_temp['County'] = iowa_income_temp['County'].str.upper()

In [15]:
iowa_income_temp.head()

Unnamed: 0,Row ID,Geography ID,Name,Variable Code,Variable,Value,Variable Unit,Date,Location,Year,County
0,19001_2020_CAINC1-1_annual,19001,"Adair, IA",CAINC1-1,Personal income,398280,Thousands of dollars,12/31/2020,POINT (-94.478164 41.328528),2020,ADAIR
1,19003_2020_CAINC1-1_annual,19003,"Adams, IA",CAINC1-1,Personal income,213735,Thousands of dollars,12/31/2020,POINT (-94.696906 41.021656),2020,ADAMS
2,19005_2020_CAINC1-1_annual,19005,"Allamakee, IA",CAINC1-1,Personal income,697196,Thousands of dollars,12/31/2020,POINT (-91.382751 43.274964),2020,ALLAMAKEE
3,19007_2020_CAINC1-1_annual,19007,"Appanoose, IA",CAINC1-1,Personal income,523611,Thousands of dollars,12/31/2020,POINT (-92.873061 40.744296),2020,APPANOOSE
4,19011_2020_CAINC1-1_annual,19011,"Benton, IA",CAINC1-1,Personal income,1455638,Thousands of dollars,12/31/2020,POINT (-92.05763 42.092547),2020,BENTON


In [16]:
iowa_income_temp['Variable'].unique()

array(['Personal income', 'Per capita personal income'], dtype=object)

In [17]:
iowa_income = iowa_income_temp.loc[iowa_income_temp['Variable'] == 'Per capita personal income']

In [18]:
iowa_income = iowa_income.sort_values(by=['County','Year'])

In [19]:
iowa_income.head()

Unnamed: 0,Row ID,Geography ID,Name,Variable Code,Variable,Value,Variable Unit,Date,Location,Year,County
2494,19001_1997_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,21269,Dollars,12/31/1997,POINT (-94.478164 41.328528),1997,ADAIR
2493,19001_1998_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,22118,Dollars,12/31/1998,POINT (-94.478164 41.328528),1998,ADAIR
2492,19001_1999_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,22793,Dollars,12/31/1999,POINT (-94.478164 41.328528),1999,ADAIR
2491,19001_2000_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,25608,Dollars,12/31/2000,POINT (-94.478164 41.328528),2000,ADAIR
2490,19001_2001_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,26047,Dollars,12/31/2001,POINT (-94.478164 41.328528),2001,ADAIR


In [20]:
iowa_income['County'].unique()

array(['ADAIR', 'ADAMS', 'ALLAMAKEE', 'APPANOOSE', 'AUDUBON', 'BENTON',
       'BLACK HAWK', 'BOONE', 'BREMER', 'BUCHANAN', 'BUENA VISTA',
       'BUTLER', 'CALHOUN', 'CARROLL', 'CASS', 'CEDAR', 'CERRO GORDO',
       'CHEROKEE', 'CHICKASAW', 'CLARKE', 'CLAY', 'CLAYTON', 'CLINTON',
       'CRAWFORD', 'DALLAS', 'DAVIS', 'DECATUR', 'DELAWARE', 'DES MOINES',
       'DICKINSON', 'DUBUQUE', 'EMMET', 'FAYETTE', 'FLOYD', 'FRANKLIN',
       'FREMONT', 'GREENE', 'GRUNDY', 'GUTHRIE', 'HAMILTON', 'HANCOCK',
       'HARDIN', 'HARRISON', 'HENRY', 'HOWARD', 'HUMBOLDT', 'IDA', 'IOWA',
       'JACKSON', 'JASPER', 'JEFFERSON', 'JOHNSON', 'JONES', 'KEOKUK',
       'KOSSUTH', 'LEE', 'LINN', 'LOUISA', 'LUCAS', 'LYON', 'MADISON',
       'MAHASKA', 'MARION', 'MARSHALL', 'MILLS', 'MITCHELL', 'MONONA',
       'MONROE', 'MONTGOMERY', 'MUSCATINE', "O'BRIEN", 'OSCEOLA', 'PAGE',
       'PALO ALTO', 'PLYMOUTH', 'POCAHONTAS', 'POLK', 'POTTAWATTAMIE',
       'POWESHIEK', 'RINGGOLD', 'SAC', 'SCOTT', 'SHELBY', 'SIOUX',

In [21]:
#iowa_income.loc[iowa_income['County']=="O'BRIEN",'County']='OBRIEN'

In [22]:
iowa_income['County'].unique()

array(['ADAIR', 'ADAMS', 'ALLAMAKEE', 'APPANOOSE', 'AUDUBON', 'BENTON',
       'BLACK HAWK', 'BOONE', 'BREMER', 'BUCHANAN', 'BUENA VISTA',
       'BUTLER', 'CALHOUN', 'CARROLL', 'CASS', 'CEDAR', 'CERRO GORDO',
       'CHEROKEE', 'CHICKASAW', 'CLARKE', 'CLAY', 'CLAYTON', 'CLINTON',
       'CRAWFORD', 'DALLAS', 'DAVIS', 'DECATUR', 'DELAWARE', 'DES MOINES',
       'DICKINSON', 'DUBUQUE', 'EMMET', 'FAYETTE', 'FLOYD', 'FRANKLIN',
       'FREMONT', 'GREENE', 'GRUNDY', 'GUTHRIE', 'HAMILTON', 'HANCOCK',
       'HARDIN', 'HARRISON', 'HENRY', 'HOWARD', 'HUMBOLDT', 'IDA', 'IOWA',
       'JACKSON', 'JASPER', 'JEFFERSON', 'JOHNSON', 'JONES', 'KEOKUK',
       'KOSSUTH', 'LEE', 'LINN', 'LOUISA', 'LUCAS', 'LYON', 'MADISON',
       'MAHASKA', 'MARION', 'MARSHALL', 'MILLS', 'MITCHELL', 'MONONA',
       'MONROE', 'MONTGOMERY', 'MUSCATINE', "O'BRIEN", 'OSCEOLA', 'PAGE',
       'PALO ALTO', 'PLYMOUTH', 'POCAHONTAS', 'POLK', 'POTTAWATTAMIE',
       'POWESHIEK', 'RINGGOLD', 'SAC', 'SCOTT', 'SHELBY', 'SIOUX',

In [23]:
iowa_income['County'] = iowa_income['County'].str.lower()

In [24]:
iowa_income['Year'].unique()

array(['1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012',
       '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020'],
      dtype=object)

In [25]:
iowa_income.head()

Unnamed: 0,Row ID,Geography ID,Name,Variable Code,Variable,Value,Variable Unit,Date,Location,Year,County
2494,19001_1997_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,21269,Dollars,12/31/1997,POINT (-94.478164 41.328528),1997,adair
2493,19001_1998_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,22118,Dollars,12/31/1998,POINT (-94.478164 41.328528),1998,adair
2492,19001_1999_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,22793,Dollars,12/31/1999,POINT (-94.478164 41.328528),1999,adair
2491,19001_2000_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,25608,Dollars,12/31/2000,POINT (-94.478164 41.328528),2000,adair
2490,19001_2001_CAINC1-3_annual,19001,"Adair, IA",CAINC1-3,Per capita personal income,26047,Dollars,12/31/2001,POINT (-94.478164 41.328528),2001,adair


# Unemployment Data

In [26]:
iowa_employ_temp = pd.read_csv('../data/Iowa_Local_Area_Unemployment_Statistics.csv') 

In [27]:
iowa_employ_temp.head()

Unnamed: 0,State FIPS,AREA TYPE,AREA NAME,YEAR,MONTH,SEASONALLY ADJUSTED,LABORFORCE,EMPLOYMENT,UNEMPLOYMENT,UNEMPLOYMENT RATE,LABFORCE_PART RATE
0,19,State,Iowa,2021,December,0,1660700,1611700,49000,2.9,
1,19,State,Iowa,2021,December,1,1662700,1604800,57900,3.5,66.9
2,19,State,Iowa,2021,November,1,1661300,1599600,61700,3.7,66.8
3,19,State,Iowa,2021,November,0,1666300,1622200,44000,2.6,
4,19,State,Iowa,2021,October,1,1661000,1596200,64800,3.9,66.8


In [28]:
iowa_employ_temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51216 entries, 0 to 51215
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   State FIPS           51216 non-null  int64  
 1   AREA TYPE            51216 non-null  object 
 2   AREA NAME            51216 non-null  object 
 3   YEAR                 51216 non-null  int64  
 4   MONTH                51216 non-null  object 
 5   SEASONALLY ADJUSTED  51216 non-null  int64  
 6   LABORFORCE           51216 non-null  int64  
 7   EMPLOYMENT           51216 non-null  int64  
 8   UNEMPLOYMENT         51216 non-null  int64  
 9   UNEMPLOYMENT RATE    51216 non-null  float64
 10  LABFORCE_PART RATE   264 non-null    float64
dtypes: float64(2), int64(6), object(3)
memory usage: 4.3+ MB


In [29]:
iowa_employ_temp['YEAR'] = iowa_employ_temp['YEAR'].apply(str)

In [30]:
iowa_employ_temp['AREA TYPE'].unique()

array(['State', 'County', 'Cities', 'Congressional District',
       'Metropolitan Statistical Area', 'Micropolitan Statistical Area',
       'Combined Statistical Area', 'IWD Workforce Regions (2020)',
       'Council of Government (COG)', 'Community College District'],
      dtype=object)

In [31]:
iowa_employ_temp_short = iowa_employ_temp.loc[(iowa_employ_temp['AREA TYPE']=='County') & (iowa_employ_temp['MONTH']=='July')]

In [32]:
iowa_employ_temp_short.head()

Unnamed: 0,State FIPS,AREA TYPE,AREA NAME,YEAR,MONTH,SEASONALLY ADJUSTED,LABORFORCE,EMPLOYMENT,UNEMPLOYMENT,UNEMPLOYMENT RATE,LABFORCE_PART RATE
533,19,County,Adair County,2021,July,0,4300,4180,120,2.7,
545,19,County,Adair County,2020,July,0,4050,3880,170,4.2,
557,19,County,Adair County,2019,July,0,4370,4280,100,2.2,
569,19,County,Adair County,2018,July,0,4280,4190,90,2.1,
581,19,County,Adair County,2017,July,0,4280,4180,100,2.4,


In [33]:
iowa_employ_temp_short.rename(columns={'AREA NAME': 'County_0'}, inplace=True)

ctemp = iowa_employ_temp_short['County_0']
ctemp_split0 = ctemp.str.split(' ', n=- 1, expand=True)
ctemp_split = ctemp_split0.drop(ctemp_split0.columns[[1]], axis=1)

iowa_employ_temp_short["County"] = ctemp_split[0]
iowa_employ_temp_short["County"] = iowa_employ_temp_short["County"].str.upper()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iowa_employ_temp_short["County"] = ctemp_split[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iowa_employ_temp_short["County"] = iowa_employ_temp_short["County"].str.upper()


In [34]:
iowa_employ_temp_short.head()

Unnamed: 0,State FIPS,AREA TYPE,County_0,YEAR,MONTH,SEASONALLY ADJUSTED,LABORFORCE,EMPLOYMENT,UNEMPLOYMENT,UNEMPLOYMENT RATE,LABFORCE_PART RATE,County
533,19,County,Adair County,2021,July,0,4300,4180,120,2.7,,ADAIR
545,19,County,Adair County,2020,July,0,4050,3880,170,4.2,,ADAIR
557,19,County,Adair County,2019,July,0,4370,4280,100,2.2,,ADAIR
569,19,County,Adair County,2018,July,0,4280,4190,90,2.1,,ADAIR
581,19,County,Adair County,2017,July,0,4280,4180,100,2.4,,ADAIR


In [35]:
iowa_employ_temp_short['County'].unique()

array(['ADAIR', 'ADAMS', 'ALLAMAKEE', 'APPANOOSE', 'AUDUBON', 'BENTON',
       'BLACK', 'BOONE', 'BREMER', 'BUCHANAN', 'BUENA', 'BUTLER',
       'CALHOUN', 'CARROLL', 'CASS', 'CEDAR', 'CERRO', 'CHEROKEE',
       'CHICKASAW', 'CLARKE', 'CLAY', 'CLAYTON', 'CLINTON', 'CRAWFORD',
       'DALLAS', 'DAVIS', 'DECATUR', 'DELAWARE', 'DES', 'DICKINSON',
       'DUBUQUE', 'EMMET', 'FAYETTE', 'FLOYD', 'FRANKLIN', 'FREMONT',
       'GREENE', 'GRUNDY', 'GUTHRIE', 'HAMILTON', 'HANCOCK', 'HARDIN',
       'HARRISON', 'HENRY', 'HOWARD', 'HUMBOLDT', 'IDA', 'IOWA',
       'JACKSON', 'JASPER', 'JEFFERSON', 'JOHNSON', 'JONES', 'KEOKUK',
       'KOSSUTH', 'LEE', 'LINN', 'LOUISA', 'LUCAS', 'LYON', 'MADISON',
       'MAHASKA', 'MARION', 'MARSHALL', 'MILLS', 'MITCHELL', 'MONONA',
       'MONROE', 'MONTGOMERY', 'MUSCATINE', "O'BRIEN", 'OSCEOLA', 'PAGE',
       'PALO', 'PLYMOUTH', 'POCAHONTAS', 'POLK', 'POTTAWATTAMIE',
       'POWESHIEK', 'RINGGOLD', 'SAC', 'SCOTT', 'SHELBY', 'SIOUX',
       'STORY', 'TAMA', 'TAY

In [36]:
#iowa_employ_temp_short.loc[iowa_employ['County']=="O'BRIEN",'County']='OBRIEN'
iowa_employ_temp_short.loc[iowa_employ_temp_short['County']=="BLACK",'County']='BLACK HAWK'
iowa_employ_temp_short.loc[iowa_employ_temp_short['County']=="BUENA",'County']='BUENA VISTA'
iowa_employ_temp_short.loc[iowa_employ_temp_short['County']=="CERRO",'County']='CERRO GORDO'
iowa_employ_temp_short.loc[iowa_employ_temp_short['County']=="DES",'County']='DES MOINES'
iowa_employ_temp_short.loc[iowa_employ_temp_short['County']=="PALO",'County']='PALO ALTO'
iowa_employ_temp_short.loc[iowa_employ_temp_short['County']=="VAN",'County']='VAN BUREN'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [37]:
iowa_employ_temp_short['County'] = iowa_employ_temp_short['County'].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iowa_employ_temp_short['County'] = iowa_employ_temp_short['County'].str.lower()


- Compare to Store County Spellings

In [38]:
store = pd.read_csv('../data/store.csv')

In [39]:
np.sort(store['County'].unique())

array(['adair', 'adams', 'allamakee', 'appanoose', 'audubon', 'benton',
       'black hawk', 'boone', 'bremer', 'buchanan', 'buena vista',
       'butler', 'calhoun', 'carroll', 'cass', 'cedar', 'cerro gordo',
       'cherokee', 'chickasaw', 'clarke', 'clay', 'clayton', 'clinton',
       'crawford', 'dallas', 'davis', 'decatur', 'delaware', 'des moines',
       'dickinson', 'dubuque', 'emmet', 'fayette', 'floyd', 'franklin',
       'fremont', 'greene', 'grundy', 'guthrie', 'hamilton', 'hancock',
       'hardin', 'harrison', 'henry', 'howard', 'humboldt', 'ida', 'iowa',
       'jackson', 'jasper', 'jefferson', 'johnson', 'jones', 'keokuk',
       'kossuth', 'lee', 'linn', 'louisa', 'lucas', 'lyon', 'madison',
       'mahaska', 'marion', 'marshall', 'mills', 'mitchell', 'monona',
       'monroe', 'montgomery', 'muscatine', "o'brien", 'osceola', 'page',
       'palo alto', 'plymouth', 'pocahontas', 'polk', 'pottawattamie',
       'poweshiek', 'ringgold', 'sac', 'scott', 'shelby', 'sioux',

In [40]:
# Digging into the discrepancies 
temp2 = list(iowa_pop_short['County'].unique())
temp1 = list(store['County'].unique())
s = set(temp2)
temp3 = [x for x in temp1 if x not in s]
temp3

[]

# Create Final Table

In [41]:
iowa_pop = iowa_pop_short[['County', 'Year', 'Population']]

In [42]:
iowa_pop.head()

Unnamed: 0,County,Year,Population
0,adair,2010,7679
1,adams,2010,4023
2,allamakee,2010,14377
3,appanoose,2010,12861
4,audubon,2010,6094


In [43]:
iowa_income = iowa_income[['County', 'Year', 'Value']]
iowa_income.rename(columns={'Value': 'Income_PerCapita'}, inplace=True)

In [44]:
iowa_employ = iowa_employ_temp_short[['County', 'YEAR', 'LABORFORCE', 'EMPLOYMENT', 'UNEMPLOYMENT', 'UNEMPLOYMENT RATE']]
iowa_employ.rename(columns={'YEAR': 'Year'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [45]:
iowa_income.head()

Unnamed: 0,County,Year,Income_PerCapita
2494,adair,1997,21269
2493,adair,1998,22118
2492,adair,1999,22793
2491,adair,2000,25608
2490,adair,2001,26047


In [46]:
iowa_employ.head()

Unnamed: 0,County,Year,LABORFORCE,EMPLOYMENT,UNEMPLOYMENT,UNEMPLOYMENT RATE
533,adair,2021,4300,4180,120,2.7
545,adair,2020,4050,3880,170,4.2
557,adair,2019,4370,4280,100,2.2
569,adair,2018,4280,4190,90,2.1
581,adair,2017,4280,4180,100,2.4


In [47]:
Merged_pop_income = iowa_pop.merge(iowa_income, on = ['County', 'Year'], how = 'left')
Merged_pop_income.head()

Unnamed: 0,County,Year,Population,Income_PerCapita
0,adair,2010,7679,35209.0
1,adams,2010,4023,37156.0
2,allamakee,2010,14377,34590.0
3,appanoose,2010,12861,29930.0
4,audubon,2010,6094,39455.0


In [48]:
Merged_pop_income_employ = Merged_pop_income.merge(iowa_employ, on = ['County', 'Year'], how = 'left')
Merged_pop_income_employ.head()

Unnamed: 0,County,Year,Population,Income_PerCapita,LABORFORCE,EMPLOYMENT,UNEMPLOYMENT,UNEMPLOYMENT RATE
0,adair,2010,7679,35209.0,4190,3970,220,5.2
1,adams,2010,4023,37156.0,2270,2170,100,4.5
2,allamakee,2010,14377,34590.0,7960,7410,550,6.9
3,appanoose,2010,12861,29930.0,6340,5930,410,6.5
4,audubon,2010,6094,39455.0,3570,3380,180,5.1


In [49]:
county = Merged_pop_income_employ.copy()

In [50]:
county = county.loc[county['Year'].isin(['2010','2011','2012','2013','2014','2015','2016','2017','2018','2019','2020','2021'])]

In [51]:
county.sort_values(by = ['County','Year'])

Unnamed: 0,County,Year,Population,Income_PerCapita,LABORFORCE,EMPLOYMENT,UNEMPLOYMENT,UNEMPLOYMENT RATE
0,adair,2010,7679,35209.0,4190,3970,220,5.2
99,adair,2011,7546,40680.0,4200,4020,180,4.3
198,adair,2012,7466,41687.0,4140,3960,180,4.3
297,adair,2013,7385,43967.0,4250,4080,160,3.8
396,adair,2014,7365,44450.0,4260,4110,150,3.6
...,...,...,...,...,...,...,...,...
791,wright,2017,12758,46665.0,6660,6420,230,3.5
890,wright,2018,12669,51879.0,6620,6450,170,2.6
989,wright,2019,12551,48770.0,7150,6950,190,2.7
1088,wright,2020,12416,52825.0,6760,6460,300,4.4


In [52]:
county['Year'].unique()

array(['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019', '2020', '2021'], dtype=object)

In [53]:
county.to_csv('../data/county.csv', index_label = False)