### Data Preparation

#### Preparing: 1) Prison Wage Data; 2) Prison Phone Call Price Data; 3) Prison Toothpaste Price Data; 4) Prison Rice Price Data; and 5) Prison Pen Price Data for visualization in Observable Plot.

###### 30/09/2022 at 11:11 CEST

In [1]:
# import necessary libraries 
import os
import pandas as pd
import numpy as np

In [2]:
# change working directory to a specified directory
os.chdir('../')
print("Directory Changes")

# get current working directory
cwd = os.getcwd()
print("Current working directory is:", cwd)

Directory Changes
Current working directory is: C:\Users\Carol\Documents\PersonalProjects\dollarCost


In [3]:
# read in csv data
wage = pd.read_csv("data/raw/state_prison_wages.csv")
phone = pd.read_csv("data/raw/state_prison_cost_phone.csv")
toothpaste = pd.read_csv("data/raw/state_prison_cost_toothpaste.csv")
rice = pd.read_csv("data/raw/state_prison_cost_rice.csv")
pen = pd.read_csv("data/raw/state_prison_cost_pen.csv")

# Preparing All Data

## Prepare WAGE Data

In [4]:
# gain insight into wage data
wage.shape

(52, 6)

In [5]:
# gain insight into wage data
wage.dtypes

state            object
niMinPWage      float64
niMaxPWage      float64
ciMinPWage      float64
ciMaxPWage      float64
stateMinWage    float64
dtype: object

In [6]:
# gain insight into wage data
wage.head()

Unnamed: 0,state,niMinPWage,niMaxPWage,ciMinPWage,ciMaxPWage,stateMinWage
0,AL,0.0,0.0,0.25,0.75,7.25
1,AK,0.3,1.25,0.65,4.9,10.34
2,AZ,0.15,0.5,0.2,0.8,12.8
3,AR,0.0,0.0,0.0,0.0,11.0
4,CA,0.08,0.37,0.3,0.95,14.0


In [7]:
# set df 'wage' index to 'state'
wage = wage.set_index('state')

In [8]:
# create new df, 'meanWage', that is a subset
    # of df 'wage' using columns 'niMinPWage' 
    # to 'ciMaxPWage'
# add a new column,'meanWage' to df 'wage' and 
    # set it equal to the MEAN of all columns 
    # in the df 'meanWage' 
meanWage = wage.loc[:, 'niMinPWage':'ciMaxPWage']
wage['meanWage'] = meanWage.mean(axis = 1)

In [9]:
# create new df, 'medianWage', that is a subset
    # of df 'wage' using columns 'niMinPWage' 
    # to 'ciMaxPWage'
# add a new column,'medianWage' to df 'wage' and 
    # set it equal to the MEDIAN of all columns 
    # in the df 'medianWage'
medianWage = wage.loc[:, 'niMinPWage':'ciMaxPWage']
wage['medianWage'] = medianWage.median(axis = 1) 

In [10]:
# for df 'wage', round columns 'meanWage' and 'medianWage' to 2 decimal places 
wage['meanWage'] = np.round(wage['meanWage'], decimals = 2)
wage['medianWage'] = np.round(wage['medianWage'], decimals =2)

## Prepare PHONE Data

In [11]:
# gain insight into phone data
phone.shape

(51, 7)

In [12]:
# gain insight into phone data
phone.dtypes

state                object
collectBase         float64
collectPerMinute    float64
prepaidBase         float64
prepaidPerMinute    float64
debitBase           float64
debitPerMinute      float64
dtype: object

In [13]:
# gain insight into phone data
phone.head(10)

Unnamed: 0,state,collectBase,collectPerMinute,prepaidBase,prepaidPerMinute,debitBase,debitPerMinute
0,AL,0.0,0.25,0.0,0.21,0.0,0.21
1,AK,0.0,0.25,0.0,0.21,,
2,AZ,0.0,0.2,0.0,0.2,0.0,0.2
3,AR,3.12,0.12,3.12,0.12,3.12,0.12
4,CA,0.0,0.082,0.0,0.082,,
5,CO,0.0,0.12,0.0,0.12,0.0,0.12
6,CT,0.0,0.3245,0.0,0.2433,0.0,0.2433
7,DE,1.09,0.08,1.09,0.08,1.09,0.08
8,FL,0.0,0.14,0.0,0.14,,
9,GA,0.0,0.16,0.0,0.16,0.0,0.16


In [14]:
# set df 'phone' index to 'state'
phone = phone.set_index('state')

In [15]:
# within df 'phone', create new columns, 'collectTrueMinute', 'prepaidTrueMinute', and 
    # 'debitTrueMinute' and set them equal to the base price + price per minute for each 
    # phone call type (i.e., collect, prepaid, and debit) respectively in order to calculate
    # the true cost per minute for each phone call type
phone['collectTrueMinute'] = phone['collectBase'] + phone['collectPerMinute']  
phone['prepaidTrueMinute'] = phone['prepaidBase'] + phone['prepaidPerMinute']  
phone['debitTrueMinute'] = phone['debitBase'] + phone['debitPerMinute']  

In [16]:
# create new df, 'meanPhone', that is a subset
    # of df 'phone' using columns 'collectTrueMinute' 
    # to 'debitTrueMinute'
# add a new column,'meanPhone' to df 'phone' and 
    # set it equal to the MEAN of all columns 
    # in the df 'meanPhone'
meanPhone = phone.loc[:, 'collectTrueMinute':'debitTrueMinute']
phone['meanPhone'] = meanPhone.mean(axis = 1)

In [17]:
# for df 'phone', round column 'meanPhone' to 2 decimal places 
phone['meanPhone'] = np.round(phone['meanPhone'], decimals = 2)

In [18]:
# create new lists, 'colsBase' and 'colsMinute' with desired columns from 'phone' df
colsBase = [phone['collectBase'], phone['prepaidBase'], phone['debitBase']]
colsMinute = [phone['collectPerMinute'], phone['prepaidPerMinute'], phone['debitPerMinute']]

# create new df 'phoneBase' and new df 'phoneMinute' by concatenating columns in the respective lists
phoneBase = pd.concat(colsBase, axis = 1)
phoneMinute = pd.concat(colsMinute, axis = 1)

# for df 'phone' create new column 'meanBase' and
    # calculate the mean base price for phone calls
    # given the base price subset df 'phoneBase'
phone['meanBase'] = phoneBase.mean(axis = 1)
# for df 'phone' create new column 'meanMinute' and
    # calculate the mean minute price for phone calls
    # given the base price subset df 'phoneMinute'
phone['meanMinute'] = phoneMinute.mean(axis = 1)

#for df 'phone' create a new column 'meanPhone15' and 
    # calculate the mean price of a 15-minute phone call
    # given the 'meanBase' and 'meanMinute' per each state
phone['meanPhone15'] = phone['meanBase'] + (phone['meanMinute'] * 15)

# for df 'phone', round column 'meanPhone15' to 2 decimal places 
phone['meanPhone15'] = np.round(phone['meanPhone15'], decimals = 2)

## Prepare TOOTHPASTE Data

In [19]:
# gain insight into toothtpaste data
toothpaste.shape

(51, 40)

In [20]:
# gain insight into toothtpaste data
toothpaste.dtypes

state     object
tp0      float64
tp1      float64
tp2      float64
tp3      float64
tp4      float64
tp5      float64
tp6      float64
tp7      float64
tp8      float64
tp9      float64
tp10     float64
tp11     float64
tp12     float64
tp13     float64
tp14     float64
tp15     float64
tp16     float64
tp17     float64
tp18     float64
tp19     float64
tp20     float64
tp21     float64
tp22     float64
tp23     float64
tp24     float64
tp25     float64
tp26     float64
tp27     float64
tp28     float64
tp29     float64
tp30     float64
tp31     float64
tp32     float64
tp33     float64
tp34     float64
tp35     float64
tp36     float64
tp37     float64
tp38     float64
dtype: object

In [21]:
# gain insight into toothpaste data
toothpaste.head()

Unnamed: 0,state,tp0,tp1,tp2,tp3,tp4,tp5,tp6,tp7,tp8,...,tp29,tp30,tp31,tp32,tp33,tp34,tp35,tp36,tp37,tp38
0,AL,4.7,3.45,3.25,3.4,4.95,,,,,...,,,,,,,,,,
1,AK,1.5,1.34,3.16,6.89,,,,,,...,,,,,,,,,,
2,AZ,1.5,3.85,2.75,6.75,2.5,3.1,4.6,,,...,,,,,,,,,,
3,AR,2.75,2.4,1.7,6.75,,,,,,...,,,,,,,,,,
4,CA,2.0,5.85,4.4,6.75,2.0,,,,,...,,,,,,,,,,


In [22]:
# set df 'toothpaste' index to 'state'
toothpaste = toothpaste.set_index('state')

In [23]:
# within df 'toothpaste', create a new column 'meanToothpaste' 
    # and set it equal to the MEAN of all other columns 
toothpaste['meanToothpaste'] = toothpaste.mean(axis = 1)

In [24]:
# for df 'toothpaste', round column 'meanToothpaste' to 2 decimal places 
toothpaste['meanToothpaste'] = np.round(toothpaste['meanToothpaste'], decimals = 2)

## Prepare RICE Data

In [25]:
# gain insight into rice data
rice.shape

(51, 40)

In [26]:
# gain insight into rice data
rice.dtypes

state     object
r0       float64
r1       float64
r2       float64
r3       float64
r4       float64
r5       float64
r6       float64
r7       float64
r8       float64
r9       float64
r10      float64
r11      float64
r12      float64
r13      float64
r14      float64
r15      float64
r16      float64
r17      float64
r18      float64
r19      float64
r20      float64
r21      float64
r22      float64
r23      float64
r24      float64
r25      float64
r26      float64
r27      float64
r28      float64
r29      float64
r30      float64
r31      float64
r32      float64
r33      float64
r34      float64
r35      float64
r36      float64
r37      float64
r38      float64
dtype: object

In [27]:
# gain insight into rice data
rice.head()

Unnamed: 0,state,r0,r1,r2,r3,r4,r5,r6,r7,r8,...,r29,r30,r31,r32,r33,r34,r35,r36,r37,r38
0,AL,1.2,1.3,1.7,,,,,,,...,,,,,,,,,,
1,AK,1.5,,,,,,,,,...,,,,,,,,,,
2,AZ,1.2,1.3,,,,,,,,...,,,,,,,,,,
3,AR,1.7,1.15,,,,,,,,...,,,,,,,,,,
4,CA,0.85,1.5,0.8,1.5,,,,,,...,,,,,,,,,,


In [28]:
# set df 'rice' index to 'state'
rice = rice.set_index('state')

In [29]:
# within df 'rice', create a new column 'meanRice' 
    # and set it equal to the MEAN of all other columns 
rice['meanRice'] = rice.mean(axis = 1)

In [30]:
# for df 'rice', round column 'meanRice' to 2 decimal places
rice['meanRice'] = np.round(rice['meanRice'], decimals = 2)

## Prepare PEN Data

In [31]:
# gain insight into pen data
pen.shape

(51, 35)

In [32]:
# gain insight into pen data
pen.dtypes

state     object
p0       float64
p1       float64
p2       float64
p3       float64
p4       float64
p5       float64
p6       float64
p7       float64
p8       float64
p9       float64
p10      float64
p11      float64
p12      float64
p13      float64
p14      float64
p15      float64
p16      float64
p17      float64
p18      float64
p19      float64
p20      float64
p21      float64
p22      float64
p23      float64
p24      float64
p25      float64
p26      float64
p27      float64
p28      float64
p29      float64
p30      float64
p31      float64
p32      float64
p33      float64
dtype: object

In [33]:
# gain insight into pen data
pen.head()

Unnamed: 0,state,p0,p1,p2,p3,p4,p5,p6,p7,p8,...,p24,p25,p26,p27,p28,p29,p30,p31,p32,p33
0,AL,1.15,,,,,,,,,...,,,,,,,,,,
1,AK,2.39,3.03,,,,,,,,...,,,,,,,,,,
2,AZ,1.05,,,,,,,,,...,,,,,,,,,,
3,AR,1.1,,,,,,,,,...,,,,,,,,,,
4,CA,0.95,,,,,,,,,...,,,,,,,,,,


In [34]:
# set df 'pen' index to 'state'
pen = pen.set_index('state')

In [35]:
# within df 'pen', create a new column 'meanPen' 
    # and set it equal to the MEAN of all other columns
pen['meanPen'] = pen.mean(axis = 1)

In [36]:
# for df 'pen', round column 'meanPen' to 2 decimal places
pen['meanPen'] = np.round(pen['meanPen'], decimals = 2)

## Merge All Data

In [37]:
# create new list, 'cols' with desired columns from 'wage', 'phone', 'toothpaste', 'rice',
    # and 'pen' dfs
cols = [wage['stateMinWage'], wage['niMinPWage'], wage['niMaxPWage'], wage['ciMinPWage'], 
        wage['ciMaxPWage'], wage['meanWage'], wage['medianWage'], phone['meanPhone'], 
        phone['meanPhone15'], toothpaste['meanToothpaste'], rice['meanRice'], pen['meanPen']]

# create new df 'statePrisonPriceData' by concatenating columns in the list, 'cols'
statePrisonPriceData = pd.concat(cols, axis = 1)

In [38]:
# gain insight into df 'statePrisonPriceData'
statePrisonPriceData.head()

Unnamed: 0_level_0,stateMinWage,niMinPWage,niMaxPWage,ciMinPWage,ciMaxPWage,meanWage,medianWage,meanPhone,meanPhone15,meanToothpaste,meanRice,meanPen
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15
AK,10.34,0.3,1.25,0.65,4.9,1.78,0.95,0.23,3.45,3.22,1.5,2.71
AZ,12.8,0.15,0.5,0.2,0.8,0.41,0.35,0.2,3.0,3.58,1.25,1.05
AR,11.0,0.0,0.0,0.0,0.0,0.0,0.0,3.24,4.92,3.4,1.42,1.1
CA,14.0,0.08,0.37,0.3,0.95,0.42,0.34,0.08,1.23,4.2,1.16,0.95


## Calculate  Additional Metrics

##### Labor Per Dollar

In [39]:
# for df 'statePrisonPriceData', create new column, 'laborPerDollar'
    # where the number of hours of labor needed to earn $1.00 is 
    # calculated per state given the column 'medianWage'
statePrisonPriceData['laborPerDollar'] = 1 / statePrisonPriceData['medianWage']

In [40]:
# in instances where 'medianWage' = 0 (because the state does not financially compensate 
    # incarcerated workers at all),replace infinite, 'np.inf' updated data with NaN
statePrisonPriceData = statePrisonPriceData.replace(np.inf, np.nan)

In [41]:
# for df 'statePrisonPriceData', round column 'laborPerDollar' to 2 decimal places
statePrisonPriceData['laborPerDollar'] = np.round(statePrisonPriceData['laborPerDollar'], decimals = 2)

##### Phone Time Per Dollar

In [42]:
# for df 'statePrisonPriceData', create new column, 'phonePerDollar'
    # where the number of calling minutes you can buy with $1.00 is 
    # calculated per state given the column 'meanPhone'
statePrisonPriceData['phonePerDollar'] = 1 / statePrisonPriceData['meanPhone']

In [43]:
# for df 'statePrisonPriceData', round column 'laborPerDollar' to 2 decimal places
statePrisonPriceData['phonePerDollar'] = np.round(statePrisonPriceData['phonePerDollar'], decimals = 2)

##### Labor Per 15-minute Phone Call

In [44]:
# for df 'statePrisonPriceData', create new column, 'laborPerPhone15'
    # where the number of prison labor hours necessary to buy 15 minutes
    # of phone time is calculated per state given the columns
    # 'laborPerDollar' and 'meanPhone15'
statePrisonPriceData['laborPerPhone15'] = statePrisonPriceData['laborPerDollar'] *  statePrisonPriceData['meanPhone15']

In [45]:
# for df 'statePrisonPriceData', round column 'laborPerPhone15' to 2 decimal places
statePrisonPriceData['laborPerPhone15'] = np.round(statePrisonPriceData['laborPerPhone15'], decimals = 2)

##### Labor Per Tube of Toothpaste

In [46]:
# for df 'statePrisonPriceData', create new column, 'laborPerToothpaste'
    # where the number of prison labor hours necessary to buy 1
    # unit of prison toothpaste is calculated per state given the columns
    # 'laborPerDollar' and 'meanToothpaste'
statePrisonPriceData['laborPerToothpaste'] = statePrisonPriceData['laborPerDollar'] *  statePrisonPriceData['meanToothpaste']

In [47]:
# for df 'statePrisonPriceData', round column 'laborPerToothpaste' to 2 decimal places
statePrisonPriceData['laborPerToothpaste'] = np.round(statePrisonPriceData['laborPerToothpaste'], decimals = 2)

##### Labor Per Bag of Rice

In [48]:
# for df 'statePrisonPriceData', create new column, 'laborPerRice'
    # where the number of prison labor hours necessary to buy 1
    # unit of prison rice is calculated per state given the columns
    # 'laborPerDollar' and 'meanRice'
statePrisonPriceData['laborPerRice'] = statePrisonPriceData['laborPerDollar'] *  statePrisonPriceData['meanRice']

In [49]:
# for df 'statePrisonPriceData', round column 'laborPerRice' to 2 decimal places
statePrisonPriceData['laborPerRice'] = np.round(statePrisonPriceData['laborPerRice'], decimals = 2)

##### Labor Per Pen

In [50]:
# for df 'statePrisonPriceData', create new column, 'laborPerPen'
    # where the number of prison labor hours necessary to buy 
    # prison price is calculated per state given the columns
    # 'laborPerDollar' and 'meanPen'
statePrisonPriceData['laborPerPen'] = statePrisonPriceData['laborPerDollar'] *  statePrisonPriceData['meanPen']

In [51]:
# for df 'statePrisonPriceData', round column 'laborPerPen' to 2 decimal places
statePrisonPriceData['laborPerPen'] = np.round(statePrisonPriceData['laborPerPen'], decimals = 2)

In [52]:
# gain insight into statePrisonPriceData
statePrisonPriceData.head()

Unnamed: 0_level_0,stateMinWage,niMinPWage,niMaxPWage,ciMinPWage,ciMaxPWage,meanWage,medianWage,meanPhone,meanPhone15,meanToothpaste,meanRice,meanPen,laborPerDollar,phonePerDollar,laborPerPhone15,laborPerToothpaste,laborPerRice,laborPerPen
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58
AK,10.34,0.3,1.25,0.65,4.9,1.78,0.95,0.23,3.45,3.22,1.5,2.71,1.05,4.35,3.62,3.38,1.58,2.85
AZ,12.8,0.15,0.5,0.2,0.8,0.41,0.35,0.2,3.0,3.58,1.25,1.05,2.86,5.0,8.58,10.24,3.58,3.0
AR,11.0,0.0,0.0,0.0,0.0,0.0,0.0,3.24,4.92,3.4,1.42,1.1,,0.31,,,,
CA,14.0,0.08,0.37,0.3,0.95,0.42,0.34,0.08,1.23,4.2,1.16,0.95,2.94,12.5,3.62,12.35,3.41,2.79


## Save Data 

In [56]:
# write statePrisonPriceData to csv
statePrisonPriceData.to_csv('data/processed/state_prison_price_data.csv', encoding='utf-8', index=True)

# Reformatting Data for Viz in Observable Plot 

## Calculate a new column equal to the number of "pennies" that equate to the Median Wage earned per state

##### All States

In [53]:
# gain insight into statePrisonPriceData
statePrisonPriceData.head()

Unnamed: 0_level_0,stateMinWage,niMinPWage,niMaxPWage,ciMinPWage,ciMaxPWage,meanWage,medianWage,meanPhone,meanPhone15,meanToothpaste,meanRice,meanPen,laborPerDollar,phonePerDollar,laborPerPhone15,laborPerToothpaste,laborPerRice,laborPerPen
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58
AK,10.34,0.3,1.25,0.65,4.9,1.78,0.95,0.23,3.45,3.22,1.5,2.71,1.05,4.35,3.62,3.38,1.58,2.85
AZ,12.8,0.15,0.5,0.2,0.8,0.41,0.35,0.2,3.0,3.58,1.25,1.05,2.86,5.0,8.58,10.24,3.58,3.0
AR,11.0,0.0,0.0,0.0,0.0,0.0,0.0,3.24,4.92,3.4,1.42,1.1,,0.31,,,,
CA,14.0,0.08,0.37,0.3,0.95,0.42,0.34,0.08,1.23,4.2,1.16,0.95,2.94,12.5,3.62,12.35,3.41,2.79


In [54]:
# resest df 'statePrisonPriceData' index
statePrisonPriceData = statePrisonPriceData.reset_index()

In [55]:
# create new empty list, 'state_list'
state_list = []

# create a for loop that considers each individual state in the df 'statePrisonPriceData'
for state in statePrisonPriceData.state.unique():
    # filter for a specific state and save it as the temporary df variable, 'df_state' - (returns the state's single row)
    df_state = statePrisonPriceData[statePrisonPriceData["state"]==state]
    # create another for loop that loops from i (starting at 0) to the range of the state's median wage listed in whole cents
        # (ex: median wage state Y = 17 (cents))
        # append each instance of the temporary df variable, 'df_state' to the list 'state_list' to populate the list 
        # with x instances of each state,  where x = median wage in whole cents - 
        # (ex: if median wage of state Y = 17, should return 17 rows for state Y)
    for i in range(int(df_state.medianWage.values*100)):
        state_list.append(df_state)
        
# create new df, 'state_df' by concatenating each saved df element within the list, 'state_list'
state_df = pd.concat(state_list)

# create new empty list, 'state_list_indexed'
state_list_indexed = []

# create a for loop that considers each individual state in the df 'state_df'
for state in state_df.state.unique():
    # for each state subset, filter for a specific state and save it as a temporary df variable, 'df_state'
    df_state = state_df[state_df["state"] == state]
    # for each temporary df variable, 'df_state', create a new column, 'penny_i', with an index that ranges 
    # from 1 to the number of rows of the state subset
    df_state["penny_i"] = range(1, len(df_state)+1)
    # append each instance of the temporary df variable, 'df_state' to the list 'state_list_indexed'
    state_list_indexed.append(df_state)

# create a new df, 'pennies' by concatenating each saved df element within the list 'state_list_indexed'
pennies = pd.concat(state_list_indexed)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [56]:
# gain insight into df, 'pennies'
pennies.head(15)

Unnamed: 0,state,stateMinWage,niMinPWage,niMaxPWage,ciMinPWage,ciMaxPWage,meanWage,medianWage,meanPhone,meanPhone15,meanToothpaste,meanRice,meanPen,laborPerDollar,phonePerDollar,laborPerPhone15,laborPerToothpaste,laborPerRice,laborPerPen,penny_i
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,1
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,2
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,3
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,4
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,5
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,6
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,7
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,8
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,9
0,AL,7.25,0.0,0.0,0.25,0.75,0.25,0.12,0.22,3.35,3.95,1.4,1.15,8.33,4.55,27.91,32.9,11.66,9.58,10


##### State With Highest Prison Wage Per Hour (2.70 dollar/hour) (Creating Pennies for the State Minimum Wage) - Nevada

In [61]:
# create a new df, 'nv' by subsetting for the 'state' 'NV' in the df 'statePrisonPriceData' 
nv = statePrisonPriceData[statePrisonPriceData["state"]=='NV']

In [63]:
# create NV state minimum wage pennies
nv_list = []

for state in nv.state.unique():
    df_state = nv[nv["state"]==state]
    for i in range(int(df_state.stateMinWage.values*100)):
        nv_list.append(df_state)
        
nv_df = pd.concat(nv_list)

nv_list_indexed = []

for state in nv_df.state.unique():
    df_state = nv_df[nv_df["state"] == state]
    df_state["penny_j"] = range(1, len(df_state)+1)
    nv_list_indexed.append(df_state)

penniesNV = pd.concat(nv_list_indexed)

##### State With Lowest Prison Wage Per Hour (0 dollar/hour) (Creating Pennies for the State Minimum Wage) - Arkansas 

In [66]:
# create a new df, 'ar' by subsetting for the 'state' 'AR' in the df 'statePrisonPriceData' 
ar = statePrisonPriceData[statePrisonPriceData["state"]=='AR']

In [68]:
# create AR state minimum wage pennies
ar_list = []

for state in ar.state.unique():
    df_state = ar[ar["state"]==state]
    for i in range(int(df_state.stateMinWage.values*100)):
        ar_list.append(df_state)
        
ar_df = pd.concat(ar_list)

ar_list_indexed = []

for state in ar_df.state.unique():
    df_state = ar_df[ar_df["state"] == state]
    df_state["penny_j"] = range(1, len(df_state)+1)
    ar_list_indexed.append(df_state)

penniesAR = pd.concat(ar_list_indexed)

##### State With Lowest Prison Wage Per Hour (0 dollar/hour) (Creating Pennies for the State Minimum Wage) - Georgia

In [70]:
# create a new df, 'ga' by subsetting for the 'state' 'GA' in the df 'statePrisonPriceData' 
ga = statePrisonPriceData[statePrisonPriceData["state"]=='GA']

In [72]:
# create GA state minimum wage pennies
ga_list = []

for state in ga.state.unique():
    df_state = ga[ga["state"]==state]
    for i in range(int(df_state.stateMinWage.values*100)):
        ga_list.append(df_state)
        
ga_df = pd.concat(ga_list)

ga_list_indexed = []

for state in ga_df.state.unique():
    df_state = ga_df[ga_df["state"] == state]
    df_state["penny_j"] = range(1, len(df_state)+1)
    ga_list_indexed.append(df_state)

penniesGA = pd.concat(ga_list_indexed)

##### State With Lowest Prison Wage Per Hour (0 dollar/hour) (Creating Pennies for the State Minimum Wage) - Texas

In [73]:
penniesGA.tail()

Unnamed: 0,state,stateMinWage,niMinPWage,niMaxPWage,ciMinPWage,ciMaxPWage,meanWage,medianWage,meanPhone,meanPhone15,meanToothpaste,meanRice,meanPen,laborPerDollar,phonePerDollar,laborPerPhone15,laborPerToothpaste,laborPerRice,laborPerPen,penny_j
9,GA,7.25,0.0,0.0,0.0,0.0,0.0,0.0,0.16,2.4,4.28,1.25,1.1,,6.25,,,,,721
9,GA,7.25,0.0,0.0,0.0,0.0,0.0,0.0,0.16,2.4,4.28,1.25,1.1,,6.25,,,,,722
9,GA,7.25,0.0,0.0,0.0,0.0,0.0,0.0,0.16,2.4,4.28,1.25,1.1,,6.25,,,,,723
9,GA,7.25,0.0,0.0,0.0,0.0,0.0,0.0,0.16,2.4,4.28,1.25,1.1,,6.25,,,,,724
9,GA,7.25,0.0,0.0,0.0,0.0,0.0,0.0,0.16,2.4,4.28,1.25,1.1,,6.25,,,,,725


## Save Data

In [47]:
# write pennies to csv
pennies.to_csv('data/processed/pennies.csv', encoding='utf-8', index=True)

In [None]:
# write penniesNV to csv
penniesNV.to_csv('data/processed/pennies_nv.csv', encoding='utf-8', index=True)

In [None]:
# write penniesAR to csv
penniesAR.to_csv('data/processed/pennies_ar.csv', encoding='utf-8', index=True)

In [None]:
# write penniesGA to csv
penniesGA.to_csv('data/processed/pennies_ga.csv', encoding='utf-8', index=True)

In [None]:
# write penniesTX to csv
penniesTX.to_csv('data/processed/pennies_tx.csv', encoding='utf-8', index=True)