In [3]:
# Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Read into CSV
BLS_data = pd.read_csv("BLS_data_2017.csv")
BLS_data.head()

Unnamed: 0,area,year,area_title,area_type,naics,naics_title,own_code,occ_code,occ_title,o_group,...,h_median,h_pct75,h_pct90,a_pct10,a_pct25,a_median,a_pct75,a_pct90,annual,hourly
0,99,2017,U.S.,1,0,Cross-industry,1235,00-0000,All Occupations,total,...,18.12,29.38,46.23,19970,24770,37690,61110,96150,,
1,99,2017,U.S.,1,0,Cross-industry,1235,11-0000,Management Occupations,major,...,49.32,71.83,#,48220,69880,102590,149410,#,,
2,99,2017,U.S.,1,0,Cross-industry,1235,11-1000,Top Executives,minor,...,49.58,78.72,#,43140,66030,103120,163740,#,,
3,99,2017,U.S.,1,0,Cross-industry,1235,11-1010,Chief Executives,broad,...,88.11,#,#,68110,113470,183270,#,#,,
4,99,2017,U.S.,1,0,Cross-industry,1235,11-1011,Chief Executives,detailed,...,88.11,#,#,68110,113470,183270,#,#,,


In [5]:
# Create df based solely on US data
US_data = BLS_data[BLS_data["area_type"] == 1]
US_data.drop(["own_code","annual", "occ_code","hourly", "area_type", "area", "naics", "naics_title"], axis = 1, inplace=True)
US_data.dropna(axis=1, inplace=True)

# Create df based solely on states' data
State_data = BLS_data[( BLS_data["area_type"] == 2) & ( BLS_data["area_title"] != "District of Columbia")]
State_data.drop(["own_code","annual", "occ_code","hourly", "area_type", "area", "naics", "naics_title"], axis = 1, inplace=True)
State_data.dropna(axis=1, inplace=True)

US_data.head()
State_data.head()

Unnamed: 0,year,area_title,occ_title,o_group,tot_emp,emp_prse,jobs_1000,loc_quotient,h_mean,a_mean,...,h_pct10,h_pct25,h_median,h_pct75,h_pct90,a_pct10,a_pct25,a_median,a_pct75,a_pct90
174395,2017,Alabama,All Occupations,total,1922570,0.4,1000,1,20.76,43170,...,8.54,10.45,15.77,25.01,37.83,17770,21740,32800,52020,78690
174396,2017,Alaska,All Occupations,total,318170,0.9,1000,1,27.77,57750,...,11.27,15.21,22.86,34.92,49.06,23450,31630,47560,72630,102040
174397,2017,Arizona,All Occupations,total,2704050,0.5,1000,1,23.15,48160,...,10.01,11.75,17.44,27.76,42.74,20820,24430,36270,57740,88900
174398,2017,Arkansas,All Occupations,total,1200130,0.7,1000,1,19.49,40530,...,9.02,10.28,14.82,23.08,34.55,18750,21390,30810,48000,71870
174399,2017,California,All Occupations,total,16695010,0.2,1000,1,27.5,57190,...,10.81,12.71,19.7,33.83,54.45,22490,26430,40980,70360,113250


In [9]:
# Average annual income by industry across US
US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['a_mean'] = US_data_avg ['a_mean'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['a_mean'] = US_data_avg ['a_mean'].str.replace('*', '0')
US_data_avg ['a_mean'] = US_data_avg ['a_mean'].astype(int)           #convert string to integers
US_data_avg .sort_values("a_mean", ascending = False, inplace=True)
US_data_avg [["occ_title", "a_mean"]].head(10)

Unnamed: 0,occ_title,a_mean
1,Management Occupations,119910
315,Legal Occupations,107370
126,Computer and Mathematical Occupations,89810
158,Architecture and Engineering Occupations,86190
489,Healthcare Practitioners and Technical Occupat...,80760
70,Business and Financial Operations Occupations,76330
218,"Life, Physical, and Social Science Occupations",74370
427,"Arts, Design, Entertainment, Sports, and Media...",58950
331,"Education, Training, and Library Occupations",55470
942,Construction and Extraction Occupations,49930


In [12]:
# Median annual income by industry across US
US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['a_median'] = US_data_avg ['a_median'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['a_median'] = US_data_avg ['a_median'].str.replace('*', '0')
US_data_avg ['a_median'] = US_data_avg ['a_median'].astype(int)           #convert string to integers
US_data_avg .sort_values("a_median", ascending = False, inplace=True)
US_data_avg [["occ_title", "a_median"]].head(10)

Unnamed: 0,occ_title,a_median
1,Management Occupations,102590
126,Computer and Mathematical Occupations,84560
315,Legal Occupations,80080
158,Architecture and Engineering Occupations,79180
70,Business and Financial Operations Occupations,67710
489,Healthcare Practitioners and Technical Occupat...,64770
218,"Life, Physical, and Social Science Occupations",64510
331,"Education, Training, and Library Occupations",48740
427,"Arts, Design, Entertainment, Sports, and Media...",48230
942,Construction and Extraction Occupations,44730


In [13]:
# 10th percentile annual income by industry across US

US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['a_pct10'] = US_data_avg ['a_pct10'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['a_pct10'] = US_data_avg ['a_pct10'].str.replace('*', '0')
US_data_avg ['a_pct10'] = US_data_avg ['a_pct10'].astype(int)           #convert string to integers
US_data_avg .sort_values('a_pct10', ascending = False, inplace=True)
US_data_avg [["occ_title", "a_pct10"]].head(10)

Unnamed: 0,occ_title,a_pct10
1,Management Occupations,48220
126,Computer and Mathematical Occupations,43580
158,Architecture and Engineering Occupations,43420
70,Business and Financial Operations Occupations,37690
315,Legal Occupations,36970
218,"Life, Physical, and Social Science Occupations",34070
489,Healthcare Practitioners and Technical Occupat...,32990
942,Construction and Extraction Occupations,26540
289,Community and Social Service Occupations,25690
1045,"Installation, Maintenance, and Repair Occupations",25120


In [14]:
# 25th percentile annual income by industry across US

US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['a_pct25'] = US_data_avg ['a_pct25'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['a_pct25'] = US_data_avg ['a_pct25'].str.replace('*', '0')
US_data_avg ['a_pct25'] = US_data_avg ['a_pct25'].astype(int)           #convert string to integers
US_data_avg .sort_values('a_pct25', ascending = False, inplace=True)
US_data_avg [["occ_title", "a_pct25"]].head(10)

Unnamed: 0,occ_title,a_pct25
1,Management Occupations,69880
126,Computer and Mathematical Occupations,60360
158,Architecture and Engineering Occupations,58690
315,Legal Occupations,52340
70,Business and Financial Operations Occupations,50450
489,Healthcare Practitioners and Technical Occupat...,46340
218,"Life, Physical, and Social Science Occupations",46110
942,Construction and Extraction Occupations,33410
289,Community and Social Service Occupations,33050
1045,"Installation, Maintenance, and Repair Occupations",32830


In [15]:
# 75th percentile annual income by industry across US

US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['a_pct75'] = US_data_avg ['a_pct75'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['a_pct75'] = US_data_avg ['a_pct75'].str.replace('*', '0')
US_data_avg ['a_pct75'] = US_data_avg ['a_pct75'].astype(int)           #convert string to integers
US_data_avg .sort_values('a_pct75', ascending = False, inplace=True)
US_data_avg [["occ_title", "a_pct75"]].head(10)

Unnamed: 0,occ_title,a_pct75
1,Management Occupations,149410
315,Legal Occupations,140170
126,Computer and Mathematical Occupations,114130
158,Architecture and Engineering Occupations,106200
489,Healthcare Practitioners and Technical Occupat...,93160
218,"Life, Physical, and Social Science Occupations",92850
70,Business and Financial Operations Occupations,91970
427,"Arts, Design, Entertainment, Sports, and Media...",72680
331,"Education, Training, and Library Occupations",68830
942,Construction and Extraction Occupations,61680


In [18]:
# 90th percentile annual income by industry across US

US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['a_pct90'] = US_data_avg ['a_pct90'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['a_pct90'] = US_data_avg ['a_pct90'].str.replace('*', '0')
#US_data_avg ['a_pct90'] = US_data_avg ['a_pct90'].astype(int)           #convert string to integers --commented code out; caused error
US_data_avg .sort_values('a_pct90', ascending = False, inplace=True)
US_data_avg [["occ_title", "a_pct90"]].head(10)

Unnamed: 0,occ_title,a_pct90
331,"Education, Training, and Library Occupations",94900
606,Protective Service Occupations,84470
942,Construction and Extraction Occupations,82080
759,Sales and Related Occupations,77820
289,Community and Social Service Occupations,77360
1045,"Installation, Maintenance, and Repair Occupations",76720
1121,Production Occupations,61280
1287,Transportation and Material Moving Occupations,60420
802,Office and Administrative Support Occupations,59850
580,Healthcare Support Occupations,46680


In [21]:
# 10th percentile hourly income by industry across US

US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['h_pct10'] = US_data_avg ['h_pct10'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['h_pct10'] = US_data_avg ['h_pct10'].str.replace('*', '0')
#US_data_avg ['h_pct10'] = US_data_avg ['h_pct10'].astype(int)           #convert string to integers
US_data_avg .sort_values('h_pct10', ascending = False, inplace=True)
US_data_avg [["occ_title", "h_pct10"]].head(10)

Unnamed: 0,occ_title,h_pct10
915,"Farming, Fishing, and Forestry Occupations",9.85
580,Healthcare Support Occupations,9.84
1287,Transportation and Material Moving Occupations,9.55
681,Building and Grounds Cleaning and Maintenance ...,8.98
759,Sales and Related Occupations,8.8
699,Personal Care and Service Occupations,8.7
647,Food Preparation and Serving Related Occupations,8.38
1,Management Occupations,23.19
126,Computer and Mathematical Occupations,20.95
158,Architecture and Engineering Occupations,20.87


In [22]:
# 25th percentile hourly income by industry across US

US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['h_pct25'] = US_data_avg ['h_pct25'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['h_pct25'] = US_data_avg ['h_pct25'].str.replace('*', '0')
#US_data_avg ['h_pct25'] = US_data_avg ['h_pct25'].astype(int)           #convert string to integers
US_data_avg .sort_values('h_pct25', ascending = False, inplace=True)
US_data_avg [["occ_title", "h_pct25"]].head(10)

Unnamed: 0,occ_title,h_pct25
699,Personal Care and Service Occupations,9.74
647,Food Preparation and Serving Related Occupations,9.15
1,Management Occupations,33.6
126,Computer and Mathematical Occupations,29.02
158,Architecture and Engineering Occupations,28.21
315,Legal Occupations,25.17
70,Business and Financial Operations Occupations,24.25
489,Healthcare Practitioners and Technical Occupat...,22.28
218,"Life, Physical, and Social Science Occupations",22.17
942,Construction and Extraction Occupations,16.06


In [23]:
# 75th percentile hourly income by industry across US

US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['h_pct75'] = US_data_avg ['h_pct75'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['h_pct75'] = US_data_avg ['h_pct75'].str.replace('*', '0')
#US_data_avg ['h_pct75'] = US_data_avg ['h_pct75'].astype(int)           #convert string to integers
US_data_avg .sort_values('h_pct75', ascending = False, inplace=True)
US_data_avg [["occ_title", "h_pct75"]].head(10)

Unnamed: 0,occ_title,h_pct75
1,Management Occupations,71.83
315,Legal Occupations,67.39
126,Computer and Mathematical Occupations,54.87
158,Architecture and Engineering Occupations,51.06
489,Healthcare Practitioners and Technical Occupat...,44.79
218,"Life, Physical, and Social Science Occupations",44.64
70,Business and Financial Operations Occupations,44.22
427,"Arts, Design, Entertainment, Sports, and Media...",34.94
331,"Education, Training, and Library Occupations",33.09
942,Construction and Extraction Occupations,29.65


In [24]:
# 90th percentile hourly income by industry across US

US_data_avg = US_data[US_data["o_group"] == "major"]
US_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
US_data_avg ['h_pct90'] = US_data_avg ['h_pct90'].str.replace(',', '')  #remove comma in numbers
US_data_avg ['h_pct90'] = US_data_avg ['h_pct90'].str.replace('*', '0')
#US_data_avg ['h_pct90'] = US_data_avg ['h_pct90'].astype(int)           #convert string to integers
US_data_avg .sort_values('h_pct90', ascending = False, inplace=True)
US_data_avg [["occ_title", "h_pct90"]].head(10)

Unnamed: 0,occ_title,h_pct90
126,Computer and Mathematical Occupations,69.63
489,Healthcare Practitioners and Technical Occupat...,66.29
158,Architecture and Engineering Occupations,65.93
218,"Life, Physical, and Social Science Occupations",60.11
70,Business and Financial Operations Occupations,59.13
427,"Arts, Design, Entertainment, Sports, and Media...",49.91
331,"Education, Training, and Library Occupations",45.62
606,Protective Service Occupations,40.61
942,Construction and Extraction Occupations,39.46
759,Sales and Related Occupations,37.42


In [25]:
# State employment totals
State_employment_rate = State_data[State_data["o_group"] == "total"]
State_employment_rate["tot_emp"] = State_employment_rate['tot_emp'].str.replace(',', '')  #remove comma in numbers
State_employment_rate["tot_emp"] = State_employment_rate["tot_emp"].astype(int)  
State_employment_rate.sort_values("tot_emp", ascending=False, inplace=True)
State_employment_rate[["area_title", "tot_emp"]].head()

Unnamed: 0,area_title,tot_emp
174399,California,16695010
174438,Texas,11890880
174427,New York,9207870
174404,Florida,8419030
174408,Illinois,5927860


In [26]:
# State Average Annual Income 2017
State_avg_income= State_data[State_data["o_group"] == "total"]
State_avg_income["a_mean"] = State_avg_income['a_mean'].str.replace(',', '')  #remove comma in numbers
State_avg_income["a_mean"] = State_avg_income["a_mean"].astype(int)     
State_avg_income.sort_values("a_mean", ascending=False, inplace=True)
State_avg_income[["area_title", "a_mean"]].head(10)

Unnamed: 0,area_title,a_mean
174416,Massachusetts,62110
174427,New York,60100
174401,Connecticut,59410
174396,Alaska,57750
174442,Washington,57480
174415,Maryland,57270
174399,California,57190
174425,New Jersey,56970
174400,Colorado,54050
174441,Virginia,53980


In [19]:
# State Median Annual Income 2017
State_avg_income= State_data[State_data["o_group"] == "total"]
State_avg_income["a_median"] = State_avg_income['a_median'].str.replace(',', '')  #remove comma in numbers
State_avg_income["a_median"] = State_avg_income["a_median"].astype(int)     
State_avg_income.sort_values("a_median", ascending=False, inplace=True)
State_avg_income[["area_title", "a_median"]].head(10)

Unnamed: 0,area_title,a_median
174396,Alaska,47560
174416,Massachusetts,47440
174401,Connecticut,45870
174442,Washington,44440
174415,Maryland,43840
174427,New York,43690
174425,New Jersey,42500
174406,Hawaii,41650
174418,Minnesota,41260
174399,California,40980


In [34]:
# 10th percentile annual income by industry by state

State_data_avg = State_data[State_data["o_group"] == "major"]
State_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
State_data_avg ['a_pct10'] = State_data_avg ['a_pct10'].str.replace(',', '')  #remove comma in numbers
State_data_avg ['a_pct10'] = State_data_avg ['a_pct10'].str.replace('*', '0')
#State_data_avg ['a_pct10'] = State_data_avg ['a_pct10'].astype(int)           #convert string to integers --commented code out; caused error
State_data_avg .sort_values('a_pct10', ascending = False, inplace=True)
State_data_avg [["area_title","occ_title", "a_pct10"]].head(10)

Unnamed: 0,area_title,occ_title,a_pct10
174449,Alabama,Management Occupations,52130
178652,Alabama,Architecture and Engineering Occupations,43400
177760,Alabama,Computer and Mathematical Occupations,42150
176215,Alabama,Business and Financial Operations Occupations,35870
180252,Alabama,"Life, Physical, and Social Science Occupations",33570
182874,Alabama,Legal Occupations,31140
188035,Alabama,Healthcare Practitioners and Technical Occupat...,28850
181988,Alabama,Community and Social Service Occupations,25450
202705,Alabama,"Installation, Maintenance, and Repair Occupations",24560
200229,Alabama,Construction and Extraction Occupations,23610


In [35]:
# 25th percentile annual income by industry by state

State_data_avg = State_data[State_data["o_group"] == "major"]
State_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
State_data_avg ['a_pct25'] = State_data_avg ['a_pct25'].str.replace(',', '')  #remove comma in numbers
State_data_avg ['a_pct25'] = State_data_avg ['a_pct25'].str.replace('*', '0')
#State_data_avg ['a_pct25'] = State_data_avg ['a_pct25'].astype(int)           #convert string to integers --commented code out; caused error
State_data_avg .sort_values('a_pct25', ascending = False, inplace=True)
State_data_avg [["area_title","occ_title", "a_pct25"]].head(10)

Unnamed: 0,area_title,occ_title,a_pct25
174449,Alabama,Management Occupations,70130
178652,Alabama,Architecture and Engineering Occupations,60530
177760,Alabama,Computer and Mathematical Occupations,56630
176215,Alabama,Business and Financial Operations Occupations,47780
182874,Alabama,Legal Occupations,43410
180252,Alabama,"Life, Physical, and Social Science Occupations",42870
188035,Alabama,Healthcare Practitioners and Technical Occupat...,39120
181988,Alabama,Community and Social Service Occupations,32960
202705,Alabama,"Installation, Maintenance, and Repair Occupations",32220
200229,Alabama,Construction and Extraction Occupations,29140


In [36]:
# 75th percentile annual income by industry by state

State_data_avg = State_data[State_data["o_group"] == "major"]
State_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
State_data_avg ['a_pct75'] = State_data_avg ['a_pct75'].str.replace(',', '')  #remove comma in numbers
State_data_avg ['a_pct75'] = State_data_avg ['a_pct75'].str.replace('*', '0')
#State_data_avg ['a_pct75'] = State_data_avg ['a_pct75'].astype(int)           #convert string to integers --commented code out; caused error
State_data_avg .sort_values('a_pct75', ascending = False, inplace=True)
State_data_avg [["area_title","occ_title", "a_pct75"]].head(10)

Unnamed: 0,area_title,occ_title,a_pct75
176215,Alabama,Business and Financial Operations Occupations,86530
180252,Alabama,"Life, Physical, and Social Science Occupations",78040
188035,Alabama,Healthcare Practitioners and Technical Occupat...,73100
202705,Alabama,"Installation, Maintenance, and Repair Occupations",58170
183343,Alabama,"Education, Training, and Library Occupations",58020
181988,Alabama,Community and Social Service Occupations,54670
186263,Alabama,"Arts, Design, Entertainment, Sports, and Media...",54630
200229,Alabama,Construction and Extraction Occupations,49350
191846,Alabama,Protective Service Occupations,47320
205003,Alabama,Production Occupations,42270


In [37]:
# 90th percentile annual income by industry by state

State_data_avg = State_data[State_data["o_group"] == "major"]
State_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
State_data_avg ['a_pct90'] = State_data_avg ['a_pct90'].str.replace(',', '')  #remove comma in numbers
State_data_avg ['a_pct90'] = State_data_avg ['a_pct90'].str.replace('*', '0')
#State_data_avg ['a_pct90'] = State_data_avg ['a_pct90'].astype(int)           #convert string to integers --commented code out; caused error
State_data_avg .sort_values('a_pct90', ascending = False, inplace=True)
State_data_avg [["area_title","occ_title", "a_pct90"]].head(10)

Unnamed: 0,area_title,occ_title,a_pct90
186263,Alabama,"Arts, Design, Entertainment, Sports, and Media...",78980
202705,Alabama,"Installation, Maintenance, and Repair Occupations",73320
183343,Alabama,"Education, Training, and Library Occupations",69650
181988,Alabama,Community and Social Service Occupations,64110
195740,Alabama,Sales and Related Occupations,63110
200229,Alabama,Construction and Extraction Occupations,62480
191846,Alabama,Protective Service Occupations,60710
205003,Alabama,Production Occupations,58360
209397,Alabama,Transportation and Material Moving Occupations,54620
196868,Alabama,Office and Administrative Support Occupations,53240


In [38]:
# 10th percentile hourly income by industry by state

State_data_avg = State_data[State_data["o_group"] == "major"]
State_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
State_data_avg ['h_pct10'] = State_data_avg ['h_pct10'].str.replace(',', '')  #remove comma in numbers
State_data_avg ['h_pct10'] = State_data_avg ['h_pct10'].str.replace('*', '0')
#State_data_avg ['h_pct10'] = State_data_avg ['h_pct10'].astype(int)           #convert string to integers --commented code out; caused error
State_data_avg .sort_values('h_pct10', ascending = False, inplace=True)
State_data_avg [["area_title","occ_title", "h_pct10"]].head(10)

Unnamed: 0,area_title,occ_title,h_pct10
199736,Alabama,"Farming, Fishing, and Forestry Occupations",9.37
196868,Alabama,Office and Administrative Support Occupations,9.34
205003,Alabama,Production Occupations,9.21
186263,Alabama,"Arts, Design, Entertainment, Sports, and Media...",8.75
191846,Alabama,Protective Service Occupations,8.63
183343,Alabama,"Education, Training, and Library Occupations",8.6
209397,Alabama,Transportation and Material Moving Occupations,8.53
190940,Alabama,Healthcare Support Occupations,8.51
193814,Alabama,Building and Grounds Cleaning and Maintenance ...,8.12
195740,Alabama,Sales and Related Occupations,8.03


In [40]:
# 25th percentile hourly income by industry by state

State_data_avg = State_data[State_data["o_group"] == "major"]
State_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
State_data_avg ['h_pct25'] = State_data_avg ['h_pct25'].str.replace(',', '')  #remove comma in numbers
State_data_avg ['h_pct25'] = State_data_avg ['h_pct25'].str.replace('*', '0')
#State_data_avg ['h_pct25'] = State_data_avg ['h_pct25'].astype(int)           #convert string to integers --commented code out; caused error
State_data_avg .sort_values('h_pct25', ascending = False, inplace=True)
State_data_avg [["area_title","occ_title", "h_pct25"]].head(10)

Unnamed: 0,area_title,occ_title,h_pct25
190940,Alabama,Healthcare Support Occupations,9.81
193814,Alabama,Building and Grounds Cleaning and Maintenance ...,9.0
195740,Alabama,Sales and Related Occupations,8.93
194371,Alabama,Personal Care and Service Occupations,8.4
192862,Alabama,Food Preparation and Serving Related Occupations,8.34
174449,Alabama,Management Occupations,33.71
178652,Alabama,Architecture and Engineering Occupations,29.1
177760,Alabama,Computer and Mathematical Occupations,27.23
176215,Alabama,Business and Financial Operations Occupations,22.97
182874,Alabama,Legal Occupations,20.87


In [41]:
# 75th percentile hourly income by industry by state

State_data_avg = State_data[State_data["o_group"] == "major"]
State_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
State_data_avg ['h_pct75'] = State_data_avg ['h_pct75'].str.replace(',', '')  #remove comma in numbers
State_data_avg ['h_pct75'] = State_data_avg ['h_pct75'].str.replace('*', '0')
#State_data_avg ['h_pct75'] = State_data_avg ['h_pct75'].astype(int)           #convert string to integers --commented code out; caused error
State_data_avg .sort_values('h_pct75', ascending = False, inplace=True)
State_data_avg [["area_title","occ_title", "h_pct75"]].head(10)

Unnamed: 0,area_title,occ_title,h_pct75
174449,Alabama,Management Occupations,64.12
182874,Alabama,Legal Occupations,54.89
178652,Alabama,Architecture and Engineering Occupations,54.73
177760,Alabama,Computer and Mathematical Occupations,49.05
176215,Alabama,Business and Financial Operations Occupations,41.6
180252,Alabama,"Life, Physical, and Social Science Occupations",37.52
188035,Alabama,Healthcare Practitioners and Technical Occupat...,35.14
202705,Alabama,"Installation, Maintenance, and Repair Occupations",27.97
183343,Alabama,"Education, Training, and Library Occupations",27.89
181988,Alabama,Community and Social Service Occupations,26.28


In [42]:
# 90th percentile hourly income by industry by state

State_data_avg = State_data[State_data["o_group"] == "major"]
State_data_avg .drop_duplicates(["occ_title"], keep="first", inplace=True) 
State_data_avg ['h_pct90'] = State_data_avg ['h_pct90'].str.replace(',', '')  #remove comma in numbers
State_data_avg ['h_pct90'] = State_data_avg ['h_pct90'].str.replace('*', '0')
#State_data_avg ['h_pct90'] = State_data_avg ['h_pct90'].astype(int)           #convert string to integers --commented code out; caused error
State_data_avg .sort_values('h_pct90', ascending = False, inplace=True)
State_data_avg [["area_title","occ_title", "h_pct90"]].head(10)

Unnamed: 0,area_title,occ_title,h_pct90
174449,Alabama,Management Occupations,90.8
182874,Alabama,Legal Occupations,81.08
178652,Alabama,Architecture and Engineering Occupations,68.21
177760,Alabama,Computer and Mathematical Occupations,61.78
188035,Alabama,Healthcare Practitioners and Technical Occupat...,59.91
176215,Alabama,Business and Financial Operations Occupations,53.51
180252,Alabama,"Life, Physical, and Social Science Occupations",50.83
186263,Alabama,"Arts, Design, Entertainment, Sports, and Media...",37.97
202705,Alabama,"Installation, Maintenance, and Repair Occupations",35.25
183343,Alabama,"Education, Training, and Library Occupations",33.49
