## Which jobs grew in pay after adjusting for inflation. 

In [1]:
import sqlite3
import os
import pandas as pd

In [2]:
occupation_file_name = os.path.join('Occupation_Data.csv')

In [3]:
occupation = pd.read_csv(occupation_file_name, index_col=None)

In [4]:
occupation.head()

Unnamed: 0,OCC_CODE,OCC_TITLE,OCC_GROUP,TOT_EMP,EMP_PRSE,H_MEAN,A_MEAN,MEAN_PRSE,H_PCT10,H_PCT25,H_MEDIAN,H_PCT75,H_PCT90,A_PCT10,A_PCT25,A_MEDIAN,A_PCT75,A_PCT90,YEAR
0,00-0000,All Occupations,total,652100.0,1.3,21.99,45740.0,1.5,9.18,11.9,17.47,26.74,38.71,19100.0,24760.0,36350.0,55610.0,80510.0,2018
1,11-0000,Management Occupations,major,31180.0,2.6,49.67,103320.0,1.5,19.9,28.6,42.37,61.46,87.13,41400.0,59490.0,88120.0,127850.0,181230.0,2018
2,11-1011,Chief Executives,detailed,1210.0,6.1,73.53,152940.0,2.8,28.36,41.72,61.92,95.4,,58980.0,86770.0,128800.0,198420.0,,2018
3,11-1021,General and Operations Managers,detailed,10360.0,4.4,48.99,101900.0,2.1,17.27,24.89,38.13,61.21,96.22,35920.0,51770.0,79310.0,127320.0,200130.0,2018
4,11-2011,Advertising and Promotions Managers,detailed,60.0,16.8,53.04,110320.0,10.4,18.64,26.74,44.61,78.86,98.1,38770.0,55630.0,92780.0,164030.0,204050.0,2018


In economics the term real dollars is the value of currency after being adjusted for inflation. I will turn everything into 2018 dollars and to compute the difference I used an inflation calculator which can be found in the following website 
https://www.usinflationcalculator.com/

Adjusted for inflation, 1.00 in 2003 is equal to 1.36 in 2018, with a 36.5% cumulative rate of inflation.

Adjusted for inflation, 1.00 in 2008 is equal to 1.17 in 2018, with a 16.6% cumulative rate of inflation.

Adjusted for inflation, 1.00 in 2013 is equal to 1.08 in 2018, with a 7.8% cumulative rate of inflation.

The following sections will need to be adjusted based on their year. 
H_PCT10, H_PCT25, H_MEDIAN, H_CT75, H_PCT90, A_PCT10, A_PCT25, A_MEDIAN, A_PCT75, A_PCT90

In [5]:
year2018 = occupation['YEAR'] == 2018
year2013 = occupation['YEAR'] == 2013
year2008 = occupation['YEAR'] == 2008
year2003 = occupation['YEAR'] == 2003

In [6]:
occupation.columns[5:18]

for x in occupation.columns[5:18]:
    occupation.loc[(occupation.YEAR == 2013), x] *=1.08
    occupation.loc[(occupation.YEAR == 2008), x] *=1.17
    occupation.loc[(occupation.YEAR == 2003), x] *=1.36


In [7]:
dupes = occupation[occupation.duplicated(('OCC_CODE'), keep=False)]
dupes.sort_values('OCC_CODE')

Unnamed: 0,OCC_CODE,OCC_TITLE,OCC_GROUP,TOT_EMP,EMP_PRSE,H_MEAN,A_MEAN,MEAN_PRSE,H_PCT10,H_PCT25,H_MEDIAN,H_PCT75,H_PCT90,A_PCT10,A_PCT25,A_MEDIAN,A_PCT75,A_PCT90,YEAR
0,00-0000,All Occupations,total,652100.0,1.3,21.9900,45740.0,1.500,9.1800,11.9000,17.4700,26.7400,38.7100,19100.0,24760.0,36350.0,55610.0,80510.0,2018
1192,00-0000,All Occupations,,617800.0,1.4,21.8907,45536.4,1.521,9.1260,12.1329,17.4681,26.6175,37.8261,18989.1,25236.9,36328.5,55352.7,78682.5,2008
580,00-0000,All Occupations,total,609210.0,1.1,21.8700,45489.6,1.404,9.2340,11.5452,17.2908,26.6976,38.6100,19202.4,24008.4,35974.8,55533.6,80319.6,2013
1797,00-0000,All Occupations,,542460.0,0.9,22.2360,46253.6,1.496,9.7376,12.6208,17.8296,27.4584,38.3520,20236.8,26261.6,37100.8,57120.0,79777.6,2003
1193,11-0000,Management occupations,major,28690.0,2.1,50.5089,105054.3,1.638,23.4468,31.5081,44.0739,60.6294,88.9668,48765.6,65520.0,91681.2,126114.3,185047.2,2008
1,11-0000,Management Occupations,major,31180.0,2.6,49.6700,103320.0,1.500,19.9000,28.6000,42.3700,61.4600,87.1300,41400.0,59490.0,88120.0,127850.0,181230.0,2018
581,11-0000,Management Occupations,major,29000.0,2.3,48.7296,101347.2,1.188,21.5676,30.1320,42.4980,59.1948,84.3912,44863.2,62661.6,88387.2,123120.0,175532.4,2013
1798,11-0000,Management occupations,major,28090.0,2.3,48.7152,101333.6,1.088,21.0664,28.8864,41.9560,60.0168,85.6936,43819.2,60084.8,87284.8,124820.8,178255.2,2003
1194,11-1011,Chief executives,,1710.0,5.7,90.7569,188779.5,3.744,41.9562,59.9508,87.6213,,,87270.3,124686.9,182250.9,,,2008
582,11-1011,Chief Executives,detailed,1130.0,5.0,92.5668,192542.4,2.376,44.4096,60.8904,88.3008,,,92372.4,126640.8,183664.8,,,2013


In [8]:
occ_count=occupation.groupby('OCC_CODE')
occ_count_all = occ_count.size()
occ_count_4=occ_count.size() == 4
occ_count_3=occ_count.size() == 3
occ_count_2=occ_count.size() == 2
occ_count_1=occ_count.size() == 1

I am curious to see how occupations span across the years

In [9]:
print("Occupations that across all of the years")
print(occ_count_all.value_counts())

Occupations that across all of the years
4    384
2    139
3    128
1     80
5      5
dtype: int64


It is odd that there is a 5 in the options I need to figure out which year has duplicates by year.

In [10]:
dupe_year=occupation.groupby(['OCC_CODE', 'YEAR']).size() > 1
print(dupe_year.value_counts())
dupe_year_list=dupe_year[dupe_year == True]
dupe_year_list

False    2293
True        5
dtype: int64


OCC_CODE  YEAR
53-7061   2003    True
53-7062   2003    True
53-7063   2003    True
53-7064   2003    True
53-7081   2003    True
dtype: bool

2003 Seems to have produced all of the duplicates and here is the list of duplicate job codes 

I am going to have to remove the duplicate data from the dataset

In [11]:
occupation.to_sql("occ_table",sqlite3.connect('occ.db'), if_exists ="replace")

In [12]:
con = sqlite3.connect("occ.db")

In [13]:
occupation = pd.read_sql_query("SELECT * FROM occ_table WHERE YEAR=2003 AND OCC_CODE IN ('53-7061', '53-7062', '53-7063', '53-7064', '53-7081') ORDER BY OCC_CODE ASC", con)
occupation

Unnamed: 0,index,OCC_CODE,OCC_TITLE,OCC_GROUP,TOT_EMP,EMP_PRSE,H_MEAN,A_MEAN,MEAN_PRSE,H_PCT10,H_PCT25,H_MEDIAN,H_PCT75,H_PCT90,A_PCT10,A_PCT25,A_MEDIAN,A_PCT75,A_PCT90,YEAR
0,2292,53-7061,Cleaners of vehicles and equipment,,1260.0,15.7,13.1784,27404.0,6.664,8.1192,9.3432,11.5736,15.3408,19.6656,16891.2,19434.4,24085.6,31905.6,40908.8,2003
1,2298,53-7061,Cleaners of vehicles and equipment,,1320.0,12.4,13.9264,28954.4,10.336,8.0784,9.2344,11.5464,16.116,24.1672,16796.0,19203.2,24017.6,33524.0,50252.0,2003
2,2293,53-7062,"Laborers and freight, stock, and material move...",,12700.0,5.6,15.3,31824.0,6.12,9.5608,11.22,13.4912,16.4016,26.5336,19883.2,23337.6,28070.4,34122.4,55188.8,2003
3,2299,53-7062,"Laborers and freight, stock, and material move...",,12600.0,9.3,15.6672,32572.0,5.304,9.5336,11.4512,14.008,17.952,26.18,19842.4,23813.6,29131.2,37345.6,54454.4,2003
4,2294,53-7063,Machine feeders and offbearers,,1650.0,29.6,17.0952,35550.4,5.848,11.0432,13.0016,14.9736,20.4136,24.3304,22956.8,27036.8,31130.4,42445.6,50605.6,2003
5,2300,53-7063,Machine feeders and offbearers,,1410.0,29.9,17.068,35509.6,6.936,10.7848,12.6888,14.8512,20.4272,24.2624,22426.4,26397.6,30885.6,42486.4,50456.0,2003
6,2295,53-7064,"Packers and packagers, hand",,4970.0,11.3,11.9952,24956.0,2.312,8.2008,9.4248,11.3832,13.9264,17.5168,17054.4,19611.2,23691.2,28968.0,36420.8,2003
7,2301,53-7064,"Packers and packagers, hand",,4650.0,9.0,12.1448,25255.2,3.128,8.228,9.3432,11.3016,14.1984,18.2784,17095.2,19434.4,23514.4,29525.6,38025.6,2003
8,2296,53-7081,Refuse and recyclable material collectors,,400.0,29.9,16.1976,33687.2,6.936,10.4992,12.852,15.912,18.8088,22.6032,21855.2,26724.0,33088.8,39127.2,47001.6,2003
9,2302,53-7081,Refuse and recyclable material collectors,,420.0,29.0,15.7488,32748.8,8.024,10.7032,12.2672,14.5384,18.4824,22.1816,22249.6,25513.6,30232.8,38460.8,46131.2,2003


There are 736 unique occupations on this list of those occupations 384 occupations existed all four observed years, 128 existed for three observed years, 139 exitsed for two observed years, and 80 existed for only one observed year. 