## Which jobs grew in pay after adjusting for inflation?

In [1]:
import sqlite3
import os
import pandas as pd
import matplotlib.pyplot as plt

### I cleaned the data in the other file and this contains only the clean dataset.

In [2]:
occupation_file_name = os.path.join('new_occupation.csv')

In [3]:
occupation = pd.read_csv(occupation_file_name, index_col=0)

In [25]:
occupation["OCC_TITLE"] = occupation["OCC_TITLE"].str.lower()
occupation.head()

Unnamed: 0,OCC_CODE,OCC_TITLE,OCC_GROUP,TOT_EMP,EMP_PRSE,H_MEAN,A_MEAN,MEAN_PRSE,H_PCT10,H_PCT25,...,A_PCT90,YEAR,Unnamed: 19,H_CHANGE,A_CHANGE,FOUR_YEAR,THREE_YEAR,TWO_YEAR,ONE_YEAR,GAP_YEAR
1797,00-0000,all occupations,,542460.0,0.9,30.24096,62904.896,2.03456,13.243136,17.164288,...,108497.536,2003,,,,True,False,False,False,False
1798,11-0000,management occupations,major,28090.0,2.3,66.252672,137813.696,1.47968,28.650304,39.285504,...,242427.072,2003,,,,True,False,False,False,False
1799,11-1011,chief executives,,2070.0,4.8,120.612416,250879.744,4.06912,52.880064,74.205952,...,,2003,,,,True,False,False,False,False
1800,11-1021,general and operations managers,,8730.0,2.7,66.419136,138165.12,2.58944,28.354368,39.063552,...,246181.76,2003,,,,True,False,False,False,False
1801,11-1031,legislators,,90.0,28.3,28.927744,60185.984,17.75616,11.227072,13.243136,...,85414.528,2003,,,,False,True,False,False,False


In economics the term real dollars is the value of currency after being adjusted for inflation. I will turn everything into 2018 dollars and to compute the difference I used an inflation calculator which can be found in the following website 
https://www.usinflationcalculator.com/

Adjusted for inflation, 1.00 in 2003 is equal to 1.36 in 2018, with a 36.5% cumulative rate of inflation.

Adjusted for inflation, 1.00 in 2008 is equal to 1.17 in 2018, with a 16.6% cumulative rate of inflation.

Adjusted for inflation, 1.00 in 2013 is equal to 1.08 in 2018, with a 7.8% cumulative rate of inflation.

The following sections will need to be adjusted based on their year. 
H_PCT10, H_PCT25, H_MEDIAN, H_CT75, H_PCT90, A_PCT10, A_PCT25, A_MEDIAN, A_PCT75, A_PCT90

## Converting to 2018 dollars

In [5]:
#This formula converts each year to 2018 dollars, which contorls for inflation. 
occupation.columns[5:18]

for x in occupation.columns[5:18]:
    occupation.loc[(occupation.YEAR == 2013), x] *=1.08
    occupation.loc[(occupation.YEAR == 2008), x] *=1.17
    occupation.loc[(occupation.YEAR == 2003), x] *=1.36


## Checking to see how many occupations existed across all four sample years\


In [6]:
#This counts the number of instances of a specific job code. As you can see there are 389 jobs that where in all four 
#instances, 128 in three, 139 in two, and 80 in 1. 

occ_count=occupation.groupby('OCC_CODE')
occ_count_all = occ_count.size()
occ_count_all.value_counts()

4    389
2    139
3    128
1     80
dtype: int64

In [7]:
#made dictionaries that can be used to show years
total_instance=occupation.groupby(['OCC_CODE'])
total_instance=total_instance.size()
dict4={}
dict3={}
dict2={}
dict1={}
for y, x in total_instance.items():
    if x == 4:
        dict4.update({y:x})
    elif x == 3:
        dict3.update({y:x})
    elif x == 2:
        dict2.update({y:x})
    else:
        dict1.update({y:x})   

In [8]:
#I am going to create a dictionary that will allow me to organize the years.
check=(occupation.groupby(['OCC_CODE','YEAR'])).size()

#dictuionary containing each year and the years that fall in it. 
occ_yeardict={}
for y,x in check.items():
    (a,b)=y
    if a in occ_yeardict:
        t=occ_yeardict[a]
        t = t + (b,)
        occ_yeardict.update({a:t})
    else:
        occ_yeardict.update({a:(b,)})

#dictuionary containing each gaps and the length. 
dictgap={}
for x, y in occ_yeardict.items():
    z=pd.np.diff(y)
    if (len(z)>0) and ((10 in z)or(15 in z)):
        if z[0]>5:
            dictgap.update({x:(y[0:2],z[0])})
        elif z[1]>5:
            dictgap.update({x:(y[1:],z[1])})
    

In [9]:
#Determine the number of occupations with gaps
len(dictgap)

43

In [10]:
#sort values for percent change
occupation.sort_values(['YEAR', 'OCC_CODE'], ascending=[1, 1], inplace=True)

In [11]:
#create percent change for mean items
occupation['H_CHANGE'] = occupation.groupby('OCC_CODE').H_MEAN.pct_change()
occupation['A_CHANGE'] = occupation.groupby('OCC_CODE').A_MEAN.pct_change()

In [12]:
#I am using this to create new rows and then I am collecting the location for further manipulation
occupation = occupation.assign(**{'FOUR_YEAR':"False",'THREE_YEAR': "False",'TWO_YEAR': "False",'ONE_YEAR':"False",'GAP_YEAR':"False"})
y4_loc=occupation.columns.get_loc('FOUR_YEAR')
y3_loc=occupation.columns.get_loc('THREE_YEAR')
y2_loc=occupation.columns.get_loc('TWO_YEAR')
y1_loc=occupation.columns.get_loc('ONE_YEAR')
ygap_loc=occupation.columns.get_loc('GAP_YEAR')

In [13]:
# I am using this to populate the rows. 
locnumber=0
for index, row in occupation.iterrows():
    occupation.iloc[locnumber:,y4_loc] = (row['OCC_CODE'] in dict4)
    occupation.iloc[locnumber:,y3_loc] = (row['OCC_CODE'] in dict3)
    occupation.iloc[locnumber:,y2_loc] = (row['OCC_CODE'] in dict2)
    occupation.iloc[locnumber:,y1_loc] = (row['OCC_CODE'] in dict1)
    occupation.iloc[locnumber:,ygap_loc] = (row['OCC_CODE'] in dictgap)
    locnumber +=1

In [14]:
#Here is data that is just Occupation data that has each based on the different ways I sepereated them.  
occupation_4 = occupation[occupation['FOUR_YEAR'] == True]
occupation_3 = occupation[(occupation['THREE_YEAR'] == True) & (occupation['GAP_YEAR'] == False)]
occupation_2 = occupation[(occupation['TWO_YEAR'] == True) & (occupation['GAP_YEAR'] == False)]
occupation_1 = occupation[(occupation['ONE_YEAR'] == True) & (occupation['GAP_YEAR'] == False)]
occupation_gap = occupation[(occupation['GAP_YEAR'] == True) & (occupation['GAP_YEAR'] == False)]

occupation_1

Unnamed: 0,OCC_CODE,OCC_TITLE,OCC_GROUP,TOT_EMP,EMP_PRSE,H_MEAN,A_MEAN,MEAN_PRSE,H_PCT10,H_PCT25,...,A_PCT90,YEAR,Unnamed: 19,H_CHANGE,A_CHANGE,FOUR_YEAR,THREE_YEAR,TWO_YEAR,ONE_YEAR,GAP_YEAR
1808,11-3040,human resources managers,,860.0,5.7,59.631104,124034.176,3.88416,31.924096,40.062336,...,195595.200,2003,,,,False,False,False,True,False
1926,25-4010,"archivists, curators, and museum technicians",,60.0,24.6,33.385280,69433.984,17.75616,17.571200,21.214912,...,93497.280,2003,,,,False,False,False,True,False
1944,27-3010,announcers,,290.0,13.0,27.577536,57374.592,46.05504,10.727680,12.059392,...,186069.760,2003,,,,False,False,False,True,False
1945,27-3020,"news analysts, reporters and correspondents",,210.0,7.7,35.678784,74205.952,27.55904,15.240704,19.180352,...,130859.200,2003,,,,False,False,False,True,False
1958,29-1020,dentists,,330.0,31.6,105.038784,218493.248,16.27648,50.753024,69.896384,...,,2003,,,,False,False,False,True,False
1995,29-9010,occupational health and safety specialists and...,,130.0,20.4,43.835520,91166.784,7.95328,23.952320,31.646656,...,134576.896,2003,,,,False,False,False,True,False
2040,37-3013,tree trimmers and pruners,,,,26.819200,55765.440,13.87200,22.306176,25.542976,...,64384.576,2003,,,,False,False,False,True,False
2144,47-2130,insulation workers,,120.0,18.7,35.382848,73595.584,18.12608,19.661248,23.711872,...,114693.696,2003,,,,False,False,False,True,False
2196,49-9064,watch repairers,,,,30.536896,63496.768,24.04480,16.849856,19.901696,...,112733.120,2003,,,,False,False,False,True,False
2199,49-9095,manufactured building and mobile home installers,,,,26.301312,54711.168,7.21344,18.237056,20.364096,...,71672.000,2003,,,,False,False,False,True,False


In [29]:
occupation["OCC_TITLE"] = occupation["OCC_TITLE"].str.lower()
#occupation.loc[occupation.OCC_TITLE.isupper(), 'OCC_TITLE"] = occupation.OCC_TITLE.str.lower()
occupation[occupation["OCC_TITLE"].str.contains("econ")]

Unnamed: 0,OCC_CODE,OCC_TITLE,OCC_GROUP,TOT_EMP,EMP_PRSE,H_MEAN,A_MEAN,MEAN_PRSE,H_PCT10,H_PCT25,...,A_PCT90,YEAR,Unnamed: 19,H_CHANGE,A_CHANGE,FOUR_YEAR,THREE_YEAR,TWO_YEAR,ONE_YEAR,GAP_YEAR
1814,11-9032,"education administrators, elementary and secon...",,560.0,4.0,,144990.144,10.54272,,,...,183258.368,2003,,,,True,False,False,False,False
1908,25-1011,"business teachers, postsecondary",,180.0,4.4,,124274.624,27.55904,,,...,213665.792,2003,,,,True,False,False,False,False
1909,25-1021,"computer science teachers, postsecondary",,80.0,3.9,,124607.552,25.15456,,,...,199053.952,2003,,,,False,True,False,False,False
1910,25-1022,"mathematical science teachers, postsecondary",,90.0,14.9,,97381.440,18.49600,,,...,157326.976,2003,,,,True,False,False,False,False
1911,25-1052,"chemistry teachers, postsecondary",,50.0,19.1,,114360.768,16.83136,,,...,179707.136,2003,,,,False,True,False,False,False
1912,25-1054,"physics teachers, postsecondary",,30.0,41.4,,127455.936,9.98784,,,...,186883.584,2003,,,,False,False,True,False,True
1913,25-1066,"psychology teachers, postsecondary",,70.0,13.9,,116580.288,13.50208,,,...,173085.568,2003,,,,False,True,False,False,False
1914,25-1072,"nursing instructors and teachers, postsecondary",,160.0,29.9,,92424.512,9.98784,,,...,134225.472,2003,,,,True,False,False,False,False
1915,25-1121,"art, drama, and music teachers, postsecondary",,190.0,21.9,,82270.208,17.01632,,,...,131876.480,2003,,,,True,False,False,False,False
1916,25-1122,"communications teachers, postsecondary",,60.0,26.0,,92369.024,9.98784,,,...,150686.912,2003,,,,False,True,False,False,False


In [17]:
#new_occupation['YEAR'] = pd.to_datetime(new_occupation['YEAR'], format = "%Y")

### SQL STUFF

In [18]:
occupation_4.to_sql("occ_table",sqlite3.connect('occ.db'), if_exists ="replace")

  dtype=dtype, method=method)


In [19]:
con = sqlite3.connect("occ.db")

In [30]:
#This is sorting the data by the average hourly percent change. Where those with the highest percent change fairing better.   
pd.read_sql_query("SELECT OCC_CODE,OCC_TITLE,AVG(H_CHANGE) as H_average, AVG(A_CHANGE) as A_average FROM occ_table GROUP BY OCC_CODE ORDER BY H_average DESC LIMIT 10", con)

Unnamed: 0,OCC_CODE,OCC_TITLE,H_average,A_average
0,27-2012,producers and directors,0.134717,0.008545
1,11-9061,funeral directors,0.109731,0.1097
2,49-9098,"helpers--installation, maintenance, and repair...",0.080048,0.079867
3,19-4091,environmental science and protection technicia...,0.042304,0.042487
4,11-2011,advertising and promotions managers,0.040898,0.040891
5,11-3061,purchasing managers,0.039929,0.039937
6,29-1066,psychiatrists,0.036577,0.036582
7,39-9032,recreation workers,0.031287,0.031165
8,13-2082,tax preparers,0.02921,0.029084
9,27-4032,film and video editors,0.021536,0.021562


In [44]:
#offset it to avoid na data
pd.read_sql_query("SELECT OCC_CODE,OCC_TITLE,AVG(H_CHANGE) as H_average, AVG(A_CHANGE) as H_average FROM occ_table GROUP BY OCC_CODE ORDER BY H_average ASC LIMIT 10 OFFSET 12", con)

Unnamed: 0,OCC_CODE,OCC_TITLE,H_average,H_average.1
0,29-1011,chiropractors,-0.242064,-0.242086
1,13-1121,meeting and convention planners,-0.225118,-0.225118
2,13-2021,appraisers and assessors of real estate,-0.220821,-0.220818
3,51-3091,"food and tobacco roasting, baking, and drying ...",-0.190703,-0.190689
4,17-3025,environmental engineering technicians,-0.180274,-0.180318
5,51-4023,"rolling machine setters, operators, and tender...",-0.179433,-0.179492
6,53-7021,crane and tower operators,-0.168824,-0.168827
7,49-3022,automotive glass installers and repairers,-0.168186,-0.168263
8,25-1194,"vocational education teachers, postsecondary",-0.167822,-0.167853
9,51-4122,"welding, soldering, and brazing machine setter...",-0.161642,-0.161551


In [45]:
#This is sorting the data by the average annual percent change. Where those with the highest percent change fairing better.   
pd.read_sql_query("SELECT OCC_CODE,OCC_TITLE,AVG(H_CHANGE) as H_average, AVG(A_CHANGE) as A_average FROM occ_table GROUP BY OCC_CODE ORDER BY A_average DESC LIMIT 10", con)

Unnamed: 0,OCC_CODE,OCC_TITLE,H_average,A_average
0,11-9061,funeral directors,0.109731,0.1097
1,25-1121,"art, drama, and music teachers, postsecondary",,0.082466
2,49-9098,"helpers--installation, maintenance, and repair...",0.080048,0.079867
3,53-2012,commercial pilots,,0.04979
4,19-4091,environmental science and protection technicia...,0.042304,0.042487
5,11-2011,advertising and promotions managers,0.040898,0.040891
6,11-3061,purchasing managers,0.039929,0.039937
7,29-1066,psychiatrists,0.036577,0.036582
8,39-9032,recreation workers,0.031287,0.031165
9,13-2082,tax preparers,0.02921,0.029084


In [46]:
#This is sorting the data by the average annual percent change. Where those with the lowest percent change fairing better.   
pd.read_sql_query("SELECT OCC_CODE,OCC_TITLE,AVG(H_CHANGE) as H_average, AVG(A_CHANGE) as A_average FROM occ_table GROUP BY OCC_CODE ORDER BY A_average ASC LIMIT 10", con)

Unnamed: 0,OCC_CODE,OCC_TITLE,H_average,A_average
0,29-1011,chiropractors,-0.242064,-0.242086
1,13-1121,meeting and convention planners,-0.225118,-0.225118
2,13-2021,appraisers and assessors of real estate,-0.220821,-0.220818
3,29-9091,athletic trainers,,-0.192253
4,51-3091,"food and tobacco roasting, baking, and drying ...",-0.190703,-0.190689
5,17-3025,environmental engineering technicians,-0.180274,-0.180318
6,51-4023,"rolling machine setters, operators, and tender...",-0.179433,-0.179492
7,53-7021,crane and tower operators,-0.168824,-0.168827
8,49-3022,automotive glass installers and repairers,-0.168186,-0.168263
9,25-1194,"vocational education teachers, postsecondary",-0.167822,-0.167853
