# SANDAG Sector Estimates Analysis
This notebook pulls EDD employment data and aggregates it to the specified SANDAG sectors, does the same for the SANDAG estimates data, and then compares the two sector by sector.

See Purva's Excel for more information: https://sandag.sharepoint.com/:x:/r/qaqc/_layouts/15/Doc.aspx?sourcedoc=%7BCFD80313-6A49-43E6-B776-50C85CD17DD4%7D&file=EDD_Forecast%20Output%20Industry%20Level%20Jobs%20Comparison_QA.xlsx&action=default&mobileredirect=true&cid=9c4c556c-a447-4a46-8e56-ab5f1c95311a

In [2]:
import pandas as pd
import urllib.request  # For downloading the xlsx file
import pandas as pd
from sodapy import Socrata
import ssl
import sqlalchemy

# Grabbing EDD Data

In [79]:
# Sector Level Data
# Query the data.edd.ca.gov Socrata portal for dataset "r4zm-kdcg", restricted to
# rows whose area_name is 'San Diego-Carlsbad MSA', and load them into a DataFrame.
# NOTE(review): the second Socrata argument is None — presumably "no app token",
# which may mean throttled requests; confirm against the sodapy documentation.
client = Socrata("data.edd.ca.gov", None)
results = client.get_all("r4zm-kdcg", area_name='San Diego-Carlsbad MSA')
results_df = pd.DataFrame.from_records(results)



In [80]:
# Cleaning the sector data
# Keep only the rows flagged seasonally_adjusted == 'N' and the four columns used below.
# .copy() makes edd_data an independent frame, so adding the 'date' column does not
# assign into a selection of results_df (which triggers SettingWithCopyWarning).
not_adjusted = results_df['seasonally_adjusted'] == 'N'
edd_data = results_df.loc[
    not_adjusted, ['year', 'month', 'industry_title', 'current_employment']
].copy()

# Build a '<year>-<month>-1' string per row (first day of the month) and parse it.
# Vectorised string concatenation produces the same strings as a row-wise join.
edd_data['date'] = pd.to_datetime(
    edd_data['year'].astype(str) + '-' + edd_data['month'].astype(str) + '-1'
)  # year-month-day

# Reshape to one row per industry title and one column per date.
# pivot() raises if an (industry_title, date) pair occurs more than once.
edd_data = edd_data.pivot(index='industry_title', columns='date', values='current_employment')

# Convert every date column to a numeric dtype.
edd_data = edd_data.apply(pd.to_numeric)

In [81]:
# EDD industry titles that feed the SANDAG sector comparison.
# Two titles end abruptly ('... Technical S', '... Waste Ser'); they are kept verbatim
# because the filter below matches index labels exactly.
edd_breakdown = [
    'Mining and Logging',
    'Total Farm',
    'Construction',
    'Manufacturing',
    'Wholesale Trade',
    'Retail Trade',
    'Utilities',
    'Transportation and Warehousing',
    'Information',
    'Finance and Insurance',
    'Real Estate and Rental and Leasing',
    'Professional, Scientific and Technical S',
    'Management of Companies and Enterprises',
    'Administrative and Support and Waste Ser',
    'Educational Services',
    'Health Care and Social Assistance',
    'Arts, Entertainment, and Recreation',
    'Accommodation',
    'Food Services and Drinking Places',
    'Other Services',
    'Federal Government excluding Department of Defense',
    'Department of Defense',
    'State Government Education',
    'State Government Excluding Education',
    'Local Government Education',
    'Local Government Excluding Education',
]

# Drop every industry row whose title is not in the list above.
edd_data = edd_data.loc[edd_data.index.isin(edd_breakdown)]

In [82]:
# Keep only the last column (by position) and expose it as a one-column frame
# named 'Employment'.
# NOTE(review): this is the most recent month only if the date columns are in
# ascending order — confirm. The estimates side is filtered to year 2020 further
# down, so verify that the two periods are meant to be compared.
edd_data = edd_data.iloc[:, -1].to_frame(name='Employment')

In [84]:
# Flip rows and columns so each sector becomes a column of a single 'Employment' row.
# Note: this cell is not idempotent — running it a second time flips the frame back.
edd_data = edd_data.transpose()

In [85]:
# Display the transposed EDD frame: a single 'Employment' row with one column per sector.
edd_data

industry_title,Accommodation,Administrative and Support and Waste Ser,"Arts, Entertainment, and Recreation",Construction,Department of Defense,Educational Services,Federal Government excluding Department of Defense,Finance and Insurance,Food Services and Drinking Places,Health Care and Social Assistance,...,Other Services,"Professional, Scientific and Technical S",Real Estate and Rental and Leasing,Retail Trade,State Government Education,State Government Excluding Education,Total Farm,Transportation and Warehousing,Utilities,Wholesale Trade
Employment,26300,94700,31300,85500,23400,26600,23400,45600,141200,192600,...,53800,158100,30600,140200,35900,18600,9900,32600,5100,44300


# Grabbing Estimates Data

In [86]:
# Location of the estimates CSV.
# NOTE(review): this is an absolute, user-specific OneDrive path, so the notebook
# only runs on a machine with this exact folder layout — consider a configurable path.
ESTIMATES_CSV_PATH = 'C:/Users/cra/OneDrive - San Diego Association of Governments/SANDAG-Sector-Estimates-Analysis/region_ind_DS42.csv'

estimates_data = pd.read_csv(ESTIMATES_CSV_PATH)

In [87]:
# Keep only the estimates rows for year 2020.
# reset_index() is called without drop=True, so the rows are renumbered from 0 and
# the previous row labels are kept in a new 'index' column.
year_mask = estimates_data['year'] == 2020
cleaned_e_data = estimates_data.loc[year_mask].reset_index()

# Creating Final Output

In [88]:
# Row labels of the comparison table, one per SANDAG sector.
# The spellings are kept exactly as they are because the fill-in cell below uses
# these strings as lookup keys.
sandag_sector_labels = [
    'Mining, logging, total farm',
    'Arts, Entertainment & Recreation',
    'Construction',
    'Department of Defence',
    'Federal Government Excluding Department of Defence',
    'Health Care and Social Assistance',
    'Accomodation',
    'Manufactoring',
    'Other Services',
    'Professional Services',
    'Government Eductaion',
    'Educational Services',
    'Food Services',
    'Retail Trade',
    'Government Non-education',
    'Transportation & Warehousing',
    'Utilities',
    'Wholesale Trade',
]

# Build the final dataframe in one step: both value columns start at zero.
final_df = pd.DataFrame(
    0,
    index=sandag_sector_labels,
    columns=['Estimates Data', 'EDD Data'],
)

# Filling in the final dataframe with the correct values

In [89]:
# Fill final_df one SANDAG sector at a time, then compute the difference.
#
# Each entry maps a final_df row label to a pair:
#   (estimates columns to add up, EDD industry columns to add up)
# The row labels are written exactly as they appear in final_df's index (including
# the existing misspellings, which still need fixing in both places at once),
# because they are the lookup keys into that index.
SECTOR_SOURCES = {
    'Mining, logging, total farm': (
        ['emp_ag'],
        ['Mining and Logging', 'Total Farm'],
    ),
    'Arts, Entertainment & Recreation': (
        ['emp_amusement'],
        ['Arts, Entertainment, and Recreation'],
    ),
    'Construction': (
        ['emp_const_bldg_office', 'emp_const_bldg_prod',
         'emp_const_non_bldg_office', 'emp_const_non_bldg_prod'],
        ['Construction'],
    ),
    # Original author's note on the EDD side: "I don't see a number 27"
    'Department of Defence': (
        ['emp_fed_mil'],
        ['Department of Defense'],
    ),
    'Federal Government Excluding Department of Defence': (
        ['emp_fed_non_mil'],
        ['Federal Government excluding Department of Defense'],
    ),
    'Health Care and Social Assistance': (
        ['emp_health'],
        ['Health Care and Social Assistance'],
    ),
    'Accomodation': (
        ['emp_hotel'],
        ['Accommodation'],
    ),
    'Manufactoring': (
        ['emp_mfg_office', 'emp_mfg_prod'],
        ['Manufacturing'],
    ),
    'Other Services': (
        ['emp_personal_svcs_office', 'emp_personal_svcs_retail'],
        ['Other Services'],
    ),
    'Professional Services': (
        ['emp_prof_bus_svcs', 'emp_prof_bus_svcs_bldg_maint'],
        ['Information', 'Finance and Insurance', 'Real Estate and Rental and Leasing',
         'Professional, Scientific and Technical S',
         'Management of Companies and Enterprises',
         'Administrative and Support and Waste Ser'],
    ),
    'Government Eductaion': (
        ['emp_public_ed'],
        ['State Government Education', 'Local Government Education'],
    ),
    'Educational Services': (
        ['emp_pvt_ed_k12', 'emp_pvt_ed_post_k12_oth'],
        ['Educational Services'],
    ),
    # The EDD match for this sector is an assumption, not explicitly stated.
    'Food Services': (
        ['emp_restaurant_bar'],
        ['Food Services and Drinking Places'],
    ),
    'Retail Trade': (
        ['emp_retail'],
        ['Retail Trade'],
    ),
    'Government Non-education': (
        ['emp_state_local_gov_blue', 'emp_state_local_gov_ent',
         'emp_state_local_gov_white'],
        ['State Government Excluding Education', 'Local Government Excluding Education'],
    ),
    'Transportation & Warehousing': (
        ['emp_trans'],
        ['Transportation and Warehousing'],
    ),
    'Utilities': (
        ['emp_utilities_office', 'emp_utilities_prod'],
        ['Utilities'],
    ),
    'Wholesale Trade': (
        ['emp_whsle_whs'],
        ['Wholesale Trade'],
    ),
}


def _first_row_total(frame, columns):
    """Add up int(value) for each of `columns`, reading the first row of `frame`.

    Each value is converted with int() before summing, matching the previous
    int(...) + int(...) arithmetic. Reading .iloc[0] explicitly avoids calling
    int() on a Series, which pandas has deprecated.

    Raises KeyError if a column is missing and ValueError if a value is NaN.
    """
    return sum(int(frame[column].iloc[0]) for column in columns)


# Both source frames are read as "the one row of values"; fail loudly if the year
# filter or the EDD reshaping ever yields zero or several rows instead.
for _frame_name, _frame in (('cleaned_e_data', cleaned_e_data), ('edd_data', edd_data)):
    if len(_frame) != 1:
        raise ValueError(f"{_frame_name} must contain exactly one row, found {len(_frame)}")

# .loc with an unknown label would silently append a new row, so check the labels first.
_unknown_sectors = [sector for sector in SECTOR_SOURCES if sector not in final_df.index]
if _unknown_sectors:
    raise KeyError(f"Sector labels missing from final_df: {_unknown_sectors}")

# Write with a single .loc[row, column] call. The previous chained form
# final_df[column][row] = value assigns into an intermediate Series, which raises
# SettingWithCopyWarning and leaves final_df unchanged under pandas Copy-on-Write.
for sector, (estimate_columns, edd_columns) in SECTOR_SOURCES.items():
    final_df.loc[sector, 'Estimates Data'] = _first_row_total(cleaned_e_data, estimate_columns)
    final_df.loc[sector, 'EDD Data'] = _first_row_total(edd_data, edd_columns)

# Positive Diff means the estimates exceed the EDD figure.
final_df['Diff'] = final_df['Estimates Data'] - final_df['EDD Data']

In [90]:
# Display the comparison table: estimates, EDD employment, and Diff (estimates minus EDD) per sector.
final_df

Unnamed: 0,Estimates Data,EDD Data,Diff
"Mining, logging, total farm",9617,10200,-583
"Arts, Entertainment & Recreation",47201,31300,15901
Construction,86628,85500,1128
Department of Defence,131042,23400,107642
Federal Government Excluding Department of Defence,11841,23400,-11559
Health Care and Social Assistance,206789,192600,14189
Accomodation,34218,26300,7918
Manufactoring,112861,113200,-339
Other Services,71476,53800,17676
Professional Services,388189,375400,12789
