In [30]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

In [31]:
# Data pre-processing of the family income / comfortability dataset.
# Read the 'County' sheet; header=1 means the column labels are taken from
# the sheet's second row (row index 1).
df = pd.read_excel(
    "fbc_data_2024.xlsx",
    sheet_name='County',
    header=1,
)

# Keep only the Utah rows, then carve the Cache County rows out of that subset.
ut_df = df.loc[df["State abv."].eq('UT')]
cache_county_df = ut_df.loc[ut_df['County'].eq('Cache County')]

In [55]:
def _plot_costs_by_family(family_types, costs, period, county_name, color):
    """Draw one bar chart of `costs` per family type for a single county.

    `period` is the word used in the axis label and title (e.g. 'Monthly').
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.bar(family_types, costs, color=color)
    ax.set_xlabel('Family Type')
    ax.set_ylabel(f'{period} Cost of Living ($)')
    ax.set_title(f'{period} Cost of Living by Family Type in {county_name}')
    ax.tick_params(axis='x', labelrotation=45)
    plt.show()


def CostOfLivingPerCounty(county_name, plot=False, data=None):
    """Print (and optionally plot) the cost of living per family type for a county.

    Parameters
    ----------
    county_name : str
        Value matched exactly against the 'County' column.
    plot : bool, default False
        When True, also draw one bar chart for the monthly totals and one for
        the yearly totals.
    data : pandas.DataFrame, optional
        Frame providing the 'County', 'Family', 'Total' (monthly) and
        'Total.1' (yearly) columns. Defaults to the notebook-level `ut_df`.

    Returns
    -------
    None
        Results are printed / plotted. Nothing is returned, so a trailing call
        in a notebook cell produces no extra output.
    """
    if data is None:
        data = ut_df

    specific_county_df = data[data['County'] == county_name]

    # A misspelled or missing county would otherwise print a bare header and
    # draw empty charts; say so explicitly instead.
    if specific_county_df.empty:
        print(f"\nNo cost data found for {county_name!r}.")
        return

    family_types = specific_county_df['Family']
    monthly_total_costs = specific_county_df['Total']
    yearly_total_costs = specific_county_df['Total.1']

    # Print costs to the console
    print(f"\nCosts for {county_name}:")
    for family, monthly_cost, yearly_cost in zip(family_types, monthly_total_costs, yearly_total_costs):
        print(f"\tFamily Type: {family}, Monthly Cost: ${monthly_cost}, Yearly Cost: ${yearly_cost}")

    if plot:
        _plot_costs_by_family(family_types, monthly_total_costs, 'Monthly', county_name, 'skyblue')
        _plot_costs_by_family(family_types, yearly_total_costs, 'Yearly', county_name, 'lightgreen')

# Print the cost tables (charts disabled) for the two counties being compared.
for _county in ("Cache County", "Salt Lake County"):
    CostOfLivingPerCounty(_county, plot=False)


Costs for Cache County:
	Family Type: 1p0c, Monthly Cost: $3457, Yearly Cost: $41484
	Family Type: 1p1c, Monthly Cost: $5450, Yearly Cost: $65400
	Family Type: 1p2c, Monthly Cost: $7132, Yearly Cost: $85584
	Family Type: 1p3c, Monthly Cost: $8934, Yearly Cost: $107208
	Family Type: 1p4c, Monthly Cost: $9779, Yearly Cost: $117348
	Family Type: 2p0c, Monthly Cost: $4706, Yearly Cost: $56472
	Family Type: 2p1c, Monthly Cost: $6534, Yearly Cost: $78408
	Family Type: 2p2c, Monthly Cost: $7950, Yearly Cost: $95400
	Family Type: 2p3c, Monthly Cost: $9564, Yearly Cost: $114768
	Family Type: 2p4c, Monthly Cost: $10456, Yearly Cost: $125472

Costs for Salt Lake County:
	Family Type: 1p0c, Monthly Cost: $3961, Yearly Cost: $47532
	Family Type: 1p1c, Monthly Cost: $6262, Yearly Cost: $75144
	Family Type: 1p2c, Monthly Cost: $7882, Yearly Cost: $94584
	Family Type: 1p3c, Monthly Cost: $9964, Yearly Cost: $119568
	Family Type: 1p4c, Monthly Cost: $10863, Yearly Cost: $130356
	Family Type: 2p0c, Mon

In [56]:
def TopCountiesByCost(num, descending=True, data=None):
    """Print the `num` counties with the highest (or lowest) average monthly cost.

    The average is the mean of the 'Total' column over all rows of a county.

    Parameters
    ----------
    num : int
        How many counties to list.
    descending : bool, default True
        True lists the most expensive counties first; False lists the least
        expensive first.
    data : pandas.DataFrame, optional
        Frame providing the 'County' and 'Total' columns. Defaults to the
        notebook-level `ut_df`.

    Returns
    -------
    None
        The ranking is printed; nothing is returned.
    """
    if data is None:
        data = ut_df

    # One grouped pass instead of re-filtering the frame once per county.
    # sort=False keeps counties in first-appearance order, so counties with
    # equal averages keep that order through the stable sort below.
    average_costs = data.groupby('County', sort=False)['Total'].mean().to_dict()

    # Rank counties by average cost and keep the first `num` of them.
    top_counties = sorted(average_costs, key=average_costs.get, reverse=descending)[:num]

    adj = 'most' if descending else 'least'
    print(f"\nTop {num} {adj} expensive counties by monthly cost:")
    for county in top_counties:
        print(f"\t{county}: ${average_costs[county]:.2f}")

# List the 11 most expensive counties first, then the 11 least expensive.
for _most_expensive_first in (True, False):
    TopCountiesByCost(11, descending=_most_expensive_first)


Top 11 most expensive counties by monthly cost:
	Summit County: $9009.40
	Morgan County: $8739.90
	Salt Lake County: $8212.30
	Washington County: $8098.80
	Wasatch County: $8098.70
	Davis County: $8096.90
	Utah County: $7683.20
	Weber County: $7655.70
	Juab County: $7636.30
	Kane County: $7589.20
	Grand County: $7586.20

Top 11 least expensive counties by monthly cost:
	Carbon County: $7085.10
	Sevier County: $7207.20
	Beaver County: $7218.90
	Emery County: $7225.80
	Duchesne County: $7254.60
	Sanpete County: $7260.40
	Wayne County: $7270.80
	Garfield County: $7282.70
	Rich County: $7319.30
	Millard County: $7335.80
	Uintah County: $7370.70


In [66]:
# Pre-processing of the wages dataset.
df = pd.read_excel("wages.xlsx")

# Drop the rows whose area is one of the two aggregate labels below, keeping
# every other 'Area Name'.
wage_df = df.loc[lambda frame: ~frame['Area Name'].isin(['United States', 'Statewide'])]

def _display_salary_extremes(data, group_col, salary_col, noun, top_n):
    """Print and display the `top_n` highest and lowest mean salaries per group."""
    mean_salary = data.groupby(group_col)[salary_col].mean().reset_index()
    for label, ascending in (("best", False), ("worst", True)):
        print(f"{label} {noun}:")
        display(mean_salary.sort_values(by=salary_col, ascending=ascending).head(top_n))


def wagesAnalysis(inexperienced=True, data=None, top_n=5):
    """Show the best/worst paying job titles and areas by mean annual salary.

    Salaries are averaged per 'Job Title' and then per 'Area Name'. For each
    grouping the `top_n` highest and `top_n` lowest averages are displayed.

    Parameters
    ----------
    inexperienced : bool, default True
        True ranks by the 'Annual Inexperienced' column, False by
        'Annual Median'.
    data : pandas.DataFrame, optional
        Frame providing the 'Job Title', 'Area Name' and selected salary
        columns. Defaults to the notebook-level `wage_df`.
    top_n : int, default 5
        Number of rows shown in each table.

    Returns
    -------
    None
        Tables are rendered with `display`; nothing is returned.
    """
    if data is None:
        data = wage_df

    adj = 'Inexperienced' if inexperienced else 'Median'
    salary_col = f'Annual {adj}'

    _display_salary_extremes(data, 'Job Title', salary_col, "paying jobs", top_n)
    _display_salary_extremes(data, 'Area Name', salary_col, "locations", top_n)

# Run the report on entry-level wages first, then on median wages.
for _use_inexperienced in (True, False):
    wagesAnalysis(inexperienced=_use_inexperienced)


best paying jobs:


Unnamed: 0,Job Title,Annual Inexperienced
3,Software Developers,57732.857143
0,Computer Programmers,56783.333333
1,Computer Systems Analysts,56628.0
2,Network and Computer Systems Administrators,54672.857143
4,Software Quality Assurance Analysts and Testers,42452.0


worst paying jobs:


Unnamed: 0,Job Title,Annual Inexperienced
5,Web Developers,31845.0
4,Software Quality Assurance Analysts and Testers,42452.0
2,Network and Computer Systems Administrators,54672.857143
1,Computer Systems Analysts,56628.0
0,Computer Programmers,56783.333333


best locations:


Unnamed: 0,Area Name,Annual Inexperienced
1,Central Southwest Utah,58120.0
3,Ogden-Clearfield Metro,57068.333333
5,Salt Lake Metro,53773.333333
4,Provo-Orem Metro,52250.0
2,Eastern Utah,47792.5


worst locations:


Unnamed: 0,Area Name,Annual Inexperienced
0,Cache,41580.0
6,St George Metro,46428.333333
2,Eastern Utah,47792.5
4,Provo-Orem Metro,52250.0
5,Salt Lake Metro,53773.333333


best paying jobs:


Unnamed: 0,Job Title,Annual Median
3,Software Developers,101461.428571
0,Computer Programmers,85695.0
1,Computer Systems Analysts,84830.0
2,Network and Computer Systems Administrators,80840.0
4,Software Quality Assurance Analysts and Testers,67804.0


worst paying jobs:


Unnamed: 0,Job Title,Annual Median
5,Web Developers,60238.333333
4,Software Quality Assurance Analysts and Testers,67804.0
2,Network and Computer Systems Administrators,80840.0
1,Computer Systems Analysts,84830.0
0,Computer Programmers,85695.0


best locations:


Unnamed: 0,Area Name,Annual Median
1,Central Southwest Utah,90910.0
3,Ogden-Clearfield Metro,87093.333333
5,Salt Lake Metro,86841.666667
4,Provo-Orem Metro,84166.666667
2,Eastern Utah,77827.5


worst locations:


Unnamed: 0,Area Name,Annual Median
0,Cache,69388.333333
6,St George Metro,76135.0
2,Eastern Utah,77827.5
4,Provo-Orem Metro,84166.666667
5,Salt Lake Metro,86841.666667
