In [178]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from collections import Counter

In [193]:
# Load the local-clicks table. The CSV has no header row, so the columns are
# labelled 0, 1, 2 (column 0 is later joined against city names and column 1 is
# compared against the job category).
locals_per_cc_df = pd.read_csv("total-locals-per-cc.csv", header=None)

# Order the rows by column 0. sort_values returns a new frame, so the raw
# frame loaded above is left untouched.
locals_per_cc_df_copy = locals_per_cc_df.sort_values(0)

# Cell output: how many rows each category (column 1) has.
locals_per_cc_df_copy[1].value_counts()

management       465
food             446
retail           434
mednurse         407
driver           389
install          378
education        357
customer         354
admin            353
sales            350
childcare        336
therapy          306
uncategorized    288
medtech          274
sanitation       273
service          267
accounting       253
warehouse        247
manufacturing    199
care             194
techsoftware     191
protective       187
hr               173
marketing        171
construction     160
pharmacy         159
techinfo         158
meddr            154
techhelp         150
project          148
medinfo          128
personal         124
sports           119
science          118
finance          112
meddental        109
engid            106
insurance         98
hospitality       98
veterinary        96
legal             92
arts              91
realestate        84
media             76
transport         70
math              68
engmech           61
engelectric  

In [180]:
# Load the population table.
population_df = pd.read_csv("populations.csv")

# Keep only the city-name and population columns, then discard rows where
# either value is missing. No sorting is applied to this table.
population_df_copy = population_df[['city', 'Population']].dropna()

In [181]:
# Load the unique-jobs table. The CSV has no header row, so the columns are
# labelled 0, 1, 2.
unique_jobs_per_cc_df = pd.read_csv("total-unique-jobs-per-cc.csv", header=None)

# Order the rows by column 0 (the column later joined against city names).
# sort_values returns a new frame, so the raw frame is left untouched.
unique_jobs_per_cc_df_copy = unique_jobs_per_cc_df.sort_values(0)

In [182]:
# Load the average-salary table. The CSV has no header row, so the columns are
# labelled 0, 1, 2.
salary_per_cc_df = pd.read_csv("avg-salary-per-cc.csv", header=None)

# Order the rows by column 0 (the column later joined against city names).
# sort_values returns a new frame, so the raw frame is left untouched.
salary_per_cc_df_copy = salary_per_cc_df.sort_values(0)

In [183]:
# Load the MSA / county / city table (this CSV does have a header row).
msa_df = pd.read_csv("MSA_Counties_Cities.csv")

# Work on an independent deep copy so the raw frame stays as loaded.
# No sorting is applied to this table.
msa_df_copy = msa_df.copy(deep=True)

In [184]:
# Load the total-clicks table. The CSV has no header row, so the columns are
# labelled 0, 1, 2.
clicks_per_cc_df = pd.read_csv("total-clicks-per-cc.csv", header=None)

# Order the rows by column 0 (the column whose unique values are later used as
# the list of cities). sort_values returns a new frame, so the raw frame is
# left untouched.
clicks_per_cc_df_copy = clicks_per_cc_df.sort_values(0)

In [185]:
def sortInterest(category):
    """Order cities by local clicks per resident for one job category.

    Reads the notebook-level frames ``locals_per_cc_df_copy`` and
    ``population_df_copy``. Rows whose column 1 equals ``category`` are
    inner-joined to the population table (column 0 against ``city``), rows
    with any missing value are discarded, and column 2 is divided by
    ``Population`` to give the "Interest" ratio.

    Parameters
    ----------
    category : str
        Category label to match against column 1.

    Returns
    -------
    numpy.ndarray
        City names (column 0) sorted by ASCENDING Interest, i.e. the city
        with the lowest ratio comes first. Cities without a population row
        are omitted.

    NOTE(review): an earlier comment here said "highest Interest level at
    top", which contradicts the ascending sort. calculateRank treats a
    smaller position as better, so as written a low ratio ranks a city
    higher -- confirm that this is the intended direction.
    """
    # Rows for the requested category only.
    in_category = locals_per_cc_df_copy[locals_per_cc_df_copy[1] == category]

    # Attach each city's population; the inner join drops cities with no
    # population row, and dropna() removes rows with any missing value.
    with_population = in_category.merge(
        population_df_copy, left_on=0, right_on='city'
    ).dropna()

    # Column 2 per resident.
    scored = with_population.assign(
        Interest=with_population[2] / with_population['Population']
    )

    # Lowest ratio first (see the NOTE in the docstring).
    return scored.sort_values('Interest', ascending=True)[0].values


In [186]:
def sortAvailability(category):
    """Order cities by unique jobs per resident for one job category.

    Reads the notebook-level frames ``unique_jobs_per_cc_df_copy`` and
    ``population_df_copy``. Rows whose column 1 equals ``category`` are
    inner-joined to the population table (column 0 against ``city``), rows
    with any missing value are discarded, and column 2 is divided by
    ``Population`` to give the "Availability" ratio.

    Parameters
    ----------
    category : str
        Category label to match against column 1.

    Returns
    -------
    numpy.ndarray
        City names (column 0) sorted by descending Availability, i.e. the
        city with the highest ratio comes first. Cities without a
        population row are omitted.
    """
    # Rows for the requested category only.
    in_category = unique_jobs_per_cc_df_copy[unique_jobs_per_cc_df_copy[1] == category]

    # Attach each city's population; the inner join drops cities with no
    # population row, and dropna() removes rows with any missing value.
    with_population = in_category.merge(
        population_df_copy, left_on=0, right_on='city'
    ).dropna()

    # Column 2 per resident.
    scored = with_population.assign(
        Availability=with_population[2] / with_population['Population']
    )

    # Highest ratio first.
    return scored.sort_values('Availability', ascending=False)[0].values

In [187]:
def sortQuality(category):
    """Order cities by salary relative to cost of living for one job category.

    Reads the notebook-level frames ``salary_per_cc_df_copy`` and
    ``msa_df_copy``. Rows whose column 1 equals ``category`` are inner-joined
    to the MSA table (column 0 against ``CITY``), rows with a missing value
    in ANY column of the joined result are discarded, and column 2 is
    divided by ``C.O.L`` to give the "Quality" ratio.

    Parameters
    ----------
    category : str
        Category label to match against column 1.

    Returns
    -------
    numpy.ndarray
        City names (column 0) sorted by descending Quality, i.e. the city
        with the highest ratio comes first. Cities without an MSA row are
        omitted.
    """
    # Rows for the requested category only.
    in_category = salary_per_cc_df_copy[salary_per_cc_df_copy[1] == category]

    # Attach the MSA columns for each city. dropna() looks at every column,
    # so a gap in any MSA field removes the city, not only a missing C.O.L.
    with_msa = in_category.merge(msa_df_copy, left_on=0, right_on='CITY').dropna()

    # Column 2 relative to the C.O.L column.
    scored = with_msa.assign(Quality=with_msa[2] / with_msa['C.O.L'])

    # Highest ratio first.
    return scored.sort_values('Quality', ascending=False)[0].values

In [190]:
def _rankPosition(ranking, city):
    """Return the 0-based position of ``city`` in ``ranking``, or None.

    None is returned when the city is absent or appears more than once, so
    the caller can treat both cases as "no usable rank".
    """
    hits = np.flatnonzero(np.asarray(ranking) == city)
    return int(hits[0]) if hits.size == 1 else None


def calculateRank(category, missing_score=480):
    """Rank cities for a job category by summing three per-metric positions.

    Each city listed in column 0 of ``clicks_per_cc_df_copy`` is scored as
    the sum of its 0-based positions in the arrays returned by
    ``sortInterest``, ``sortAvailability`` and ``sortQuality``. Cities are
    then ordered by ascending score, so a lower score ranks earlier.

    Parameters
    ----------
    category : str
        Category label passed to the three sort functions.
    missing_score : int, optional
        Total score given to a city that has no unique position in at least
        one of the three orderings. Defaults to 480.
        NOTE(review): real totals can exceed 480, so such cities land
        mid-table rather than last -- confirm this is intended.

    Returns
    -------
    tuple
        ``(first_cities, first_scores, last_cities, last_scores)``: the (up
        to) five lowest-scoring cities with their scores, followed by the
        (up to) four highest-scoring cities with their scores.
    """
    cities = clicks_per_cc_df_copy[0].unique()

    # One ordering per metric; a city's position in each is its rank there.
    rankings = (
        sortInterest(category),
        sortAvailability(category),
        sortQuality(category),
    )

    cities_scores = {}
    for city in cities:
        positions = [_rankPosition(ranking, city) for ranking in rankings]
        if None in positions:
            # No usable rank in at least one metric: fall back to the fixed
            # score and ignore any positions that were found.
            cities_scores[city] = missing_score
        else:
            cities_scores[city] = sum(positions)

    # Sort the city names by score. Inverting the dict into {score: city}
    # would keep only one city per distinct score and silently drop every
    # tie. sorted() is stable, so tied cities keep their order from `cities`.
    cities_ranked = sorted(cities_scores, key=cities_scores.get)

    first, last = cities_ranked[:5], cities_ranked[-4:]
    return (
        first,
        [cities_scores[city] for city in first],
        last,
        [cities_scores[city] for city in last],
    )
    
    

In [194]:
# Print the ranking tuple for the "retail" category: the five lowest-scoring
# cities, their scores, the four highest-scoring cities, and their scores.
print(calculateRank("retail"))

(['Signal Hill', 'Montclair', 'Indian Wells', 'Arvin', 'Malibu'], [156, 202, 210, 213, 235], ['Desert Hot Springs', 'Indio', 'Santee', 'Woodland'], [580, 586, 587, 658])


In [192]:
# sortAvailability('math')

In [67]:
# Scratch check: build a small dict and invert it so the values become keys.
d = dict(c=9, b=8)
d_rev = dict((value, key) for key, value in d.items())

In [9]:
# Scratch check: rebuild the inverted dict with its items in ascending key order.
dict(sorted(d_rev.items()))

{8: 'b', 9: 'c'}

In [70]:
# Scratch check: start from an empty dict.
c = dict()

In [71]:
# Scratch check: add a single string key mapped to 480.
c['Sanfranbisbo'] = 480

In [72]:
# Display the dict's current contents.
c

{'Sanfranbisbo': 480}

In [73]:
# Display the dict's (key, value) items view.
c.items()

dict_items([('Sanfranbisbo', 480)])