In [63]:
import pandas as pd
from pathlib import Path
import glob
import numpy as np
from datetime import date
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

* [State Occupancy](#state_occupancy)

In [20]:
counties_dict = {'AZ': ['Maricopa', 'Yavapai','Pima'],
                 'CA': ['Los Angeles', 'Orange', 'Sonoma','Mendocino','Ventura','Santa Barbara','San Diego',
                        'San Bernardino','Riverside'],
                 'CO': ['Pueblo', 'Larimer', 'Jefferson', 'El Paso', 'Denver', 'Boulder', 'Arapahoe', 'Adams'],
                 'IA': ['Wright', 'Webster', 'Sioux', 'Polk', 'Palo Alto', 'Cherokee'],
                 'ID': ['Valley', 'Owyhee','Madison','Lemhi','Gooding','Gem','Cassia','Canyon','Bannock','Ada'],
                 'KS': ['Wyandotte','Shawnee','Johnson','Franklin'],
                 'NE': ['Wayne','Richardson','Gage','Douglas','Cedar'],
                 'NV': ['Washoe'],
                 'SC': ['York','Lexington','Horry'],
                 'TX': ['Victoria','Polk','Harris','Gregg','Galveston','Fort Bend','Angelina','Williamson','Travis','Hays',
                        'Comal','Burnet','Bell','Van Zandt','Rockwall','Lamar','Kaufman','Hunt','Ellis','Denton','Dallas','Collin',
                        'Taylor','Tarrant','Randall','Potter','Lubbock','Hidalgo','Cameron','Bexar'],
                 'UT':['Weber','Salt Lake','Washington','Utah','Iron','Carbon'],
                 'WA':['Walla Walla','Thurston','Snohomish','Skagit','Pierce','Kitsap','King','Grays Harbor','Cowlitz','Clark'],
                 'WI':['Waupaca']
                 }
states = counties_dict.keys()

In [3]:
def generate_occ_table(df):
    occupancy_table = pd.pivot_table(df, values=['occupancy'], index=['provnum'], columns=['year', 'month'])
    occupancy_table['provnum'] = occupancy_table.index
    occupancy_table = occupancy_table.reset_index(drop=True)
    if df['ensign'][0]:
        occupancy_table = occupancy_table.fillna(occupancy_table.mean())
    else:
        occupancy_table = occupancy_table.dropna()
         
    return occupancy_table

In [17]:
import sqlalchemy as db
engine = db.create_engine('postgresql+psycopg2://postgres:1111@localhost/postgres')
connection = engine.connect()
metadata = db.MetaData()
occupancy = db.Table('occ', metadata, autoload=True, autoload_with=engine)
query=db.select([occupancy]).where(db.and_(occupancy.columns.ensign == False,
                                          occupancy.columns.state == 'CA'
                                          ))
# query=db.select([occ]).where(occupancy.columns.state=='CA')
df = pd.read_sql_query(query, con=engine)
prov_list = df['provnum'].unique().tolist()
len(prov_list)
df[df['year']==2020]['month'].unique()

array([ 4,  5,  6,  7,  8,  9, 10, 11, 12,  1,  2,  3])

In [5]:
# Ensign missing Jan-March 2020: AZ, IA, ID, KS, NE, NV, SC, UT, WI (2018 Oct-Dec, 2019 Jan-Sep: NV)

# State missing:
# 2020 Jan-March: NV, 

df_2020 = pd.read_csv('pbj_nurse_2020_Q1.csv', dtype={'provnum': str})
df_2020.head()
df_2020['provnum'] = df_2020['provnum'].apply(lambda x: str(x).zfill(6))

In [6]:
ne_missing_provnums = {'LEGACY SQUARE': '28E173',
 'PIONEER MEMORIAL COMMUNITY HOSPITAL':'28E175',
 'REGIONAL WEST GARDEN COUNTY NURSING HOME':'28E180',
 'MEMORIAL COMMUNITY CARE': '28E191',
 'GOLDEN OURS CONVALESCENT HOME':'28E199',
 'GORDON COUNTRYSIDE CARE':'28E257',
 'GENOA COMMUNITY HOSPITAL/LTC':'28E271',
 'HILLCREST SHADOW LAKE':'28E299', 
 'HEMINGFORD COMMUNITY CARE CENTER': '28E301'
}
provnames = ne_missing_provnums.keys()

for provname in provnames:
    df_2020['provnum'][df_2020['provname'] == provname] = ne_missing_provnums[provname]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2020['provnum'][df_2020['provname'] == provname] = ne_missing_provnums[provname]


In [7]:
df_2020[df_2020['state']=='NE']['provnum'].unique()

array(['285004.0', '285019.0', '285036.0', '285049.0', '285057.0',
       '285058.0', '285062.0', '285066.0', '285067.0', '285072.0',
       '285073.0', '285078.0', '285080.0', '285081.0', '285085.0',
       '285088.0', '285092.0', '285094.0', '285096.0', '285098.0',
       '285104.0', '285106.0', '285108.0', '285109.0', '285112.0',
       '285115.0', '285116.0', '285117.0', '285118.0', '285119.0',
       '285124.0', '285126.0', '285127.0', '285131.0', '285133.0',
       '285138.0', '285139.0', '285148.0', '285151.0', '285156.0',
       '285157.0', '285160.0', '285163.0', '285164.0', '285165.0',
       '285166.0', '285172.0', '285173.0', '285175.0', '285176.0',
       '285177.0', '285185.0', '285187.0', '285189.0', '285190.0',
       '285191.0', '285192.0', '285193.0', '285197.0', '285200.0',
       '285202.0', '285203.0', '285204.0', '285207.0', '285209.0',
       '285210.0', '285212.0', '285213.0', '285216.0', '285220.0',
       '285232.0', '285239.0', '285241.0', '285245.0', '285246

In [8]:
df_2020.astype({'provnum': str})
df_2020.to_csv('pbj_nurse_2020_Q1.csv', index=False)

In [9]:
# find provnum errors in 2020_Q1 raw data
ne_list = ['2.8e+174','2.7999999999999997e+176', '2.8000000000000002e+181','2.8000000000000003e+192', '2.8000000000000002e+200',
       '2.8000000000000002e+258', '2.8e+272', '2.8e+300', '2.8000000000000003e+302']
df_ne = df_2020[df_2020['state']=='NE']
df_ne[df_ne['provnum'].isin(ne_list)]['provname'].unique()

array([], dtype=object)

In [10]:
# look for rows in prov_list (ensign list)

# df_check = df_2020[df_2020['provnum'].isin(prov_list)]
# df_check

In [32]:
occupancy_table = generate_occ_table(df)
columns = occupancy_table.columns
columns

MultiIndex([('occupancy', 2017,  1),
            ('occupancy', 2017,  2),
            ('occupancy', 2017,  3),
            ('occupancy', 2017,  4),
            ('occupancy', 2017,  5),
            ('occupancy', 2017,  6),
            ('occupancy', 2017,  7),
            ('occupancy', 2017,  8),
            ('occupancy', 2017,  9),
            ('occupancy', 2017, 10),
            ('occupancy', 2017, 11),
            ('occupancy', 2017, 12),
            ('occupancy', 2018,  1),
            ('occupancy', 2018,  2),
            ('occupancy', 2018,  3),
            ('occupancy', 2018,  4),
            ('occupancy', 2018,  5),
            ('occupancy', 2018,  6),
            ('occupancy', 2018,  7),
            ('occupancy', 2018,  8),
            ('occupancy', 2018,  9),
            ('occupancy', 2018, 10),
            ('occupancy', 2018, 11),
            ('occupancy', 2018, 12),
            ('occupancy', 2019,  1),
            ('occupancy', 2019,  2),
            ('occupancy', 2019,  3),
 

In [40]:
pre_covid = occupancy_table.iloc[:,-14::-1]
pre_covid = pre_covid.iloc[:,::-1] # reverse column order back
pre_covid

Unnamed: 0_level_0,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy,occupancy
year,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,...,2019,2019,2019,2019,2019,2019,2019,2020,2020,2020
month,1,2,3,4,5,6,7,8,9,10,...,6,7,8,9,10,11,12,1,2,3
1,0.848485,0.843795,0.839361,0.872391,0.838710,0.806397,0.827305,0.845226,0.811111,0.802216,...,0.863636,0.857282,0.864777,0.845455,0.843597,0.874411,0.862496,0.852069,0.887496,0.913327
2,0.729554,0.778499,0.772890,0.797306,0.812643,0.808754,0.788856,0.773542,0.724579,0.780385,...,0.790909,0.814272,0.804171,0.823906,0.866406,0.856902,0.854024,0.833496,0.826890,0.845552
3,0.973622,0.989955,0.992271,0.995833,0.993784,0.989931,0.996976,0.994120,0.983681,0.994288,...,0.995660,0.991599,0.991767,0.983681,0.974966,0.996701,0.992943,0.980847,0.994432,0.991095
6,0.886608,0.918831,0.911046,0.896633,0.869339,0.864983,0.932877,0.941023,0.864646,0.895732,...,0.937037,0.892147,0.901597,0.846465,0.901271,0.881818,0.860541,0.852721,0.862069,0.836429
7,0.709677,0.759779,0.723118,0.650397,0.684332,0.724603,0.718894,0.712750,0.740476,0.725807,...,0.742063,0.744240,0.739631,0.748016,0.752688,0.739286,0.759601,0.751152,0.781609,0.759217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1105,0.873900,0.878247,0.888563,0.937879,0.944281,0.945455,0.954545,0.917889,0.887879,0.891496,...,0.877273,0.900293,0.891496,0.904545,0.843109,0.839394,0.796188,0.872434,0.913793,0.904692
1111,1.000949,0.934874,0.878558,0.875490,0.936433,0.906863,0.912713,0.907021,0.938235,0.895636,...,0.947059,0.910816,0.867173,0.851961,0.859583,0.781373,0.830171,0.909867,0.795132,0.869070
1114,0.475475,0.459883,0.472824,0.489954,0.537782,0.582648,0.471498,0.438356,0.438356,0.458683,...,0.774429,0.729121,0.666814,0.722831,0.786566,0.675342,0.718957,0.741494,0.811998,0.706142
1116,0.552755,0.718378,0.739919,0.796528,0.852487,1.009375,1.102823,1.119288,0.975694,1.012433,...,0.807986,0.798387,0.786962,0.757639,0.759745,0.756597,0.754704,0.770833,0.779454,0.744960


In [33]:
columns[-2:-14:-1] # after covid months

MultiIndex([('occupancy', 2021,  3),
            ('occupancy', 2021,  2),
            ('occupancy', 2021,  1),
            ('occupancy', 2020, 12),
            ('occupancy', 2020, 11),
            ('occupancy', 2020, 10),
            ('occupancy', 2020,  9),
            ('occupancy', 2020,  8),
            ('occupancy', 2020,  7),
            ('occupancy', 2020,  6),
            ('occupancy', 2020,  5),
            ('occupancy', 2020,  4)],
           names=[None, 'year', 'month'])

In [34]:
columns[-14::-1]  # before covid months

MultiIndex([('occupancy', 2020,  3),
            ('occupancy', 2020,  2),
            ('occupancy', 2020,  1),
            ('occupancy', 2019, 12),
            ('occupancy', 2019, 11),
            ('occupancy', 2019, 10),
            ('occupancy', 2019,  9),
            ('occupancy', 2019,  8),
            ('occupancy', 2019,  7),
            ('occupancy', 2019,  6),
            ('occupancy', 2019,  5),
            ('occupancy', 2019,  4),
            ('occupancy', 2019,  3),
            ('occupancy', 2019,  2),
            ('occupancy', 2019,  1),
            ('occupancy', 2018, 12),
            ('occupancy', 2018, 11),
            ('occupancy', 2018, 10),
            ('occupancy', 2018,  9),
            ('occupancy', 2018,  8),
            ('occupancy', 2018,  7),
            ('occupancy', 2018,  6),
            ('occupancy', 2018,  5),
            ('occupancy', 2018,  4),
            ('occupancy', 2018,  3),
            ('occupancy', 2018,  2),
            ('occupancy', 2018,  1),
 

In [13]:
# occupancy_table = pd.pivot_table(df, values=['occupancy'], index=['provnum'], columns=['year', 'month'])
# occupancy_table['provnum'] = occupancy_table.index
# occupancy_table = occupancy_table.reset_index(drop=True)
# occupancy_table = occupancy_table.fillna(occupancy_table.mean())
# occupancy_table['occupancy']

In [79]:
# plot two regression lins
sns.set(rc={"figure.figsize":(20, 8)})
mpl.rcParams.update({"axes.grid" : True, "grid.color": 'CCE5FF'})

@interact
def plot_trend_regression(x=states):
    '''
    @Usage: plot average occupancy each month from 2017/1 to 
    most recent month available for each state
    @Params: x: states array, default value is an array of 
    13 states that have Ensign facilities
    '''
    state = 'CA'
    for s in states:
        if x == s:
            state = x
#     for state in states:
    query=db.select([occupancy]).where(db.and_(occupancy.columns.ensign == False,
                                          occupancy.columns.state==state))
    df = pd.read_sql_query(query, con=engine)
    occupancy_table = generate_occ_table(df)
    x_labels = occupancy_table['occupancy'].columns.tolist()
    
    pre_covid = occupancy_table.iloc[:,-14::-1]
    pre_covid = pre_covid.iloc[:,::-1]
    pre_x = np.arange(0,len(pre_covid['occupancy'].columns.tolist()),1)
    pre_y = pre_covid['occupancy'].mean().values   
    pre_m, pre_b = np.polyfit(pre_x, pre_y, 1)
    
    post_covid = occupancy_table.iloc[:,-2:-14:-1]
    post_covid = post_covid.iloc[:,::-1]
#     post_x = np.arange(0,len(post_covid['occupancy'].columns.tolist()),1)
    post_x = np.arange(pre_x[-1]+1,pre_x[-1]+13,1)
    post_y = post_covid['occupancy'].mean().values   
    post_m, post_b = np.polyfit(post_x, post_y, 1)
    
    x1 = np.arange(0, len(x_labels),1)
    y1 = occupancy_table['occupancy'].mean().values
    
    pre_covid_color = 'r' if pre_m > 0 else 'g'
    post_covid_color = 'r' if post_m > 0 else 'g'
    
    ax = plt.axes()
    ax.set_facecolor((1, 1, 1))
#  occupancy_table['occupancy'].mean().values
    plt.scatter(x=x1,y=y1)
#     plt.plot(x1, m*x1 + b, color='r')
    plt.plot(pre_x, pre_m*pre_x + pre_b, color=pre_covid_color)
    plt.plot(post_x, post_m*post_x + post_b, color=post_covid_color)
    plt.ylim(0.5,1)
    plt.xticks(np.arange(0, len(x_labels),1),x_labels, rotation=90);
    plt.title(f'{state}\n PreCovid: {round(pre_m*100,2)}%\n PostCovid: {round(post_m*100,2)}%', fontsize=20)
    
    plt.show()



interactive(children=(Dropdown(description='x', options=('AZ', 'CA', 'CO', 'IA', 'ID', 'KS', 'NE', 'NV', 'SC',…

### State Occupancy 2017-2021 <a class="anchor" id="state_occupancy"></a>

In [80]:
# x_ticks labels
# x_lables = occupancy_table[occupancy_table['provnum']=='555875']['occupancy'].columns.tolist()
# sns.set(rc={"figure.figsize":(20, 8)})

# @interact
# def plot_trend_regression(x=states):
#     '''
#     @Usage: plot average occupancy each month from 2017/1 to 
#     most recent month available for each state
#     @Params: x: states array, default value is an array of 
#     13 states that have Ensign facilities
#     '''
#     state = 'CA'
#     for s in states:
#         if x == s:
#             state = x
# #     for state in states:
#     query=db.select([occupancy]).where(db.and_(occupancy.columns.ensign == False,
#                                           occupancy.columns.state==state))
#     df = pd.read_sql_query(query, con=engine)
#     occupancy_table = generate_occ_table(df)
#     x_labels = occupancy_table['occupancy'].columns.tolist()
#     x1 = np.arange(0, len(x_labels),1)
#     y1 = occupancy_table['occupancy'].mean().values
        
#     m, b = np.polyfit(x1, y1, 1)

# #  occupancy_table['occupancy'].mean().values
#     plt.scatter(x=x1,y=y1)
#     plt.plot(x1, m*x1 + b, color='r')
#     plt.ylim(0.5,1)
#     plt.xticks(np.arange(0, len(x_labels),1),x_labels, rotation=90);
#     plt.title(f'{state}, {round(m,4)}', fontsize=20)
#     plt.show()


In [15]:
@interact
def plot_ensign_trend(x=states):
    '''
    @Usage: plot average Ensign Facilities' occupancy each month from 2017/1 to 
    most recent month available for each state
    @Params: x: states array, default value is an array of 
    13 states that have Ensign facilities
    '''
    state = 'CA'
    for s in states:
        if x == s:
            state = x
#     for state in states:
    query=db.select([occupancy]).where(db.and_(occupancy.columns.ensign == True,
                                          occupancy.columns.state==state))
    df = pd.read_sql_query(query, con=engine)
    occupancy_table = generate_occ_table(df)
    x_labels = occupancy_table['occupancy'].columns.tolist()
    x1 = np.arange(0, len(x_labels),1)
    y1 = occupancy_table['occupancy'].mean().values
        
    m, b = np.polyfit(x1, y1, 1)

#  occupancy_table['occupancy'].mean().values
    plt.scatter(x=x1,y=y1)
    plt.plot(x1, m*x1 + b, color='r')
    plt.ylim(0.5,1)
    plt.xticks(np.arange(0, len(x_labels),1),x_labels, rotation=90);
    plt.title(f'{state}, {round(m,4)}')
    plt.show()

interactive(children=(Dropdown(description='x', options=('AZ', 'CA', 'CO', 'IA', 'ID', 'KS', 'NE', 'NV', 'SC',…

In [18]:
# state level Ensign vs Competitors
def state_level_trend():
    '''
    @Usage: Compare Ensign and Competitors' occupancy trend
            and average occupancy in the 13 states.
    @Return: A dataframe includes trend (calculated by linear regression).
             A negative trend means occupancy is decreasing and vice versa.
             Better Than Competitors: Both Ensign trend and Ensign average
             occupancy are better than competitors within that state.
    '''
    coef_dict = {'state': [], 'trend': [], 'average':[],
                 'Ensign_trend': [], 'Ensign_average':[],
                 '#ensign_facilities':[]
                }
       
    for state in states:
        
        query_en=db.select([occupancy]).where(db.and_(occupancy.columns.ensign == True,
                                          occupancy.columns.state==state))
        df_en = pd.read_sql_query(query_en, con=engine)
        num_unique_facilities = df_en['provnum'].nunique()
        occupancy_table_en = generate_occ_table(df_en)
        x_labels_en = occupancy_table_en['occupancy'].columns.tolist()
        x_en = np.arange(0, len(x_labels_en),1)
        y_en = occupancy_table_en['occupancy'].mean().values
        occ_mean_en = np.array(y_en).mean()
        m_en, b_en = np.polyfit(x_en, y_en, 1)
        coef_dict['Ensign_trend'].append(str(round(m_en,4)))
        coef_dict['Ensign_average'].append(str(round(occ_mean_en,3)))
        coef_dict['#ensign_facilities'].append(num_unique_facilities)
        
        query=db.select([occupancy]).where(db.and_(occupancy.columns.ensign == False,
                                          occupancy.columns.state==state))
        df = pd.read_sql_query(query, con=engine)
        occupancy_table = generate_occ_table(df)
        x_labels = occupancy_table['occupancy'].columns.tolist()
        x1 = np.arange(0, len(x_labels),1)
        y1 = occupancy_table['occupancy'].mean().values
        occ_mean = np.array(y1).mean()
        m, b = np.polyfit(x1, y1, 1)
        coef_dict['state'].append(state)
        coef_dict['trend'].append(str(round(m,4)))
        coef_dict['average'].append(str(round(occ_mean,3)))
    df = pd.DataFrame(data=coef_dict)
    df['trend > competitors'] = df.apply(lambda x: True if float(x['Ensign_trend']) > float(x['trend']) else False, axis=1)
    df['average > competitors'] = df.apply(lambda x: True if float(x['Ensign_average'])>float(x['average']) else False, axis=1)
    df['better_than_competitors'] = df.apply(lambda x: True if x['trend > competitors'] and x['average > competitors']\
                                             else False, axis=1)
    df = df.sort_values(by=['trend']).reset_index(drop=True)
    
    return df

df = state_level_trend()
df
df.style.set_caption('Better Than Competitors: Both Trend and Average Occupancy are better than Competitors')

Unnamed: 0,state,trend,average,Ensign_trend,Ensign_average,#ensign_facilities,trend > competitors,average > competitors,better_than_competitors
0,UT,-0.0012,0.669,0.0003,0.584,18,True,False,False
1,ID,-0.0025,0.67,-0.0002,0.625,11,True,False,False
2,KS,-0.0025,0.812,0.0031,0.758,7,True,False,False
3,NV,-0.0025,0.799,-0.0058,0.745,1,False,False,False
4,CA,-0.0026,0.85,-0.0017,0.853,51,True,True,True
5,WA,-0.0026,0.771,-0.001,0.743,13,True,False,False
6,NE,-0.0028,0.741,0.0016,0.629,6,True,False,False
7,SC,-0.0028,0.837,-0.0016,0.783,4,True,False,False
8,IA,-0.0029,0.774,0.001,0.776,6,True,True,True
9,CO,-0.0032,0.812,-0.0,0.842,14,True,True,True


In [50]:
def county_level_trend(state):
    coef_dict = {'county': [], 'trend': [], 'average':[],
                 'Ensign_trend': [], 'Ensign_average':[],
                 '#ensign_facilities':[],
                 'markets':[]
                }
    counties = counties_dict[state]
    big_state = False
    
    # list of states to have returned df sorted by market names
    if state in ['CA', 'TX', 'UT']:
        big_state = True
        
    for county in counties:
        coef_dict['county'].append(county)
        
        query_en=db.select([occupancy]).where(db.and_(occupancy.columns.ensign == True,
                                                      occupancy.columns.state==state,
                                                      occupancy.columns.county_name==county))
        df_en = pd.read_sql_query(query_en, con=engine)
        num_unique_facilities = df_en['provnum'].nunique()
        occupancy_table_en = generate_occ_table(df_en)
        x_labels_en = occupancy_table_en['occupancy'].columns.tolist()
        x_en = np.arange(0, len(x_labels_en),1)
        y_en = occupancy_table_en['occupancy'].mean().values
        occ_mean_en = np.array(y_en).mean()
        m_en, b_en = np.polyfit(x_en, y_en, 1)
        coef_dict['Ensign_trend'].append(round(m_en,4))
        coef_dict['Ensign_average'].append(round(occ_mean_en,3))
        coef_dict['#ensign_facilities'].append(num_unique_facilities)
        markets = df_en['market'].unique().tolist()
        markets = str(', '.join(markets))
        coef_dict['markets'].append(markets)
        
        
        query=db.select([occupancy]).where(db.and_(occupancy.columns.ensign == False,
                                                   occupancy.columns.state==state,
                                                   occupancy.columns.county_name==county))
        df = pd.read_sql_query(query, con=engine)
        if len(df) > 0:
            occupancy_table = generate_occ_table(df)
            x_labels = occupancy_table['occupancy'].columns.tolist()
            x1 = np.arange(0, len(x_labels),1)
            y1 = occupancy_table['occupancy'].mean().values
            occ_mean = np.array(y1).mean()
            m, b = np.polyfit(x1, y1, 1)
            coef_dict['trend'].append(round(m,4))
            coef_dict['average'].append(round(occ_mean,3))
        else:
            coef_dict['trend'].append(-1)
            coef_dict['average'].append(-1)
    df = pd.DataFrame(data=coef_dict)
    df['trend > competitors'] = df.apply(lambda x: True if float(x['Ensign_trend']) > float(x['trend']) else False, axis=1)
    df['average > competitors'] = df.apply(lambda x: True if float(x['Ensign_average'])> float(x['average']) else False, axis=1)
    df['better_than_competitors'] = df.apply(lambda x: True if x['trend > competitors'] and x['average > competitors']  else False, axis=1)
    if big_state:
        df['markets'] = df['markets'].sort_values()  
    else:
        df = df.sort_values(by=['trend']).reset_index(drop=True)
    
    return df

In [51]:
id_df = county_level_trend('ID')
id_df

Unnamed: 0,county,trend,average,Ensign_trend,Ensign_average,#ensign_facilities,markets,trend > competitors,average > competitors,better_than_competitors
0,Valley,-1.0,-1.0,-0.002,0.418,1,Pennant Idaho Nevada,True,True,True
1,Owyhee,-1.0,-1.0,0.0083,0.866,1,Pennant Idaho Nevada,True,True,True
2,Lemhi,-1.0,-1.0,-0.004,0.6,1,Pennant Idaho Nevada,True,True,True
3,Gooding,-1.0,-1.0,0.0002,0.374,1,Pennant Idaho Nevada,True,True,True
4,Madison,-0.0081,0.904,-0.0002,0.373,1,Pennant Idaho Nevada,True,False,False
5,Bannock,-0.0033,0.793,-0.0003,0.672,2,Pennant Idaho Nevada,True,False,False
6,Gem,-0.0025,0.484,-0.0012,0.615,1,Pennant Idaho Nevada,True,True,True
7,Ada,-0.002,0.687,-0.0007,0.668,1,Pennant Idaho Nevada,True,False,False
8,Canyon,-0.0011,0.684,0.0045,0.774,1,Pennant Idaho Nevada,True,True,True
9,Cassia,-0.0006,0.73,-0.0037,0.796,1,Pennant Idaho Nevada,False,True,False


In [23]:
ca_df = county_level_trend('CA')
ca_df

Unnamed: 0,county,trend,average,Ensign_trend,Ensign_average,#ensign_facilities,markets,trend > competitors,average > competitors,better_than_competitors
0,Los Angeles,-0.0028,0.852,-0.0023,0.872,13,"Momentum, Touchstone 2.0",True,True,True
1,Orange,-0.0026,0.835,-0.0023,0.886,7,Momentum,True,True,True
2,Sonoma,-0.0066,0.879,-0.0011,0.768,5,Flagstone No Cal,True,False,False
3,Mendocino,,,-0.002,0.677,2,Flagstone No Cal,False,False,False
4,Ventura,-0.0028,0.803,0.0002,0.882,3,NC Islanders,True,True,True
5,Santa Barbara,-0.0005,0.751,0.0018,0.745,2,NC Islanders,True,False,False
6,San Diego,-0.0024,0.844,-0.0015,0.869,15,SD United,True,True,True
7,San Bernardino,-0.0008,0.904,-0.003,0.896,2,Touchstone 2.0,False,False,False
8,Riverside,-0.002,0.889,-0.0058,0.872,2,Touchstone 2.0,False,False,False


In [49]:
az_df = county_level_trend('AZ')
az_df

Unnamed: 0,county,trend,average,Ensign_trend,Ensign_average,#ensign_facilities,markets,trend > competitors,average > competitors,better_than_competitors
0,Yavapai,-0.0045,0.74,-0.0044,0.643,1,Bandera Phoenix West,True,False,False
1,Pima,-0.004,0.766,-0.001,0.649,7,Bandera Tucson,True,False,False
2,Maricopa,-0.0037,0.938,0.0011,0.78,24,"Bandera Phoenix West, Bandera Phoenix East",True,False,False


In [None]:
co_df = county_level_trend('CO')
co_df

In [None]:
ia_df = county_level_trend('IA')
ia_df

In [None]:
ks_df = county_level_trend('KS')
ks_df

In [None]:
ne_df = county_level_trend('NE')
ne_df

In [None]:
nv_df = county_level_trend('NV')
nv_df

In [None]:
sc_df = county_level_trend('SC')
sc_df

In [None]:
tx_df = county_level_trend('TX')
tx_df

In [None]:
ut_df = county_level_trend('UT')
ut_df

In [None]:
wa_df = county_level_trend('WA')
wa_df

In [None]:
wi_df = county_level_trend('WI')
wi_df

In [None]:
### some snippets
mean = occupancy_table[occupancy_table['provnum']=='555875']['occupancy'].values.mean()
mean
y1 = occupancy_table[occupancy_table['provnum']=='555875']['occupancy'].values.squeeze().tolist()
np.array(y1).mean()

# x_ticks labels
x1 = occupancy_table['occupancy'].columns.tolist()

# linear regression
x = np.arange(0, len(x1),1)
m, b = np.polyfit(x, y1, 1)
m

In [None]:
sns.set(rc={"figure.figsize":(20, 8)})
plt.scatter(x=x,y=y1)
plt.plot(x, m*x + b, color='r')
plt.ylim(0.5,1)
plt.xticks(np.arange(0, len(x1),1),x1, rotation=90);
plt.title(f'Ensign in CA, {round(m,4)}')