In [35]:
import pandas as pd, requests
import urllib.request
import bar_chart_race as bcr
import matplotlib as mpl

'3.2.2'

In [36]:
##LOAD MoH Town COVIDATA
# data.gov.il datastore_search endpoint; the query string asks for up to 150000 records.
url = 'https://data.gov.il/api/3/action/datastore_search?resource_id=8a21d39d-91e3-40db-aca1-f73f7ab1df69&limit=150000'
# A timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() reports an HTTP failure here instead of as a confusing
# KeyError on 'result' two lines further down.
response = requests.get(url, timeout=60)
response.raise_for_status()
# NOTE(review): the name `json` would shadow the stdlib module if it were ever
# imported in this notebook; it is kept so that existing references keep working.
json = response.json()
# One DataFrame row per record of the API response.
df = pd.DataFrame(json['result']['records'])


In [37]:
## Load the per-town table (2019 Excel workbook hosted on cbs.gov.il).
## It is used further down for its town-code and population columns.
url = 'https://www.cbs.gov.il/he/publications/doclib/2019/ishuvim/bycode2019.xlsx'
cbs_town = pd.read_excel(io=url)


In [38]:
# Columns that must end up as integers.
cols = ['city_code', 'cumulative_verified_cases',
        'cumulated_recovered', 'cumulated_deaths', 'cumulated_number_of_tests',
        'cumulated_number_of_diagnostic_tests']

df = (
    df.rename(columns=str.lower)                     # lower-case every column name
      .replace('<15', '10')                          ## use 10 instead of less than
      .astype({name: 'int' for name in cols})        # cast the count/code columns
)
# Parse the date column into real timestamps.
df = df.assign(date=pd.to_datetime(df['date']))
# df.dtypes
# df.head()

In [39]:
# Keep only the two columns needed from the CBS table and give them short
# English names: the town code ('city_code') and the 2019 total population ('pop').
town_df = (
    cbs_town[['סמל יישוב', 'סך הכל אוכלוסייה 2019']]
    .rename(columns={'סמל יישוב': 'city_code', 'סך הכל אוכלוסייה 2019': 'pop'})
    # A missing population becomes 0 so the column can be stored as integers.
    .assign(pop=lambda towns: towns['pop'].fillna(0).astype('int'))
)
# Default (inner) join: COVID rows whose city_code is absent from town_df are dropped.
covidf = pd.merge(df, town_df, on='city_code')

In [40]:
# covidf_town: one row per (city_code, city_name, pop), largest population first
covidf_town = (
    covidf[['city_code', 'city_name', 'pop']]
    .drop_duplicates()
    .sort_values(by='pop', ascending=False)
    .reset_index(drop=True)
)

# Names with a population of 0 cannot be normalised per capita, so they are removed.
tribes = covidf_town.loc[covidf_town['pop'] == 0, 'city_name'].unique()  ## len == 10
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the frame above (avoids SettingWithCopyWarning).
covidf_town = covidf_town.loc[~covidf_town.city_name.isin(tribes)].copy()
covidf_town['pop'] = covidf_town['pop'].astype('int')

##SET City_Type
## City >=100,000 ; Town >=10,000; Village < 10,000
covidf_town['city_type'] = [
    'City' if pop >= 100000 else 'Town' if pop >= 10000 else 'Village'
    for pop in covidf_town['pop']
]
# covidf_town[['pop']].plot.hist(bins=10)

#norm coefficients: population expressed in units of x residents
def norm_by_x(x):
    return covidf_town['pop'] / x

# norm1k divides by 10**3 so that value / norm1k is a per-1,000-residents rate,
# matching the 'per_1k' label the chart helpers derive from the column name.
covidf_town = covidf_town.assign(
    norm100k=norm_by_x(10**5),
    norm10k=norm_by_x(10**4),
    norm1k=norm_by_x(10**3),
)

##add city type to main COVIDF
town_cols = ['city_type', 'norm100k', 'norm10k', 'norm1k']
# Dropping columns left by a previous run of this cell keeps it re-runnable:
# without this, a second merge would create *_x / *_y duplicates.
covidf = (
    covidf.drop(columns=town_cols, errors='ignore')
          .merge(covidf_town[['city_code'] + town_cols], on='city_code', how='left')
)

# len(covidf_town) ##261

In [41]:
# Show the columns of the merged frame (city_type and the norm* columns were added by the merge above).
covidf.columns

Index(['_id', 'city_name', 'city_code', 'date', 'cumulative_verified_cases',
       'cumulated_recovered', 'cumulated_deaths', 'cumulated_number_of_tests',
       'cumulated_number_of_diagnostic_tests', 'pop', 'city_type', 'norm100k',
       'norm10k', 'norm1k'],
      dtype='object')

In [42]:
# Scratch values for interactive exploration.
# `city_type` is also read as a global inside barChartRaceCreator.
col = 'cumulated_deaths'
city_type = 'Town'

In [43]:
# Display the column index of covidf again; any of the cumulative columns can be passed as `col` to the chart helpers below.
covidf.columns



Index(['_id', 'city_name', 'city_code', 'date', 'cumulative_verified_cases',
       'cumulated_recovered', 'cumulated_deaths', 'cumulated_number_of_tests',
       'cumulated_number_of_diagnostic_tests', 'pop', 'city_type', 'norm100k',
       'norm10k', 'norm1k'],
      dtype='object')

In [44]:
##wide table of raw (un-normalised) totals for one city_type
def barChartDFandNameTotal(covidf=covidf, col='cumulated_deaths', city_type='City'):
    """Build the wide date-by-town table of raw `col` values for one city type.

    Parameters
    ----------
    covidf : pandas.DataFrame
        Long-format frame. The columns read here are 'city_type', 'city_name',
        'date' and `col`. The default is the global `covidf` as it existed
        when this cell was executed.
    col : str
        Name of the column whose values fill the table.
    city_type : str
        Value of the 'city_type' column to keep.

    Returns
    -------
    (pandas.DataFrame, str)
        `df_bar`, indexed by date with one column per (reversed) town name and
        only the dates whose row sum is positive, and `value_name`, formatted as
        '<city_type lower-cased>_total_<col without "cumulated_">'.

    Raises
    ------
    ValueError
        Raised by `pivot` when a (date, city_name) pair occurs more than once.
    """
    value_name = '{}_total_{}'.format(city_type.lower(), col.replace('cumulated_', ''))

    # A boolean-mask filter followed by assign() yields a new frame, so nothing is
    # written onto a slice of the caller's data (the in-place column assignment
    # used here before triggered pandas' SettingWithCopyWarning).
    df_val = covidf.loc[covidf['city_type'] == city_type].assign(
        #revert city name (hebrew) - presumably so the right-to-left names read
        # correctly when drawn on the chart; confirm against the rendered video.
        city_name=lambda rows: rows['city_name'].str[::-1]
    )

    #WIDE format: one row per date, one column per town
    df_bar = df_val.pivot(index='date', columns='city_name', values=col)
    # Blank the columns-axis label that pivot sets to 'city_name'.
    df_bar.columns.name = ''
    # Keep only the dates on which the sum across all towns is positive.
    df_bar = df_bar.loc[df_bar.sum(axis=1) > 0]

    print(value_name)
    print("number of {}: {}".format(city_type, len(df_bar.columns)))

    return df_bar, value_name

# df_c = df_p.dropna(how='all').dropna(how='all', axis=1).bfill().ffill()

# df_c.head()


In [45]:
##per-capita variant: divide `col` by the norm column that matches city_type
def barChartDFandNameNorm(covidf=covidf, col='cumulated_deaths', city_type='City'):
    """Build the wide date-by-town table of `col` divided by a population coefficient.

    The coefficient column is 'norm100k' for 'City', 'norm10k' for 'Town' and
    'norm1k' for any other `city_type`.

    Parameters
    ----------
    covidf : pandas.DataFrame
        Long-format frame. The columns read here are 'city_type', 'city_name',
        'date', `col` and the chosen norm column.
    col : str
        Name of the column to normalise.
    city_type : str
        Value of the 'city_type' column to keep.

    Returns
    -------
    (pandas.DataFrame, str)
        The wide table (dates as index, reversed town names as columns, only
        dates with a positive row sum) and a name formatted as
        '<city_type lower-cased>_<col without "cumulated_">_per_<norm suffix>'.
    """
    # Pick the coefficient column for this city type.
    if city_type == 'City':
        k_col = 'norm100k'
    elif city_type == 'Town':
        k_col = 'norm10k'
    else:
        k_col = 'norm1k'

    value_name = '{}_{}_per_{}'.format(
        city_type.lower(),
        col.replace('cumulated_', ''),
        k_col.replace('norm', ''),
    )

    rows = covidf.query("city_type == '{}'".format(city_type))
    rows = rows.assign(
        value=rows[col] / rows[k_col],
        #revert city name (hebrew)
        city_name=rows['city_name'].map(lambda name: name[::-1]),
    )

    #WIDE format
    wide = rows.pivot(index='date', columns='city_name', values='value')
    wide.columns.name = ''
    # #clean and remove all zeros: keep dates whose row sum is positive
    has_values = wide.sum(axis=1) > 0
    wide = wide[has_values]

    print(value_name)
    print("number of {}: {}".format(city_type, len(wide.columns)))

    return wide, value_name

# df_c = df_p.dropna(how='all').dropna(how='all', axis=1).bfill().ffill()

# df_c.head()


In [47]:
def barChartRaceCreator(df_bar, value_name, n_bars=16, city_type=None):
    """Render `df_bar` as a bar chart race video saved to 'barace/<value_name>.mp4'.

    Parameters
    ----------
    df_bar : pandas.DataFrame
        Wide table passed straight to `bcr.bar_chart_race`.
    value_name : str
        Underscore-separated name; used for the file name, the chart title and
        the summary label.
    n_bars : int
        Number of bars shown; also the number of largest values summed in the
        per-period summary label.
    city_type : str, optional
        Word removed from the summary label. When omitted, the first word of the
        title-cased `value_name` is used, so the function no longer depends on a
        global `city_type` being set to the matching value.

    Returns
    -------
    None
    """
    import os  # local import: only needed to make sure the output folder exists

    print(n_bars)

    # 'town_deaths_per_10k' -> 'Town Deaths Per 10K'
    chart_title = value_name.title().replace('_', ' ')
    if city_type is None:
        city_type = chart_title.partition(' ')[0]
    else:
        # Title-case so it matches the title-cased chart_title.
        city_type = str(city_type).title()
    total_title = chart_title.replace('{} '.format(city_type), '')

    filename = 'barace/{}.mp4'.format(value_name)
    # Create the target folder first so saving does not fail on a fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    def period_summary(values, ranks):
        # Text drawn for each period. Note this is the sum of the n_bars
        # largest values of the period, not of every column.
        return {'x': .9, 'y': .18,
                's': '{}: {:,.0f}'.format(total_title, values.nlargest(n_bars).sum()),
                'ha': 'right', 'size': 10}

    bcr.bar_chart_race(df_bar,
                       n_bars=n_bars,
                       figsize=(7, 5),
                       filter_column_colors=True,
                       period_fmt='%B %d, %Y',
                       period_label={'x': .9, 'y': .25, 'ha': 'right', 'va': 'center'},
                       period_summary_func=period_summary,
                       filename=filename,
                       period_length=200,
                       title='COVID in Israel - {}'.format(chart_title),
                       tick_label_size=10,
                       bar_label_size=10,
                       cmap='tab20')
    

In [49]:
# Dry run without rendering: build the normalised tables for the two smaller
# city types to check the printed names and column counts.
for city_type in ('Town', 'Village'):
    print(city_type)
    df_bar, value_name = barChartDFandNameNorm(city_type=city_type)


Town
town_deaths_per_10k
number of Town: 120
Village
village_deaths_per_1k
number of Village: 125


In [50]:
## CREATE NORM AND TOTAL BAR CHARTS for City, Village and Town - this may take sometime...
# For every city type, render the normalised chart first and the raw-total chart second.
for city_type in ('City', 'Town', 'Village'):
    print(city_type)
    for build_table in (barChartDFandNameNorm, barChartDFandNameTotal):
        df_bar, value_name = build_table(city_type=city_type)
        barChartRaceCreator(df_bar, value_name)


Town
town_deaths_per_10k
number of Town: 120
16


  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


town_total_deaths
number of Town: 120
16


  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
 

Village
village_deaths_per_1k
number of Village: 125
16


  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


village_total_deaths
number of Village: 125
16


  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
  font.set_text(s, 0.0, flags=flags)
 