## Loading the data and checking the metadata

First, we load the dataset with the `datadotworld` library and print the description of the `pet_ownership` table.

In [1]:
import datadotworld as dw

In [16]:
# Load the Makeover Monday 2018 week-12 dataset (UK pet population in 2017)
# through the datadotworld client.
DATASET_KEY = 'makeovermonday/2018w12-uk-pet-population-in-2017'
pet_data = dw.load_dataset(DATASET_KEY)

In [17]:
# Display the metadata (format, path and column schema) recorded for the
# `pet_ownership` resource.
ownership_meta = pet_data.describe('pet_ownership')
ownership_meta

{'format': 'csv',
 'name': 'pet_ownership',
 'path': 'data/pet_ownership.csv',
 'schema': {'fields': [{'name': 'type_of_pet',
    'rdfType': 'http://www.w3.org/2001/XMLSchema#string',
    'title': 'Type of Pet',
    'type': 'string'},
   {'name': 'pet_population_2016_2017_rolling_total',
    'rdfType': 'http://www.w3.org/2001/XMLSchema#integer',
    'title': 'Pet population 2016/2017 rolling total',
    'type': 'integer'},
   {'name': 'percentage_of_households_with_this_pet',
    'rdfType': 'http://www.w3.org/2001/XMLSchema#decimal',
    'title': 'Percentage of Households with this pet',
    'type': 'number'}]}}

Once loaded, the data can be accessed through the `raw_data`, `tables` and `dataframes` attributes, which return `bytes`, `list` and `pd.DataFrame` objects respectively. Note that not all files within a dataset are tabular.

In [29]:
# Pick the `pet_ownership` table out of the dataset's collection of data frames.
ownership_frames = pet_data.dataframes
df = ownership_frames['pet_ownership']

In [30]:
# Build the tidy frame from a *copy* of the raw table instead of mutating it
# in place. The previous version renamed, sorted and dropped rows directly on
# the frame held by `pet_data.dataframes`, so every re-run of this cell removed
# one more row. Starting from the raw table each time makes the cell idempotent.
df = pet_data.dataframes['pet_ownership'].copy()

# Short column names, in the order given by the dataset schema:
# type of pet, rolling population total, share of households with this pet.
df.columns = ['pet', 'rolling_total', 'percentage_hh']

df = (
    df.sort_values(by='percentage_hh', ascending=False)
      # Discard the top-ranked row, as the original cell did.
      # NOTE(review): presumably an aggregate/"total" row - confirm against the raw data.
      .iloc[1:]
      .reset_index(drop=True)
)
df.head()

Unnamed: 0,pet,rolling_total,percentage_hh
0,Cats,8000000,0.17
1,Outdoor fish,15000000,0.05
2,Rabbits,900000,0.02
3,Guinea Pigs,500000,0.02
4,Reptiles,700000,0.015


In [31]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib notebook

In [32]:
import matplotlib.font_manager as fm
import os
# Use the IBM Plex Mono face stored next to the notebook when it is available.
# Otherwise fall back to a generic monospace face so that `prop` is always
# defined: previously a missing font file left `prop` unbound and the plotting
# cell further down failed with a NameError.
fontpath = './Fonts/IBMPlexMono-Regular.ttf'
if os.path.exists(fontpath):
    prop = fm.FontProperties(fname=fontpath)
    print('Assigned font')
else:
    prop = fm.FontProperties(family='monospace')
    print('Not found')

Assigned font


In [64]:
# Bar chart of the share of households owning each pet type, one bar per row
# of `df`, with the value printed above each bar in place of a y axis.

# Offsets (in data units) that position each value label relative to its bar.
LABEL_X_SHIFT = -0.25   # nudge left of the bar centre
LABEL_Y_PAD = 0.002     # small gap above the bar top

sns.set_style('ticks')
fig, ax = plt.subplots(figsize=(10, 8))

# Draw on the axes created above explicitly rather than relying on pyplot's
# implicit "current axes".
sns.barplot(x='pet', y='percentage_hh', data=df, palette="GnBu_d", ax=ax)

ax.set_title('Most beloved pets in the UK in 2017\n', fontproperties=prop,
             size=25, color='#CD71B6', fontweight='bold')

# The y axis is hidden entirely; the per-bar labels below carry the values.
# (The former `plt.yticks(fontsize=16)` call styled ticks that were never
# drawn, so it has been removed.)
ax.set_ylabel('')
ax.get_yaxis().set_visible(False)

for i, v in enumerate(df.percentage_hh):
    ax.text(x=i + LABEL_X_SHIFT, y=v + LABEL_Y_PAD, s=str(v),
            color='black', fontweight='bold', fontproperties=prop)

plt.xticks(rotation='vertical', fontproperties=prop, fontsize=16)

# Turn the frame off explicitly. `plt.box(on=None)` *toggles* the frame, which
# only hides it when it happens to be visible beforehand.
ax.set_frame_on(False)

<IPython.core.display.Javascript object>