# Completeness (Decades)

These graphs show, per decade of the museum's existence, how field-complete the object records are. Completeness is measured per concept grouping of fields: for example, if a grouping such as 'Depiction' has 10 possible fields (contentPlace, contentPerson, etc.), an object with values set for 5 of them is 50% complete.

In [1]:
import glob
import pandas as pd

# Read the HDF5 data file into a DataFrame (hard-coded, machine-specific path).
objects_df = pd.read_hdf(
    path_or_buf='/home/richard/Git/bbk-mqaf/data/latest/field.h5',
)

In [5]:
# Narrow the data down to the columns this notebook needs: any column whose
# name ends in "accessionYear" plus the completeness columns.
# NOTE(review): the trailing "*" in the pattern is a regex quantifier on the
# final "s" (so it also matches "completenes"); it is kept as-is here.
complete_df = objects_df.filter(
    regex='accessionYear$|completeness*',
    axis='columns',
)

In [6]:
import re
# Strip the "completeness:" prefix from the column names so that only the
# remainder of each name is left as the label.
tidied_df = complete_df.rename(
    columns=lambda column_name: re.sub('completeness:', '', column_name),
)

In [7]:
# Build one completeness chart per decade, laid out two charts per row
import altair as alt

# Accumulators: `charts` holds the charts waiting to be placed in the current
# row, `hcharts` holds the rows that have already been assembled.
hcharts = []
charts = []

# Placeholder for the per-column band counts; unset until first computed.
dept_col_counts = None

# Chart-pairing counter, starting at zero.
count = 0

# Bin edges passed to pd.cut: ten equal-width bands spanning 0 to 1.
ranges = [
    0, 0.1, 0.2, 0.3, 0.4, 0.5,
    0.6, 0.7, 0.8, 0.9, 1,
]

def create_chart(data_df, title):
    """Build a bar chart of completeness bands for each concept column.

    Every column of ``data_df`` after the first is binned into ten bands
    ("0-10%" ... "90-100%") using the module-level ``ranges`` edges, and the
    number of non-null values per band is counted. The counts are plotted as
    one bar per column ("Concept"), coloured by band ("Percentage").

    Args:
        data_df: DataFrame whose first column is skipped (presumably the
            accession year -- confirm against the column order) and whose
            remaining columns hold scores on the 0-1 scale of ``ranges``.
            Values outside the bin edges are not counted.
        title: Title shown above the chart.

    Returns:
        An Altair bar chart, 220 pixels wide, of object counts per concept
        and completeness band.
    """
    band_labels = ["0-10%", "10-20%", "20-30%", "30-40%", "40-50%",
                   "50-60%", "60-70%", "70-80%", "80-90%", "90-100%"]

    band_counts = []
    for column in data_df.columns[1:]:
        bands = pd.cut(data_df[column], ranges, labels=band_labels,
                       include_lowest=True)
        # observed=False keeps bands with no objects as explicit zero counts,
        # independent of the pandas default for categorical groupers.
        counts = data_df[column].groupby(bands, observed=False).count()
        counts.name = column
        band_counts.append(counts)

    # DataFrame.append was removed in pandas 2.0, so the per-column Series are
    # collected first and assembled in one step: each Series becomes a column,
    # and the transpose turns it into one row per concept.
    if band_counts:
        data_rows_df = pd.concat(band_counts, axis=1).T
    else:
        data_rows_df = pd.DataFrame()
    data_rows_df = data_rows_df.rename_axis('Concept').rename_axis('Percentages', axis='columns')

    # Convert the CategoricalIndex of band labels to a plain list so that
    # reset_index can add the 'Concept' column
    # (see https://github.com/pandas-dev/pandas/issues/19136).
    data_rows_df.columns = data_rows_df.columns.tolist()
    data_rows_df = data_rows_df.reset_index()

    # Long format: one row per (concept, band) pair with its object count.
    data_rows_melt_df = data_rows_df.melt(id_vars=['Concept'], var_name='Percentage', value_name='Objects')

    chart = alt.Chart(data_rows_melt_df).mark_bar().encode(
        x='Objects:Q',
        y='Concept:O',
        color='Percentage:O',
        tooltip=['Percentage', 'Objects']
    ).properties(width=220, title=title)

    return chart
        
# One chart per decade from the 1850s through the 2020s, two charts per row.
for decade in range(1850, 2030, 10):
    # Objects whose accession year falls in this decade; between() includes
    # both end points, so this selects decade .. decade + 9.
    dept_stats_df = tidied_df[tidied_df[' accessionYear'].between(decade, decade + 9)]

    # The binning, reshaping and chart construction are exactly what
    # create_chart does, so it is called here rather than repeating that code.
    chart = create_chart(dept_stats_df, "%s (%d)" % (decade, len(dept_stats_df)))
    charts.append(chart)

    if count > 0:
        # Second chart of the pair: emit the finished row and start a new one.
        hcharts.append(alt.hconcat(*charts))
        charts = []
        count = 0
    else:
        # First chart of the pair: wait for its partner.
        count += 1

# Charts for two special cases: accession year before the opening year (1857)
# and records with no accession year.
# NOTE(review): the earlier comment here also mentioned records dated after
# the current year, but no chart is built for them in this cell.
special_cases = [
    ("Pre-1857 (%d)", tidied_df[' accessionYear'] < 1857.0),
    ("No Year (%d)", tidied_df[' accessionYear'].isnull()),
]

for title_template, row_mask in special_cases:
    dept_stats_df = tidied_df[row_mask]
    special_chart = create_chart(dept_stats_df, title_template % len(dept_stats_df))
    charts.append(special_chart)

# The special-case charts share one final row.
final_row = alt.hconcat(*charts)
hcharts.append(final_row)

# Stack all rows vertically; as the last expression this is the cell output.
alt.vconcat(*hcharts)
