# This notebook overlays visitor footfall numbers onto graphs of BM and V&A acquisitions and Tate temporary exhibitions

In [None]:
import datetime as dt

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.lines import Line2D

In [None]:
# Load the museum footfall table from CSV (path is relative to this notebook's folder).
Footfall_df = pd.read_csv('../../machine_learning/data/footfall_df_2.csv')

In [None]:
# Restrict the footfall table to the years of interest for the exhibition data
# by removing the '2004' and '2025' year columns in a single call.
# The result is assigned back (no inplace=True) and errors='ignore' is used so
# the cell can be re-run safely: the original in-place drops raised KeyError on
# a second run because the columns were already gone.
Footfall_df = Footfall_df.drop(columns=['2004', '2025'], errors='ignore')

In [None]:
# Print column dtypes and non-null counts to check what remains after the year columns were dropped.
Footfall_df.info()

In [None]:
# Display the wide footfall table (a bare last expression is rendered as the cell output).
Footfall_df

In [None]:
# Reshape from wide to long: every column other than 'Museum Name' and 'Month'
# becomes a ('Year', 'Footfall') pair on its own row.
Museum_Footfall_df = pd.melt(
    Footfall_df,
    id_vars=['Museum Name', 'Month'],
    var_name='Year',
    value_name='Footfall',
)

In [None]:
# Display the long-format footfall table produced by the melt.
Museum_Footfall_df

In [None]:
# Build a first-of-the-month timestamp for each row from its Year and Month,
# so footfall can be plotted against a real date axis.
year_text = Museum_Footfall_df['Year'].astype(str)
month_text = Museum_Footfall_df['Month'].astype(str)
Museum_Footfall_df['DateDT'] = pd.to_datetime(year_text + '-' + month_text + '-01')

In [None]:
# Summary statistics for every column; include='all' also covers the non-numeric columns.
Museum_Footfall_df.describe(include='all')

In [None]:
# Spot-check 20 randomly chosen rows. No random_state is passed, so the rows shown differ on every run.
Museum_Footfall_df.sample(20)

In [None]:
# Treat a footfall of 0 as missing data by converting it to NaN.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the column selection: that form works on an
# intermediate Series, raises a FutureWarning in recent pandas and no longer
# updates the DataFrame once Copy-on-Write is enabled.
Museum_Footfall_df['Footfall'] = Museum_Footfall_df['Footfall'].replace(0, np.nan)

In [None]:
# Remove the rows that have no footfall value.
# The original called dropna(inplace=True) on the 'Footfall' column selection,
# which only shortens an intermediate Series and never removes rows from the
# DataFrame; dropping by subset on the frame itself does what was intended.
Museum_Footfall_df = Museum_Footfall_df.dropna(subset=['Footfall'])

In [None]:
# Quick-look line plot of footfall against date for the whole long-format frame.
# The Axes returned by DataFrame.plot is labelled directly rather than through pyplot.
footfall_ax = Museum_Footfall_df.plot(x='DateDT', y='Footfall')
footfall_ax.set_xlabel('Year')
footfall_ax.set_ylabel('Visitors (Millions)')

In [None]:
# Only Tate totals are used for the temporary exhibitions graph, so keep just
# the rows whose 'Museum Name' is 'Tate', then list the available columns.
is_tate = Museum_Footfall_df['Museum Name'].eq('Tate')
Tate_values_df = Museum_Footfall_df[is_tate]
Tate_values_df.columns

In [None]:
# Display the Tate-only footfall rows.
Tate_values_df

In [None]:
# Show the last 15 Tate rows to inspect the end of the table.
Tate_values_df.tail(15)

In [None]:
# Load the Tate temporary exhibitions dataset from CSV (path is relative to this notebook's folder).
Tate_temp_exhibitions_df = pd.read_csv('../../collections/data/tate_temp_exhibitions_dataset.csv')

In [None]:
# Display the exhibitions table as loaded.
Tate_temp_exhibitions_df

In [None]:
# Bar colour for each exhibition medium; the keys must match the values in the
# 'Medium' column, because the colour is looked up per row below.
medium_colors = {'Architecture': '#003f5c', 'Film': '#2f4b7c', 'Installation': '#665191', 'Mixed': '#a05195',
                 'Painting': '#d45087', 'Performance': '#f95d6a', 'Photography': '#ff7c43', 'Sculpture': '#ffa600'}

# Make sure the start/end columns are datetimes so they can sit on a date axis
Tate_temp_exhibitions_df['Start Day'] = pd.to_datetime(Tate_temp_exhibitions_df['Start Day'])
Tate_temp_exhibitions_df['End Day'] = pd.to_datetime(Tate_temp_exhibitions_df['End Day'])

# Create a figure with size
fig, ax1 = plt.subplots(figsize=(20, 10))

# One horizontal bar per exhibition: the row is the medium, the bar starts at
# 'Start Day' and extends by 'exhib_duration'.
# NOTE(review): presumably exhib_duration is a number of days - confirm against the dataset.
for _, row in Tate_temp_exhibitions_df.iterrows():
    ax1.barh(y=row['Medium'], width=row['exhib_duration'], left=row['Start Day'], color=medium_colors[row['Medium']])

# Tate footfall on a secondary y-axis that shares the date axis with the bars
ax2 = ax1.twinx()
footfall_line, = ax2.plot(Tate_values_df['DateDT'], Tate_values_df['Footfall'], label='Actual Visitors', color='#003f5c', linestyle='--')
ax2.set_ylabel('Footfall (Millions)', fontsize=18, color='#003f5c')

# Legend: one colour patch per medium plus the footfall line. The line carried a
# label in the original but was never passed to the legend, so it went unexplained.
legend_handles = [
    matplotlib.patches.Patch(color=colour, label=medium)
    for medium, colour in medium_colors.items()
]
legend_handles.append(footfall_line)
ax2.legend(handles=legend_handles, fontsize=11, loc='lower right')

# Explicit axes calls replace the pyplot state machine (which targeted ax2, the
# most recently created axes), so the target of each call is visible.
ax2.set_title('Tate Temporary Exhibitions 2007 - 2018 with Footfall', fontsize=28, color='#003f5c')
ax1.set_ylabel('Medium', fontsize=18, color='#003f5c')  # was misspelt 'Meduim'
ax1.set_xlabel('Date', fontsize=18, color='#003f5c')
ax2.grid(True, color='grey', linewidth=1.2, axis='x')

plt.show()

# Aiding the reader of the graph

While this graph is colourful, it doesn't help identify any correlation between exhibitions and visitor footfall numbers.  This might be clearer with a heatmap of exhibition end dates against the visitor numbers for the same month.  I will try to use a seaborn reg plot for that.

It would also be interesting to prepare the data for acquisitions and visitor footfall numbers for BM and V&A.

Initially I have looked at a histogram that compares visitor footfall against month across the year.  This could indicate peak times of footfall in spring and summer, but a comparison between separate dataframes is difficult using seaborn.  I will need to merge the datasets or find graphing tools that will plot separate series.

In [None]:
# Load the combined collections dataset from CSV (path is relative to this notebook's folder).
Combined_Collections_df = pd.read_csv('../../collections/data/combined_collections_footfall_dates_dataset.csv')

In [None]:
# Joint histogram of Tate footfall against month, with marginal distributions.
# jointplot builds its own multi-axes figure (a JointGrid), so the title is set
# on the figure: plt.title() only titles one axes inside the grid, where it does
# not act as a heading for the whole plot. y > 1 lifts the title clear of the
# top marginal plot.
tate_month_grid = sns.jointplot(data=Tate_values_df, x='Month', y='Footfall', kind='hist')
tate_month_grid.figure.suptitle('Tate Visitor Footfall vs Month', y=1.02)

For the Tate temporary exhibition dates it would be useful to align them with the monthly visitor dates.  I will take the year and month of each end date and associate it with the 1st of that month to align with the footfall dates.

In [None]:
# Snap each exhibition's end date to the first day of its month so it lines up
# with the monthly footfall dates (DateDT is always the 1st of a month).
# Converting to a monthly period and back to a timestamp does this directly,
# without the year/month string round-trip, and leaves missing end dates as NaT.
# The original cell also rebuilt the footfall date Series and discarded the
# result; that no-op line has been removed.
Tate_temp_exhibitions_df['ExhibitPlotDate'] = (
    Tate_temp_exhibitions_df['End Day'].dt.to_period('M').dt.to_timestamp()
)

In [None]:
# Display the exhibitions table again to check the new 'ExhibitPlotDate' column.
Tate_temp_exhibitions_df

Acquisition dates need to be converted into datetime values so they can be plotted on the same axes as the footfall data.  Only the acquisition year is recorded, so to assist the conversion I have hard-coded the month and day: every acquisition is dated to the 1st of December of its acquisition year to try and align with the footfall dates.

In [None]:
# Date every acquisition to 1 December of its recorded 'AcqDate' so it can share
# a date axis with the footfall data, then preview the first rows.
acq_date_text = Combined_Collections_df['AcqDate'].astype(str) + '-12-01'
Combined_Collections_df['AcqDateDT'] = pd.to_datetime(acq_date_text)
Combined_Collections_df.head(5)

In [None]:
# Split the long footfall table into one frame per museum.
museum_names = Museum_Footfall_df['Museum Name']
Tate_visitor_values_df = Museum_Footfall_df[museum_names == 'Tate']
BM_visitor_values_df = Museum_Footfall_df[museum_names == 'British Museum']
VAM_visitor_values_df = Museum_Footfall_df[museum_names == 'V&A']

In [None]:
# Count exhibitions (non-null 'ID' values) for each 'ExhibitPlotDate', in date order.
Tate_count_exhibit = Tate_temp_exhibitions_df.groupby(by='ExhibitPlotDate')
exhibit_id_counts = Tate_count_exhibit['ID'].count()
Tate_count_exhibit_annual_values = exhibit_id_counts.sort_index()

In [None]:
# Display the exhibition counts per plot date.
Tate_count_exhibit_annual_values

In [None]:
# Separate the combined collections into British Museum and V&A records
# using the 'Museum' code column.
museum_code = Combined_Collections_df['Museum']
BM_df = Combined_Collections_df[museum_code.eq('BM')]
VAM_df = Combined_Collections_df[museum_code.eq('VAM')]

In [None]:
# Number of British Museum records (non-null 'RecordID') per acquisition date, in date order.
BM_Acq_group = BM_df.groupby(by='AcqDateDT')
bm_record_counts = BM_Acq_group['RecordID'].count()
BM_Acq_values = bm_record_counts.sort_index()
BM_Acq_values

In [None]:
# Number of V&A records (non-null 'RecordID') per acquisition date, in date order.
VAM_Acq_group = VAM_df.groupby(by='AcqDateDT')
vam_record_counts = VAM_Acq_group['RecordID'].count()
VAM_Acq_values = vam_record_counts.sort_index()
VAM_Acq_values

In [None]:
# Display the V&A footfall rows before plotting them.
VAM_visitor_values_df

In [None]:
def plot_counts_vs_footfall(ax, counts, visitors_df, colour, count_label, visitor_label, count_ylabel):
    """Draw event counts and visitor footfall on one panel with twin y-axes.

    Parameters
    ----------
    ax : matplotlib Axes
        Panel that receives the count line (left-hand y-axis).
    counts : pandas.Series
        Event counts; the index supplies the x positions and the values the heights.
    visitors_df : pandas.DataFrame
        Footfall rows providing the 'DateDT' (x) and 'Footfall' (y) columns.
    colour : str
        Line colour shared by both series of the panel.
    count_label, visitor_label : str
        Legend labels for the count line and the footfall line.
    count_ylabel : str
        Label for the left-hand y-axis.

    Returns
    -------
    matplotlib Axes
        The twin axes (right-hand y-axis) that carries the footfall line.
    """
    text_colour = '#003f5c'
    twin_ax = ax.twinx()

    # Counts: dashed line with 'x' markers on the left axis
    ax.plot(counts.index, counts.values, color=colour, marker='x', linewidth=4,
            linestyle='--', label=count_label)
    # Footfall: solid line with 'o' markers on the right axis
    twin_ax.plot(visitors_df['DateDT'], visitors_df['Footfall'], color=colour, marker='o',
                 linewidth=4, label=visitor_label)

    ax.set_xlabel('Date', color=text_colour)
    ax.set_ylabel(count_ylabel, color=text_colour)
    ax.tick_params(axis='y', labelcolor=text_colour)
    twin_ax.set_ylabel('Number of Visitors', color=text_colour)
    twin_ax.tick_params(axis='y', labelcolor=text_colour)

    # Each axes keeps its own legend: counts on the left, footfall on the right
    ax.legend(loc='upper left')
    twin_ax.legend(loc='upper right')
    return twin_ax


# Three stacked panels, one per museum, each pairing its event counts with its footfall.
# The same helper draws every panel, replacing three copy-pasted blocks.
fig, axs = plt.subplots(3, 1, layout='constrained', figsize=(20, 10))
ax1, ax3, ax5 = axs

ax2 = plot_counts_vs_footfall(
    ax1, Tate_count_exhibit_annual_values, Tate_visitor_values_df, colour='#d45087',
    count_label='Tate Exhibit Data', visitor_label='Tate Visitor Data',
    count_ylabel='Number of Exhibitions',
)
ax4 = plot_counts_vs_footfall(
    ax3, BM_Acq_values, BM_visitor_values_df, colour='#665191',
    count_label='BM Acq Data', visitor_label='BM Visitor Data',
    count_ylabel='Number of Acquisitions',
)
ax6 = plot_counts_vs_footfall(
    ax5, VAM_Acq_values, VAM_visitor_values_df, colour='#ff7c43',
    count_label='VAM Acq Data', visitor_label='VAM Visitor Data',
    count_ylabel='Number of Acquisitions',
)

fig.suptitle('Footfall data vs Exhibitions and Acquisitions', color='#003f5c', fontsize=24)
# Save before plt.show() so the written file contains the fully drawn figure
fig.savefig('../visualisations/combined_collection_footfall_tate_exhibitions.png', orientation='landscape')

# Show plot
plt.show()

These graphs attempt to explore the hypothesis that a larger number of exhibitions and/or acquisitions would have a positive correlation with visitor footfall.  As the plotted dates were adjusted for the convenience of graph plotting, this would need deeper investigation and a larger dataset to provide any assurance that the examples shown are not biased.