In [None]:
# Import modules

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.pyplot import cm

In [None]:
# Import the dataset
# pd.read_json already returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.

df = pd.read_json("../assets/normalized_pyramid_data.json")

In [None]:
# Keep only the rows in which both the start and the end of a reign are recorded

has_reign_bounds = df[["start_of_reign", "end_of_reign"]].notna()
start_reign = has_reign_bounds["start_of_reign"]
end_reign = has_reign_bounds["end_of_reign"]
reign_df = df.loc[start_reign & end_reign]

In [None]:
# Plot the number of monuments attributed to each ruler in a bar graph

# Count once and reuse the result: the index holds the ruler names, the values hold
# the number of rows per ruler (value_counts orders them from most to fewest).
monuments = df['ruler'].value_counts()
rulers = monuments.index

fig, ax = plt.subplots()
ax.bar(rulers, monuments)
ax.set_title("Monuments Completed Under Ruler")
ax.set_xlabel("Ruler")
ax.set_ylabel("Number of Monuments")
ax.tick_params(axis="x", labelrotation=90)  # vertical tick labels for the ruler names
plt.show()

In [None]:
# Plot the average width of the monuments in each Dynasty
# Messy elements are not considered here for the sake of simplicity in this initial exploration,
# they would need to be properly handled in an actual analysis

# Build a new frame rather than mutating a slice of df in place: this avoids pandas'
# SettingWithCopyWarning and gives the same result every time the cell is re-run.
dynasty_df = df[['dynasty', 'width']].dropna()
# Width entries that contain a space are treated as messy and left out
invalid_widths = dynasty_df['width'].str.contains(' ', na=False)
dynasty_df = dynasty_df.loc[~invalid_widths].astype({'width': 'float64'})
# NOTE: dynasty_group holds the Axes returned by the bar plot, not the groupby result
dynasty_group = dynasty_df.groupby('dynasty').mean().plot.bar()
dynasty_group.set_title("Average Width of Pyramids During Each Dynasty")
dynasty_group.set_xlabel("Dynasty")
dynasty_group.set_ylabel("Average Width")

In [None]:
# Same data as above, drawn as a box plot: it shows the spread of widths within each
# dynasty rather than only the mean (ie better)
# Box colours come from the explicit per-dynasty mapping below.
dyn_pal = {'3': 'plum', '4': 'g', '5': 'orange', '6': 'b', '7': 'r', '8': 'skyblue', 'FIP or 8': 'pink'}
plot = sns.boxplot(x='dynasty', y='width', data=dynasty_df, palette=dyn_pal)
# A box plot shows a distribution, so the title says so instead of "Average"
plot.set_title("Distribution of Pyramid Widths by Dynasty")
plot.set_xlabel("Dynasty")
plot.set_ylabel("Width")

In [None]:
# Testing a timeline plot: one horizontal bar per reign, each labelled with the ruler's name

# Largest start_of_reign first; rows missing any of the four fields are left out
tl = df.sort_values(by='start_of_reign', ascending=False)
tl = tl[['ruler', 'start_of_reign', 'end_of_reign', 'length_of_reign']].dropna()
rulers = tl['ruler']
# Years are negated before plotting
# (presumably they are stored as positive BCE values, so earlier reigns end up on the left - TODO confirm)
starts = -tl['start_of_reign']
ends = -tl['end_of_reign']
length = tl['length_of_reign']
# x position shared by each label and its vertical line
# NOTE(review): this is the bar centre only if length_of_reign equals the span between
# start_of_reign and end_of_reign - confirm against the data
mids = starts + length / 2

# Figure
fig, ax = plt.subplots(figsize=(12, 8))
color = cm.rainbow(np.linspace(0, 1, len(starts)))
ax.barh(y=0,
        width=(ends - starts),
        height=0.3,
        left=starts,
        color=color,
        edgecolor='black')
# Tick settings
ax.tick_params(left=False, labelleft=False)
ax.xaxis.set_minor_locator(plt.MultipleLocator(10))
# Labeling
ax.set_xlabel('Year (BCE)')
ax.set_ylabel('Ruler')
ax.set_title('Lengths of Reign by Ruler')
# Vertical figure label lines: cycle through four heights (-2, 2, -1, 1), one per ruler
levels = np.resize([-2, 2, -1, 1], len(rulers))
ax.vlines(mids, 0, levels / 2, color='black')
# Figure label text, placed just beyond the end of each vertical line
for mid, level, ruler in zip(mids, levels, rulers):
    ax.text(mid,
            (level * 1.05) / 2,
            ruler,
            ha='center',
            fontsize='7')

### Thoughts on above

- It might be worth considering whether another graph (a line graph?) could be overlaid on something more or less like this. For example, this timeline plus a line graph of the height or mass of the pyramids over time.

### Issues

- I can't figure out how to reduce the length of the vertical label lines without ruining the proportion of the figure, 
and vice versa.

- Rainbow colors probably aren't the best for visual clarity. Each segment should have more contrast with respect to those on either side.