In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
df = pd.DataFrame(pd.read_csv("../assets/normalized_pyramid_data.csv"))

In [3]:
key = ['unknown', 'pyramid?']
complexes = df[~df['pyramid_complex'].isin(key)]
kings_and_queens = complexes.groupby(['pyramid_complex', 'royal_status'], sort=False).size().reset_index(name='count')

confirmed_count_fig = px.bar(kings_and_queens, 
             x='pyramid_complex', 
             y='count', 
             color='royal_status',
             title='Number of Confirmed Pyramids At Each Complex',
             labels={
                 'pyramid_complex': 'Pyramid Complex',
                 'count': 'Number of Pyramids',
                 'royal_status': 'Royal Status'
             },
             custom_data=['royal_status'])
confirmed_count_fig.update_traces(
    hovertemplate='<br>'.join([
        'Royal status: %{customdata[0]}',
        'Pyramid Complex: %{x}',
        'Total: %{y}',
        '<extra></extra>'
    ])
)
confirmed_count_fig.show()
df

Unnamed: 0,pyramid_owner,pyramid_complex,royal_status,daughter_of,royal_mother_title,likely_wife,wife_title,vizier,regent,relationship_to_king,...,site_or_location,orientation,casing,state_of_completion,superstructure_type,height,length,width,angle,notes
0,Djoser,Djoser,King,,False,False,False,False,False,,...,Saqqara,N-S,Limestone,,Pyramid,59.9,120.00,108.00,74,"15 gates, white limestone casing"
1,Sekhemkhet,Sekhemkhet,King,,False,False,False,False,False,,...,Saqqara,N-S,Limestone,Unfinished,Pyramid,70,120.00,120.00,71-75,"niched facade, unfinished, left with rough ext..."
2,Nebka?,Nebka?,King,,False,False,False,False,False,,...,Zawiyet el-Aryan,E-W,Unknown,Unfinished,Pyramid,,200.00,200.00,unknown,debate over whether this is Pyramid of Nebka
3,Khaba,Khaba,King,,False,False,False,False,False,,...,Zawiyet el-Aryan,E-W,Unknown,Unfinished,Pyramid,42,83.80,83.80,unknown,began as step pyramid
4,Sneferu,Sneferu 1,King,,False,False,False,False,False,,...,Meidum,E-W,Limestone,Unfinished,Pyramid,94.5,144.32,144.32,52,"limestone casing, causeway 200m, Cult Pyramid ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,Neferkaure,unknown,King,,False,False,False,False,False,,...,Unknown,,Unknown,Unknown,Unknown,,,,,
68,Neferkauhor?,pyramid?,King,,False,False,False,False,False,,...,Unknown,,Unknown,Unknown,Unknown,,,,,"no archaeological evidence of a pyramid, but i..."
69,Neferirkare,unknown,King,,False,False,False,False,False,,...,Unknown,,Unknown,Unknown,Unknown,,,,,
70,Wadjkare? (Demedjibtawj),pyramid?,King,,False,False,False,False,False,,...,Unknown,,Unknown,Unknown,Unknown,,,,,"no archaeological evidence of a pyramid, but i..."


In [4]:
dynasty_pyramids = df.groupby(['dynasty', 'royal_status'], sort=False).size().reset_index(name='count')
dynasty_count_fig = px.bar(dynasty_pyramids, 
             x='dynasty', 
             y='count', 
             color='royal_status',
             title='Number of Pyramids Per Dynasty',
             labels={
                 'dynasty': 'Dynasty',
                 'count': 'Number of Pyramids (Including Speculations)',
                 'royal_status': 'Royal Status'
             },
             custom_data=['royal_status'])
dynasty_count_fig.update_traces(
    hovertemplate='<br>'.join([
        'Royal status: %{customdata[0]}',
        'Dynasty: %{x}',
        'Total: %{y}',
        '<extra></extra>'
    ])
)
dynasty_count_fig.show()

In [90]:
# Attempting to recreate George's suggested visualization (picture on my phone)

# Trying to fill 'start_of_reign' fully so that sorts work correctly
# TODO: Add this functionality to the cleanup script
unique_comp = complexes['pyramid_complex'].unique()
temp = df

for comp in unique_comp:
    start = temp[temp['pyramid_complex'] == comp]['start_of_reign'].max()
    temp[temp['pyramid_complex'] == comp]['start_of_reign'] = temp[temp['pyramid_complex'] == comp]['start_of_reign'].replace(np.nan, start)

temp.loc[temp['pyramid_complex'] == 'Sneferu 3', 'start_of_reign'] = 2574   # This had to be done to get it in the correct order (value was missing)
temp.dropna(subset='height', inplace=True)
temp.sort_values(by='start_of_reign', ascending=False, inplace=True)

# Getting the height column to be numeric
def average_of_two(val):
    if isinstance(val, int) or isinstance(val, float) or pd.isna(val): return val

    if ',' in val: return 72    # Temporary: Deals with that one weird value

    nums = val.split('-')
    if len(nums) == 1: return float(nums[0])
    return (float(nums[0]) + float(nums[1])) / 2

temp['height'] = temp['height'].map(average_of_two).astype(float)
tl = temp[['pyramid_complex', 'pyramid_owner', 'start_of_reign', 'end_of_reign', 'length_of_reign', 'height', 'royal_status', 'relationship_to_king']]
# Omit Khentkaus I (Queen, not at a King's complex)
tl = tl.drop(tl[tl['pyramid_complex'] == 'Khentkaus I'].index)


# Plotly stuff
grouped_heights_fig = px.bar(tl, 
             x='pyramid_complex', 
             y='height', 
             color='pyramid_owner',
             title='Height of Pyramids At Each Complex By Status',
             labels={
                 'pyramid_complex': 'Pyramid Complex',
                 'height': 'Height (meters)',
                 'royal_status': 'Royal Status'
             },
             barmode='group',
             text='pyramid_owner',
             custom_data=['royal_status', 'pyramid_owner', tl['relationship_to_king'].fillna('Self')])
grouped_heights_fig.update_layout(bargap=0)
grouped_heights_fig.update_traces(
    hovertemplate='<br>'.join([
        'Royal status: %{customdata[0]}',
        'Pyramid Complex: %{x}',
        'Height: %{y} m',
        'Pyramid Owner: %{customdata[1]}',
        'Relationship To King: %{customdata[2]}',
        '<extra></extra>'
    ])
)
#grouped_heights_fig.show()
from plotly import graph_objects as go

def setColor(y):
    if y == "King": return '#636EFA'     # default blue
    elif y == "Queen": return '#EF553B'  # default red

colors = [setColor(y) for y in tl['royal_status'].values]

horizontal = go.Figure(go.Bar(
    x = tl.loc[:,["pyramid_complex", "pyramid_owner"]].T.values, 
    y = tl["height"].values, 
    marker={"color": colors},
    name = "King"
    )
)
horizontal.add_trace(
            go.Scatter(
            x=["Sneferu 1"],
            y=[0],
            name="Queen",
        ))
horizontal.update_layout(
    title = "Height of Pyramids In Each Complex",
    xaxis = dict(title = "Pyramid Owner (Grouped By Complex)"),
    yaxis = dict(title = "Height (meters)"),
    showlegend = True
)
horizontal.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [6]:
complex_heights = tl.groupby('pyramid_complex', sort=False, as_index=False)['height'].sum()

line_heights_fig = px.line(
    complex_heights,
    x='pyramid_complex',
    y='height',
    title='Total Height of All Pyramids At Each Complex',
    labels={
        'pyramid_complex': 'Pyramid Complex',
        'height': 'Aggregated Height (meters)'
    }
)
line_heights_fig.update_traces(
    hovertemplate='<br>'.join([
        'Pyramid Complex: %{x}',
        'Aggregated Height: %{y} m'
    ])
)
line_heights_fig.show()

In [7]:
# Exporting to HTML
from jinja2 import Template

output_html_path = r'../docs/index.html'
input_template_path = r'../templates/template.html'

plotly_jinja_data = {
    'grouped_heights':grouped_heights_fig.to_html(full_html=False),
    'dynasty_count':dynasty_count_fig.to_html(full_html=False),
    'confirmed_count':confirmed_count_fig.to_html(full_html=False),
    'line_heights':line_heights_fig.to_html(full_html=False)
}

with open(output_html_path, 'w', encoding='utf-8') as out:
    with open(input_template_path) as template:
        j2_template = Template(template.read())
        out.write(j2_template.render(plotly_jinja_data))

In [8]:
import plotly.graph_objects as go

com_tl = tl.copy()

# Eliminate Sneferu's complex division
sneferu_end = com_tl[com_tl['pyramid_complex'] == "Sneferu 2"].end_of_reign.item()
sneferu_length = com_tl[com_tl['pyramid_complex'] == "Sneferu 1"].start_of_reign.item() - sneferu_end
com_tl.loc[com_tl['pyramid_complex'] == "Sneferu 1", ['end_of_reign']] = sneferu_end
com_tl.loc[com_tl['pyramid_complex'] == "Sneferu 1", ['length_of_reign']] = sneferu_length
com_tl['pyramid_complex'] = com_tl['pyramid_complex'].replace(r'Sneferu *[0-9]', 'Sneferu', regex=True)

# Get the groups with relevant data
com_heights = com_tl.groupby(['pyramid_complex'], sort=False, as_index=False).agg({
    'start_of_reign': 'first',
    'end_of_reign': 'first',
    'length_of_reign': 'first',
    'height': 'sum'
})
com_heights['start_of_reign'] = -com_heights['start_of_reign']
com_heights['end_of_reign'] = -com_heights['end_of_reign']


# Select random colors for each complex
# String below copied from the output of an error, which listed valid CSS colors
colors = 'aliceblue, antiquewhite, aqua, aquamarine, azure, beige, bisque, black, blanchedalmond, blue, blueviolet, brown, burlywood, cadetblue, chartreuse, chocolate, coral, cornflowerblue, cornsilk, crimson, cyan, darkblue, darkcyan, darkgoldenrod, darkgray, darkgrey, darkgreen, darkkhaki, darkmagenta, darkolivegreen, darkorange, darkorchid, darkred, darksalmon, darkseagreen, darkslateblue, darkslategray, darkslategrey, darkturquoise, darkviolet, deeppink, deepskyblue, dimgray, dimgrey, dodgerblue, firebrick, floralwhite, forestgreen, fuchsia, gainsboro, ghostwhite, gold, goldenrod, gray, grey, green, greenyellow, honeydew, hotpink, indianred, indigo, ivory, khaki, lavender, lavenderblush, lawngreen, lemonchiffon, lightblue, lightcoral, lightcyan, lightgoldenrodyellow, lightgray, lightgrey, lightgreen, lightpink, lightsalmon, lightseagreen, lightskyblue, lightslategray, lightslategrey, lightsteelblue, lightyellow, lime, limegreen, linen, magenta, maroon, mediumaquamarine, mediumblue, mediumorchid, mediumpurple, mediumseagreen, mediumslateblue, mediumspringgreen, mediumturquoise, mediumvioletred, midnightblue, mintcream, mistyrose, moccasin, navajowhite, navy, oldlace, olive, olivedrab, orange, orangered, orchid, palegoldenrod, palegreen, paleturquoise, palevioletred, papayawhip, peachpuff, peru, pink, plum, powderblue, purple, red, rosybrown, royalblue, rebeccapurple, saddlebrown, salmon, sandybrown, seagreen, seashell, sienna, silver, skyblue, slateblue, slategray, slategrey, snow, springgreen, steelblue, tan, teal, thistle, tomato, turquoise, violet, wheat, white, whitesmoke, yellow, yellowgreen'.split(', ')
choices = np.random.default_rng().choice(len(colors), len(com_heights), replace=False)

# Create figure
figure = go.Figure()

# Add each bar individually as a trace
for complex, start, end, reign, height in com_heights.values:
    figure.add_trace(
        go.Bar(
            x=[start],
            y=[height],
            width=[reign],
            text=[complex],
            name=complex,
            offset=0.5,
            customdata=[complex, start, end, reign, height],
            hovertemplate='<br>'.join([
                'Pyramid Complex: %{customdata[0]}',
                'Reign: %{customdata[1]} - %{customdata[2]} BCE',
                'Length of Reign: %{customdata[3]} Years',
                'Agg. Height: %{customdata[4]} m',
                '<extra></extra>'
            ])
        )
    )
figure.update_xaxes(
    title_text='Time (Years BCE)',
    minor={'showgrid': True, 'dtick': 10, 'tick0': 0.1})
figure.update_yaxes(title_text='Aggregated Height (Meters)')
figure.update_layout(
    autosize=False,
    width=1000,
    height=800,
    title={'text': 'Total Height of All Pyramids At Each Complex'},
    xaxis={
        'tickvals': [i for i in range(-2700, -2100, 25)],
        'tickmode': 'array',
        'showticklabels': True,
        'ticks': 'outside',
        'ticklen': 5
    }
)
figure.show()

# NOTE: They want to see Ibi on this timeline, however his start and end are not listed, so
# he cannot be placed anywhere

# NOTE: Nebka also missing because there is no height listed for his pyramid (decided we won't include him)

In [9]:
queens = temp[temp['royal_status'] == 'Queen']
queen_data = queens[['pyramid_owner', 'dynasty', 'royal_status', 'daughter_of', 'royal_mother_title', 'likely_wife', 'wife_title', 'vizier', 'regent', 'relationship_to_king', 'height']]
queen_data['dynasty'] = queen_data['dynasty'].astype(int)

by_dyn = queen_data.groupby('dynasty', as_index=False)

'''fig_3d = px.scatter_3d(
    queen_data,
    x='vizier',
    y='likely_wife',
    z='regent',
    color='royal_mother_title'
)'''

# FACET is the word you keep looking for

vizier_dyn = queen_data.groupby('dynasty', as_index=False)['vizier'].value_counts()
fig = px.bar(
    vizier_dyn,
    x='dynasty',
    y='count',
    color='vizier',
    color_discrete_sequence=['red', 'blue'],
    barmode='stack',
    labels={
        'dynasty': 'Dynasty',
        'count': 'Total',
        'vizier': 'Vizier?'
    },
    title='Vizier Queen Pyramids By Dynasty'
)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [10]:
royal_mother_title_dyn = queen_data.groupby('dynasty', as_index=False)['royal_mother_title'].value_counts()
fig = px.bar(
    royal_mother_title_dyn,
    x='dynasty',
    y='count',
    color='royal_mother_title',
    color_discrete_sequence=['red', 'blue'],
    barmode='stack',
    labels={
        'dynasty': 'Dynasty',
        'count': 'Total',
        'royal_mother_title': 'Royal Mother?'
    },
    title='Royal Mother Queen Pyramids By Dynasty'
)
fig.show()

In [11]:
likely_wife_dyn = queen_data.groupby('dynasty', as_index=False)['likely_wife'].value_counts()
fig = px.bar(
    likely_wife_dyn,
    x='dynasty',
    y='count',
    color='likely_wife',
    barmode='stack',
    labels={
        'dynasty': 'Dynasty',
        'count': 'Total',
        'likely_wife': 'Likely Wife?'
    },
    title='Wife Queen Pyramids By Dynasty'
)
fig.show()



In [12]:
regent_dyn = queen_data.groupby('dynasty', as_index=False)['regent'].value_counts()
fig = px.bar(
    regent_dyn,
    x='dynasty',
    y='count',
    color='regent',
    color_discrete_sequence=['red', 'blue'],
    barmode='stack',
    labels={
        'dynasty': 'Dynasty',
        'count': 'Total',
        'regent': 'Regent?'
    },
    title='Regent Queen Pyramids By Dynasty'
)

fig.show()

In [13]:
# Suggestion from meeting:
# Queen scatterplot representing the above with shapes, colors, outlines of shapes, etc.

# NOTE: The 3D scatterplot probably isn't ideal for a paper, can be unclear where exactly
# certain points are positioned. Try encoding all of the information in a 2d representation.
fig = px.scatter_3d(
    queen_data,
    x='dynasty',
    y='height',
    z='royal_mother_title',
    symbol='likely_wife',
    color='regent',
    symbol_map={True: 'cross', False: 'square'}
)
fig.show()

In [14]:
# Reshape queen data from wide to long (Binary categories get put into a new column, each category applied to a specific queen given a row, with the status of that category in another column)

melted_queens = queen_data.melt(ignore_index=False, id_vars=['dynasty', 'height', 'pyramid_owner', 'relationship_to_king', 'daughter_of'], value_vars=['vizier', 'regent', 'royal_mother_title', 'likely_wife', 'wife_title']).reset_index()

In [15]:
# Strip scatter plot version 1

melted_truth = melted_queens[melted_queens['value'] == True]

fig = px.strip(
    melted_truth,
    x='dynasty',
    y='height',
    color='variable'
)
fig.show()

In [16]:
# Strip catter plot version 2 (I think this is the way to go)

fig = px.strip(
    melted_truth,
    x='dynasty',
    y='height',
    #color='value',
    #color_discrete_sequence=['red', 'blue'],
    color='pyramid_owner',
    facet_col='variable',
    stripmode='overlay',
    labels={
        'dynasty': 'Dynasty',
        'height': 'Height of Pyramid (meters)',
        'variable': 'Status',
        'value': 'Has Status?'
    },
    title='Status of Queens with Pyramids By Dynasty',
    custom_data=['pyramid_owner', 'relationship_to_king', melted_truth['daughter_of'].fillna('NA'), 'height'],
)
fig.update_traces(
    hovertemplate='<br>'.join([
        'Queen: %{customdata[0]}',
        'Relationship to King: %{customdata[1]}',
        'Daughter of: %{customdata[2]}',
        'Pyramid Height: %{customdata[3]} m'
        '<extra></extra>'
    ]))
fig.show()