In [1]:
#Packages
print('Loading Packages')
import numpy as np
import pandas as pd
import pickle
from functools import reduce
import plotly.express as px


Loading Packages


In [2]:
#Load Data
# Read the two per-volume tables, then join them on their shared 'HTID' column.
# DataFrame.merge defaults to an inner join, so only ids present in both files are kept.
volume_topics = pd.read_csv('../temporary/topic_weights.csv')
volume_categories = pd.read_csv('../temporary/volumes_scores.csv')
volumes = volume_topics.merge(volume_categories, on = 'HTID')

In [3]:
# Preview the merged frame (topic-weight columns joined with the score/percentile columns).
volumes

Unnamed: 0,HTID,1,2,3,4,5,6,7,8,9,...,progress_regression,Year,optimism_percentile,industry_2_percentile,industry_3_percentile,optimistic_percentile,progress_percentile,pessimism_percentile,regression_percentile,progress_regression_percentile
0,uc1.b5568131,0.335310,0.000703,3.859994e-05,0.070415,0.094437,1.841183e-05,0.001295,0.000024,0.000154,...,0.0,1898.0,0.500003,0.342679,0.306504,0.565038,0.565038,0.565038,0.565038,0.500003
1,uc1.$b135547,0.002840,0.001595,3.160480e-05,0.001070,0.071715,1.507521e-05,0.000041,0.000020,0.067670,...,0.0,1832.0,0.500003,0.530289,0.573102,0.933051,0.933051,0.933051,0.933051,0.500003
2,hvd.32044106314859,0.000017,0.000032,1.514681e-05,0.000024,0.000045,7.224896e-06,0.000019,0.000009,0.000060,...,0.0,1896.0,0.500003,0.011956,0.011482,0.167479,0.167479,0.167479,0.167479,0.500003
3,uc1.$b29323,0.000014,0.000003,3.943318e-03,0.013454,0.000005,3.564930e-02,0.000039,0.000075,0.000006,...,0.0,1825.0,0.500003,0.137685,0.129903,0.410110,0.410110,0.410110,0.410110,0.500003
4,mdp.39015076816662,0.000002,0.000004,9.407721e-04,0.000003,0.000006,9.914233e-07,0.000003,0.020569,0.033567,...,0.0,1882.0,0.500003,0.112095,0.140451,0.969351,0.969351,0.969351,0.969351,0.500003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162678,chi.090018182,0.000023,0.000678,1.974364e-05,0.000191,0.064386,1.686437e-04,0.024865,0.000490,0.148955,...,0.0,1843.0,0.500003,0.812009,0.801820,0.958213,0.958213,0.958213,0.958213,0.500003
162679,hvd.32044089522510,0.020714,0.000236,1.241825e-01,0.000008,0.000014,2.231040e-06,0.000044,0.016562,0.068709,...,0.0,1874.0,0.500003,0.367033,0.402611,0.803292,0.803292,0.803292,0.803292,0.500003
162680,uc1.31175035187601,0.000134,0.019152,1.454623e-05,0.000258,0.001803,6.938422e-06,0.000136,0.000009,0.000644,...,0.0,1808.0,0.500003,0.609492,0.581733,0.909960,0.909960,0.909960,0.909960,0.500003
162681,aeu.ark+=13960=t09w1n868,0.000068,0.008213,5.900777e-05,0.000095,0.005885,2.814618e-05,0.010069,0.000037,0.027360,...,0.0,1857.0,0.500003,0.919260,0.919027,0.889460,0.889460,0.889460,0.889460,0.500003


In [4]:
#Global options
# half_century = True  -> one figure year every 50 years (1550, 1600, ..., 1850)
# half_century = False -> every single year from 1550 through 1890
half_century = True

years = list(range(1550, 1891, 50 if half_century is True else 1))

In [20]:
#Functions
def ternary_plots(data, color, path, legend_title, years = years, grayscale = False, size = None, decreasing_scale = False, size_max = 13, scale = 1, colorbar_year = 1850):
    """Write one ternary scatter PNG per entry in `years`.

    For each year, the volumes whose 'Year' lies within +/- 10 years of it are
    plotted on Religion / Political Economy / Science axes and saved to
    `path + str(year) + '.png'`.

    Parameters
    ----------
    data : DataFrame
        One row per volume. Must contain the columns 'Year', 'Religion',
        'Political Economy', 'Science', the `color` column and, when used,
        the `size` column.
    color : str
        Column that drives the dot colour (colour range is fixed to [0, 1]).
    path : str
        Prefix the file name is appended to. It is concatenated as-is, so a
        directory must end with a path separator and must already exist.
    legend_title : str
        Title shown above the colorbar.
    years : list of int
        Centre years to draw. The default is the module-level `years` list as
        it was when this function was defined.
    grayscale : bool
        If true, use plotly's 'gray' colorscale.
        NOTE(review): an earlier comment said this also reverses the colour
        scale; nothing here reverses it explicitly - confirm the intent.
    size : str or None
        Column that drives the dot size. None draws uniformly sized dots.
    decreasing_scale : bool
        If true, dot size is based on `1 - size column`, so larger values get
        smaller dots (assumes the column is scaled to [0, 1], e.g. a
        percentile). When `size` is None this falls back to
        'industry_3_percentile', which is what the previous version always used.
    size_max : int
        Maximum marker size passed to plotly.
    scale : float
        Exponent applied to the size values before plotting.
    colorbar_year : int
        The year whose figure keeps its colorbar and is exported 900 px wide
        to make room for it; every other year is exported without a colorbar.

    Notes
    -----
    Static export was reported by the original author to work only with
    kaleido 0.1.0 ('conda install python-kaleido=0.1.0post1') and plotly 5.10.0.
    """
    for year in years:
        # Volumes published within ten years either side of the target year.
        window = data[(data['Year'] >= (year-10)) & (data['Year'] <= (year+10))]
        print(year)

        # Resolve the size column locally so `size` itself is never rebound
        # between iterations.
        size_column = size
        if decreasing_scale and size_column is None:
            size_column = 'industry_3_percentile'  # legacy default for reversed sizing

        if size_column is None:
            marker_size = None  # uniform dots; avoids the old df['None'] KeyError
        else:
            # Build the size values as a standalone Series instead of adding a
            # column to the filtered slice (which raised SettingWithCopyWarning).
            marker_size = window[str(size_column)]
            if decreasing_scale:
                marker_size = 1 - marker_size
            marker_size = marker_size**scale

        fig = px.scatter_ternary(window, a = 'Religion', b = 'Political Economy', c = 'Science',
                                 color = str(color),
                                 size = marker_size,
                                 size_max=size_max,
                                 range_color=[0,1])

        fig.update_layout(title_text = str(year),
                        title_font_size=30,
                        font_size=20,
                        margin_l = 110,
                        legend_title_side = 'top',
                        coloraxis_colorbar_title_text = legend_title,
                        coloraxis_colorbar_title_side = 'top'
                        )

        fig.update_ternaries(bgcolor="white",
                        aaxis_linecolor="black",
                        baxis_linecolor="black",
                        caxis_linecolor="black"
                        )

        if grayscale:
            fig.update_layout(coloraxis = {'colorscale':'gray'})

        fig.update_traces(
            showlegend = False
        )

        output_file = path + str(year) + '.png'
        if year == colorbar_year:
            # Wider canvas so the colorbar fits next to the triangle.
            fig.write_image(output_file, width=900)
        else:
            fig.update(layout_coloraxis_showscale=False) #removes colorbar
            fig.write_image(output_file)


In [6]:
# Percentile rank of each volume's topic '13' weight across all volumes
# (rank(pct=True) gives values in (0, 1]). Adds the column to `volumes` in place.
volumes['13_percentile'] = volumes['13'].rank(pct=True)

In [7]:
# Show the raw topic '13' weight next to its percentile rank.
volumes[['13', '13_percentile']]

Unnamed: 0,13,13_percentile
0,0.000025,0.575675
1,0.000021,0.552052
2,0.000010,0.445511
3,0.000001,0.017826
4,0.809524,0.998187
...,...,...
162678,0.000013,0.489959
162679,0.063110,0.970765
162680,0.000010,0.437925
162681,0.000039,0.624988


In [8]:
# Re-display the frame; '13_percentile' now appears as the last column.
volumes

Unnamed: 0,HTID,1,2,3,4,5,6,7,8,9,...,Year,optimism_percentile,industry_2_percentile,industry_3_percentile,optimistic_percentile,progress_percentile,pessimism_percentile,regression_percentile,progress_regression_percentile,13_percentile
0,uc1.b5568131,0.335310,0.000703,3.859994e-05,0.070415,0.094437,1.841183e-05,0.001295,0.000024,0.000154,...,1898.0,0.500003,0.342679,0.306504,0.565038,0.565038,0.565038,0.565038,0.500003,0.575675
1,uc1.$b135547,0.002840,0.001595,3.160480e-05,0.001070,0.071715,1.507521e-05,0.000041,0.000020,0.067670,...,1832.0,0.500003,0.530289,0.573102,0.933051,0.933051,0.933051,0.933051,0.500003,0.552052
2,hvd.32044106314859,0.000017,0.000032,1.514681e-05,0.000024,0.000045,7.224896e-06,0.000019,0.000009,0.000060,...,1896.0,0.500003,0.011956,0.011482,0.167479,0.167479,0.167479,0.167479,0.500003,0.445511
3,uc1.$b29323,0.000014,0.000003,3.943318e-03,0.013454,0.000005,3.564930e-02,0.000039,0.000075,0.000006,...,1825.0,0.500003,0.137685,0.129903,0.410110,0.410110,0.410110,0.410110,0.500003,0.017826
4,mdp.39015076816662,0.000002,0.000004,9.407721e-04,0.000003,0.000006,9.914233e-07,0.000003,0.020569,0.033567,...,1882.0,0.500003,0.112095,0.140451,0.969351,0.969351,0.969351,0.969351,0.500003,0.998187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162678,chi.090018182,0.000023,0.000678,1.974364e-05,0.000191,0.064386,1.686437e-04,0.024865,0.000490,0.148955,...,1843.0,0.500003,0.812009,0.801820,0.958213,0.958213,0.958213,0.958213,0.500003,0.489959
162679,hvd.32044089522510,0.020714,0.000236,1.241825e-01,0.000008,0.000014,2.231040e-06,0.000044,0.016562,0.068709,...,1874.0,0.500003,0.367033,0.402611,0.803292,0.803292,0.803292,0.803292,0.500003,0.970765
162680,uc1.31175035187601,0.000134,0.019152,1.454623e-05,0.000258,0.001803,6.938422e-06,0.000136,0.000009,0.000644,...,1808.0,0.500003,0.609492,0.581733,0.909960,0.909960,0.909960,0.909960,0.500003,0.437925
162681,aeu.ark+=13960=t09w1n868,0.000068,0.008213,5.900777e-05,0.000095,0.005885,2.814618e-05,0.010069,0.000037,0.027360,...,1857.0,0.500003,0.919260,0.919027,0.889460,0.889460,0.889460,0.889460,0.500003,0.624988


In [24]:
# Topic 13 figures: dot colour follows the progress percentile,
# dot size follows the topic-13 weight percentile.
print('Topic 13 triangles')
ternary_plots(
    data = volumes,
    path = '../output/volume_triangles/topics/13/',
    color = 'progress_percentile',
    legend_title = 'Progress (Percentile)',
    size = '13_percentile',
)

Topic 13 triangles
1550
1600
1650
1700
1750
1800
1850


In [26]:
# Summary statistics of the raw topic '13' weights.
volumes['13'].describe()

count    1.626830e+05
mean     1.246965e-02
std      7.938055e-02
min      6.244464e-08
25%      4.385838e-06
50%      1.389835e-05
75%      1.232260e-04
max      9.798988e-01
Name: 13, dtype: float64