In [2]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

In [111]:
# Load in just the columns we need from Maria's big dataset
df = pd.read_csv('mariaData.csv')
df = df[['ProlificParticipantID', 'font', 'logBouma', 'rsvpReadingSpeed']]
df['logRSVP'] = np.log10(df['rsvpReadingSpeed'])

In [112]:
df.head()

Unnamed: 0,ProlificParticipantID,font,logBouma,rsvpReadingSpeed,logRSVP
0,5563984afdf99b672b5749b6,Omfug,-0.129397,636.4874,2.80379
1,5563984afdf99b672b5749b6,TimesNewRoman,-0.770351,462.581968,2.665189
2,5563984afdf99b672b5749b6,Zapfino,,370.415773,2.568689
3,5563984afdf99b672b5749b6,Omfug,0.062502,264.183655,2.421906
4,5563984afdf99b672b5749b6,TimesNewRoman,-0.602134,423.924643,2.627289


In [113]:
# Create separate columns for logBouma and logRSVP for each font
df_wide = (
    df.pivot_table(
        index=['ProlificParticipantID'],
        columns='font',
        values=['logBouma','logRSVP']
    ).reset_index()
)

In [114]:
df_wide.head()

Unnamed: 0_level_0,ProlificParticipantID,logBouma,logBouma,logBouma,logBouma,logBouma,logBouma,logBouma,logBouma,logBouma,...,logRSVP,logRSVP,logRSVP,logRSVP,logRSVP,logRSVP,logRSVP,logRSVP,logRSVP,logRSVP
font,Unnamed: 1_level_1,Agoesa,Arial,Baskerville,Caslon,Courier,Edwardian,Extenda,Frutiger,Georgia,...,Omfug,Optimistic,ProximaNova,Rollerscript,Sabon,ScarletWood,TheSans,TimesNewRoman,Tiny,Zapfino
0,5563984afdf99b672b5749b6,,,,,,,,,,...,2.612848,,,,,,,2.646239,,2.436402
1,55b2d3f2fdf99b525bc839aa,,,,,,,,,,...,2.823243,,,,,,,2.948219,,2.563493
2,55ec49b67480920010aaa4e8,,,,,,,,,,...,2.513623,,,,,,,2.807362,,2.603427
3,5608a73fd7def20012b7389d,,-0.968167,,,,,-0.611771,,,...,,,,,,,,,,
4,5658000ca9872d0011e1ba26,,,,,,,,-1.017779,,...,,,,,2.991738,,,,,


In [115]:
df_wide.to_csv('crowdingRSVP_fonts.csv')

In [116]:
def correlation_fonts(df, font1, font2):
    '''
    Takes in a df and two fonts and returns the Pearson R and p-value
    with font1 logBouma and font2 logRSVP
    '''
    columns = [('ProlificParticipantID', ''),
               ('logBouma', font1),
               ('logRSVP', font2)]
    
    df_fonts = df.loc[:, columns]
    df_fonts = df_fonts.dropna()
    x = df_fonts[('logBouma', font1)]
    y = df_fonts[('logRSVP', font2)]
    
    return pearsonr(x, y)[0].round(4), pearsonr(x, y)[1].round(4)

In [117]:
correlation_fonts(df_wide, 'Arial', 'Arial')

(-0.4421, 0.0163)

In [118]:
# Run function for same-font pairs
single_fonts = [
    'Agoesa', 'Arial', 'Baskerville', 'Caslon', 'Courier', 'Edwardian',
    'Extenda', 'Frutiger', 'Georgia', 'HautRelief', 'LeMonde', 'Letraflex',
    'LiebeLotte', 'Museo', 'Omfug', 'Optimistic', 'ProximaNova',
    'Rollerscript', 'Sabon', 'ScarletWood', 'TheSans', 'TimesNewRoman',
    'Tiny', 'Zapfino'
]

for font in single_fonts:
    r, p = correlation_fonts(df_wide, font, font)
    print(f"{font} x {font}", (r, p))

Agoesa x Agoesa (-0.6284, 0.0003)
Arial x Arial (-0.4421, 0.0163)
Baskerville x Baskerville (-0.2957, 0.0756)
Caslon x Caslon (-0.3728, 0.0356)
Courier x Courier (-0.6098, 0.0002)
Edwardian x Edwardian (-0.3762, 0.0259)
Extenda x Extenda (-0.5316, 0.0036)
Frutiger x Frutiger (-0.4248, 0.0172)
Georgia x Georgia (-0.4767, 0.0033)
HautRelief x HautRelief (-0.4723, 0.0073)
LeMonde x LeMonde (-0.1668, 0.4057)
Letraflex x Letraflex (-0.6112, 0.0005)
LiebeLotte x LiebeLotte (-0.3981, 0.0218)
Museo x Museo (-0.5478, 0.0004)
Omfug x Omfug (-0.4319, 0.0024)
Optimistic x Optimistic (-0.4055, 0.0157)
ProximaNova x ProximaNova (-0.4179, 0.0376)
Rollerscript x Rollerscript (-0.0655, 0.7042)
Sabon x Sabon (-0.3387, 0.0623)
ScarletWood x ScarletWood (-0.218, 0.2016)
TheSans x TheSans (-0.3422, 0.0476)
TimesNewRoman x TimesNewRoman (-0.4126, 0.0032)
Tiny x Tiny (-0.2149, 0.215)
Zapfino x Zapfino (-0.1853, 0.2589)


In [119]:
# Run function for different-font pairs
font_pairs = [
    ('Omfug', 'TimesNewRoman'),
    ('TimesNewRoman', 'Zapfino'),
    ('Zapfino', 'Omfug'),
    ('Arial', 'Extenda'),
    ('Extenda', 'Letraflex'),
    ('Letraflex', 'Arial'),
    ('Frutiger', 'HautRelief'),
    ('HautRelief', 'Frutiger'),
    ('Sabon', 'Frutiger'),
    ('Optimistic', 'TheSans'),
    ('TheSans', 'Tiny'),
    ('Tiny', 'Optimistic'),
    ('Edwardian', 'Georgia'),
    ('Georgia', 'Museo'),
    ('Museo', 'Edwardian'),
    ('Caslon', 'LiebeLotte'),
    ('Courier', 'Caslon'),
    ('LiebeLotte', 'Courier'),
    ('Agoesa', 'LeMonde'),
    ('LeMonde', 'ProximaNova'),
    ('ProximaNova', 'Agoesa'),
    ('Baskerville', 'Rollerscript'),
    ('Rollerscript', 'ScarletWood'),
    ('ScarletWood', 'Baskerville'),
]

for f1, f2 in font_pairs:
    for a, b in [(f1, f2), (f2, f1)]:
        r, p = correlation_fonts(df_wide, a, b)
        print(f"{a} x {b}", (r, p))

Omfug x TimesNewRoman (-0.3599, 0.013)
TimesNewRoman x Omfug (-0.368, 0.0093)
TimesNewRoman x Zapfino (-0.4206, 0.0026)
Zapfino x TimesNewRoman (-0.2122, 0.1946)
Zapfino x Omfug (-0.3516, 0.0282)
Omfug x Zapfino (-0.3584, 0.0134)
Arial x Extenda (-0.4129, 0.029)
Extenda x Arial (-0.4268, 0.021)
Extenda x Letraflex (-0.5275, 0.0039)
Letraflex x Extenda (-0.561, 0.0019)
Letraflex x Arial (-0.5903, 0.0009)
Arial x Letraflex (-0.3859, 0.0426)
Frutiger x HautRelief (-0.5662, 0.0009)
HautRelief x Frutiger (-0.2468, 0.1807)
HautRelief x Frutiger (-0.2468, 0.1807)
Frutiger x HautRelief (-0.5662, 0.0009)
Sabon x Frutiger (-0.3939, 0.0284)
Frutiger x Sabon (-0.3129, 0.0865)
Optimistic x TheSans (-0.3939, 0.0192)
TheSans x Optimistic (-0.2958, 0.0894)
TheSans x Tiny (-0.4015, 0.0168)
Tiny x TheSans (-0.1172, 0.5026)
Tiny x Optimistic (-0.1248, 0.4749)
Optimistic x Tiny (-0.4327, 0.0094)
Edwardian x Georgia (-0.396, 0.0185)
Georgia x Edwardian (-0.582, 0.0002)
Georgia x Museo (-0.5944, 0.0001)
Mus