In [56]:
import pandas as pd
import plotly.express as px

In [57]:
# Load the font-stack table (one row per company website) from the CSV file.
df = pd.read_csv(filepath_or_buffer='tech_websites_font_manual.csv')
df

Unnamed: 0,company,website,fonts
0,youtube,https://youtube.com,'Roboto;Arial;sans-serif'
1,google,https://google.com,'arial;sans-serif'
2,gmail,https://gmail.com,arial;sans-serif;Product Sans'
3,google drive,https://drive.google.com,'arial;sans-serif'
4,twitter,https://twitter.com,'TwitterChirp;-apple-system;BlinkMacSystemFon...
5,microsoft,https://microsoft.com,'Segoe UI;SegoeUI;Helvetica Neue;Helvetica;Ar...
6,amazon,https://amazon.com,'Arial;sans-serif'
7,facebook,https://facebook.com,'system-ui;-apple-system;BlinkMacSystemFont;....
8,instagram,https://instagram.com,'-apple-system;BlinkMacSystemFont;Segoe UI;Ro...
9,messenger,https://messenger.com,'Helvetica;Arial;sans-serif'


In [58]:
def total_count(company_font_dict):
    """Count how many times each font appears across all companies.

    Args:
        company_font_dict: mapping of company name to an iterable of font
            names.

    Returns:
        dict mapping each font name to its number of occurrences, with keys
        in the order the fonts were first encountered.
    """
    occurrences = {}
    for font_stack in company_font_dict.values():
        for font_name in font_stack:
            occurrences[font_name] = occurrences.get(font_name, 0) + 1
    return occurrences

In [59]:
def test_case0():
    """Check total_count on two companies whose font lists overlap."""
    fonts_by_company = {
        'yt': ['roboto', 'arial', 'sans-serif'],
        'google': ['arial', 'sans-serif'],
    }
    expected_counts = {'roboto': 1, 'arial': 2, 'sans-serif': 2}

    assert total_count(fonts_by_company) == expected_counts, 'is not good'


test_case0()

In [60]:
# Ranking algorithm: a font at 0-based position i of a company's font list
# contributes 0.5 ** i; a font's total rank is the sum of its contributions
# over all companies.
def totally_scientific_ranking(company_font_dict):
    """Score fonts by how early they appear in each company's font list.

    Args:
        company_font_dict: mapping of company name to an ordered iterable of
            font names.

    Returns:
        dict mapping each font name to the sum of ``pow(0.5, index)`` over
        every occurrence, where ``index`` is the font's 0-based position in
        the list it appears in.
    """
    scores = {}
    for font_stack in company_font_dict.values():
        for position, font_name in enumerate(font_stack):
            weight = pow(0.5, position)
            # First sighting stores the weight as-is; later ones accumulate.
            scores[font_name] = scores[font_name] + weight if font_name in scores else weight
    return scores

In [61]:
def test_case1():
    """Check totally_scientific_ranking on two overlapping font lists."""
    fonts_by_company = {
        'yt': ['roboto', 'arial', 'sans-serif'],
        'google': ['arial', 'sans-serif'],
    }
    # The weights are exact binary fractions, so equality on floats is safe here.
    expected_scores = {'roboto': 1, 'arial': 1.5, 'sans-serif': 0.75}

    assert totally_scientific_ranking(fonts_by_company) == expected_scores, 'is not good'


test_case1()

In [62]:
# preprocess
# Turn every raw font-stack string into a list of lowercase font names; the
# result is what gets stored in the 'formatted fonts' column.
def _split_font_stack(raw_fonts):
    """Split one raw ';'-separated font string into clean font names.

    Single-quote characters are removed (the stacks are wrapped in quotes and
    at least one row has an unbalanced quote), names are lower-cased, and
    whitespace around each individual name is stripped so that
    'Arial; Sans-Serif' and 'arial;sans-serif' yield the same names. Empty
    entries (e.g. produced by a trailing ';') are dropped so they are never
    counted as a font.

    Args:
        raw_fonts: the raw string from the 'fonts' column.

    Returns:
        list of cleaned font names in their original order.
    """
    unquoted = raw_fonts.replace('\'', '').lower()
    return [name.strip() for name in unquoted.split(';') if name.strip()]


# this is a Series holding one list of font names per company
font_series = df['fonts'].apply(_split_font_stack)
font_series


0                           [roboto, arial, sans-serif]
1                                   [arial, sans-serif]
2                     [arial, sans-serif, product sans]
3                                   [arial, sans-serif]
4     [twitterchirp, -apple-system, blinkmacsystemfo...
5     [segoe ui, segoeui, helvetica neue, helvetica,...
6                                   [arial, sans-serif]
7     [system-ui, -apple-system, blinkmacsystemfont,...
8     [-apple-system, blinkmacsystemfont, segoe ui, ...
9                        [helvetica, arial, sans-serif]
10                       [helvetica, arial, sans-serif]
11    [inter, roobert, helvetica neue, helvetica, ar...
12    [proximanova, arial, tahoma, pingfangsc, sans-...
13    [sf pro text, sf pro icons, helvetica neue, he...
14                     [almaden sans, helvetica, arial]
15    [cash-market-rounded, helvetica neue, helvetic...
16    [netflix sans, helvetica neue, segoe ui, robot...
17    [gg sans, noto sans, helvetica neue, helve

In [63]:
# Store the cleaned font lists on the main frame as the 'formatted fonts' column.
df['formatted fonts'] = font_series

In [64]:
# Display the frame to check the 'formatted fonts' column next to the raw 'fonts'.
df

Unnamed: 0,company,website,fonts,formatted fonts
0,youtube,https://youtube.com,'Roboto;Arial;sans-serif',"[roboto, arial, sans-serif]"
1,google,https://google.com,'arial;sans-serif',"[arial, sans-serif]"
2,gmail,https://gmail.com,arial;sans-serif;Product Sans',"[arial, sans-serif, product sans]"
3,google drive,https://drive.google.com,'arial;sans-serif',"[arial, sans-serif]"
4,twitter,https://twitter.com,'TwitterChirp;-apple-system;BlinkMacSystemFon...,"[twitterchirp, -apple-system, blinkmacsystemfo..."
5,microsoft,https://microsoft.com,'Segoe UI;SegoeUI;Helvetica Neue;Helvetica;Ar...,"[segoe ui, segoeui, helvetica neue, helvetica,..."
6,amazon,https://amazon.com,'Arial;sans-serif',"[arial, sans-serif]"
7,facebook,https://facebook.com,'system-ui;-apple-system;BlinkMacSystemFont;....,"[system-ui, -apple-system, blinkmacsystemfont,..."
8,instagram,https://instagram.com,'-apple-system;BlinkMacSystemFont;Segoe UI;Ro...,"[-apple-system, blinkmacsystemfont, segoe ui, ..."
9,messenger,https://messenger.com,'Helvetica;Arial;sans-serif',"[helvetica, arial, sans-serif]"


In [65]:
# convert to dictionary: company name -> list of formatted font names
company_font_dict = dict(zip(df['company'], df['formatted fonts']))
print(company_font_dict)
    

{'youtube': ['roboto', 'arial', 'sans-serif'], 'google': ['arial', 'sans-serif'], 'gmail': ['arial', 'sans-serif', 'product sans'], 'google drive': ['arial', 'sans-serif'], 'twitter': ['twitterchirp', '-apple-system', 'blinkmacsystemfont', 'segoe ui', 'roboto', 'helvetica', 'arial', 'sans-serif'], 'microsoft': ['segoe ui', 'segoeui', 'helvetica neue', 'helvetica', 'arial', 'sans-serif'], 'amazon': ['arial', 'sans-serif'], 'facebook': ['system-ui', '-apple-system', 'blinkmacsystemfont', '.sfnstext-regular', 'sans-serif'], 'instagram': ['-apple-system', 'blinkmacsystemfont', 'segoe ui', 'roboto', 'helvetica', 'arial', 'sans-serif'], 'messenger': ['helvetica', 'arial', 'sans-serif'], 'whatsapp': ['helvetica', 'arial', 'sans-serif'], 'twitch': ['inter', 'roobert', 'helvetica neue', 'helvetica', 'arial', 'sans-serif'], 'tiktok': ['proximanova', 'arial', 'tahoma', 'pingfangsc', 'sans-serif'], 'apple': ['sf pro text', 'sf pro icons', 'helvetica neue', 'helvetica', 'arial', 'sans-serif'], 'zoo

In [66]:
# ranking process
font_ranking = totally_scientific_ranking(company_font_dict)
font_ranking = dict(sorted(font_ranking.items(), key=lambda item: item[1], reverse=True))
font_ranking
ranking_df = pd.DataFrame(list(font_ranking.items()), columns=["font", "ranking"]) 
ranking_df

Unnamed: 0,font,ranking
0,arial,7.078125
1,helvetica,3.90625
2,sans-serif,3.382812
3,helvetica neue,2.625
4,-apple-system,2.0
5,segoe ui,1.625
6,roboto,1.3125
7,system-ui,1.25
8,twitterchirp,1.0
9,blinkmacsystemfont,1.0


In [67]:
# total process
font_total = total_count(company_font_dict)
font_total = dict(sorted(font_total.items(), key=lambda item: item[1], reverse=True))
font_total

# Add total to new column
ranking_df['frequency'] = ranking_df['font'].apply(lambda x: font_total.get(x))
ranking_df

Unnamed: 0,font,ranking,frequency
0,arial,7.078125,18
1,helvetica,3.90625,12
2,sans-serif,3.382812,20
3,helvetica neue,2.625,8
4,-apple-system,2.0,3
5,segoe ui,1.625,4
6,roboto,1.3125,4
7,system-ui,1.25,2
8,twitterchirp,1.0,1
9,blinkmacsystemfont,1.0,3


# plotly visualizations


In [68]:

# ranking bar
# Bar chart of every font's position-weighted ranking score, in the current
# row order of ranking_df.
fig = px.bar(x=ranking_df['font'], y=ranking_df['ranking'], title="FONTS 'SCIENTIFIC' RANKING")

# Alternative that plots only the first 10 rows:
# fig = px.bar(x=ranking_df['font'][:10], y=ranking_df['ranking'][:10], title="TOP 10 FONTS") # top 10 fonts

# showing the plot
fig.show()


In [69]:
# ranking pie
# Each font's slice is sized by its ranking score.
fig = px.pie(
    names=ranking_df['font'],
    values=ranking_df['ranking'],
    title="FONTS 'SCIENTIFIC' RANKING",
)
fig.show()


In [70]:
# Total usage bar
# Re-order the table by raw frequency so the charts below list the most used
# fonts first. Note that this rebinds ranking_df for any cell run afterwards.
ranking_df = ranking_df.sort_values(by='frequency', ascending=False)
fig = px.bar(
    x=ranking_df['font'],
    y=ranking_df['frequency'],
    title="FONTS FREQUENCY",
)
# fig = px.bar(x=ranking_df['font'][:10], y=ranking_df['frequency'][:10], title="FONTS FREQUENCY") # top 10
fig.show()

# frequency pie
fig = px.pie(
    names=ranking_df['font'],
    values=ranking_df['frequency'],
    title="FONTS FREQUENCY",
)
fig.show()

In [78]:
# company top 3 fonts
def _font_at(fonts, position):
    """Return the font at `position` in `fonts`, or '' when the list is shorter.

    Guarding every position the same way keeps a company with fewer than
    three (or even fewer than two) fonts from raising IndexError.
    """
    return fonts[position] if len(fonts) > position else ''


# One row per company with its first three fonts; missing positions are ''.
company_top3_df = pd.DataFrame()
company_top3_df['company'] = df['company']
# Only the first-choice font is upper-cased (presumably for emphasis in the table).
company_top3_df['1st'] = df['formatted fonts'].apply(lambda x: _font_at(x, 0).upper())
company_top3_df['2nd'] = df['formatted fonts'].apply(lambda x: _font_at(x, 1))
company_top3_df['3rd'] = df['formatted fonts'].apply(lambda x: _font_at(x, 2))
company_top3_df


Unnamed: 0,company,1st,2nd,3rd
0,youtube,ROBOTO,arial,sans-serif
1,google,ARIAL,sans-serif,
2,gmail,ARIAL,sans-serif,product sans
3,google drive,ARIAL,sans-serif,
4,twitter,TWITTERCHIRP,-apple-system,blinkmacsystemfont
5,microsoft,SEGOE UI,segoeui,helvetica neue
6,amazon,ARIAL,sans-serif,
7,facebook,SYSTEM-UI,-apple-system,blinkmacsystemfont
8,instagram,-APPLE-SYSTEM,blinkmacsystemfont,segoe ui
9,messenger,HELVETICA,arial,sans-serif
