In [85]:
import pandas as pd

In [86]:
# Load the font CSV into a DataFrame; the bare `df` on the last line displays it.
# NOTE(review): the displayed frame carries an 'Unnamed: 0' column that mirrors the
# row index - presumably an index saved into the CSV; confirm whether it is needed.
df = pd.read_csv('tech_websites_font_manual.csv')
df

Unnamed: 0,company,website,fonts
0,youtube,https://youtube.com,'Roboto;Arial;sans-serif'
1,google,https://google.com,'arial;sans-serif'
2,gmail,https://gmail.com,arial;sans-serif;Product Sans'
3,google drive,https://drive.google.com,'arial;sans-serif'
4,twitter,https://twitter.com,'TwitterChirp;-apple-system;BlinkMacSystemFon...
5,microsoft,https://microsoft.com,'Segoe UI;SegoeUI;Helvetica Neue;Helvetica;Ar...
6,amazon,https://amazon.com,'Arial;sans-serif'
7,facebook,https://facebook.com,'system-ui;-apple-system;BlinkMacSystemFont;....
8,instagram,https://instagram.com,'-apple-system;BlinkMacSystemFont;Segoe UI;Ro...
9,messenger,https://messenger.com,'Helvetica;Arial;sans-serif'


In [87]:
def totally_scientific_ranking(company_font_dict, decay=0.5):
    """Score fonts by how early they appear in each company's font list.

    A font at zero-based position ``i`` of a company's list contributes
    ``decay ** i`` to that font's score (1 for the first entry, ``decay``
    for the second, and so on). A font's total score is the sum of its
    contributions over every list it appears in.

    Args:
        company_font_dict: Mapping of company name to an ordered list of
            font names. Only the lists are used; the keys are ignored.
        decay: Base of the per-position weight. The default of 0.5 halves
            the contribution for each step down the list.

    Returns:
        A dict mapping font name to its total score, with keys in the
        order the fonts were first encountered.
    """
    seen_font = {}
    for fonts in company_font_dict.values():
        for index, font in enumerate(fonts):
            # A font not seen before starts from a score of zero.
            seen_font[font] = seen_font.get(font, 0.0) + pow(decay, index)
    return seen_font

In [88]:
def test_case1():
    """Check totally_scientific_ranking on a small two-company example.

    'arial' is second for 'yt' (0.5) and first for 'google' (1.0), giving
    1.5; 'sans-serif' is third (0.25) and second (0.5), giving 0.75.
    All expected values are exact binary fractions, so comparing the
    float results with == is safe here.
    """
    company_fonts = {
        'yt': ['roboto', 'arial', 'sans-serif'],
        'google': ['arial', 'sans-serif'],
    }
    expect = {
        'roboto': 1,
        'arial': 1.5,
        'sans-serif': 0.75,
    }

    actual = totally_scientific_ranking(company_fonts)
    assert actual == expect, f'expected {expect}, got {actual}'


test_case1()

In [89]:
# Preprocess: turn each raw 'fonts' string into a lowercase list of font names.
# Quotes are removed everywhere rather than just at the ends because a row may
# be missing one of its surrounding quotes. Each name is stripped and blank
# names are dropped, so input such as "a; b" or a trailing ';' cannot introduce
# spurious ' b' or '' entries into the ranking.
def _split_font_stack(raw):
    """Return the lowercase font names found in a ';'-separated font string."""
    pieces = raw.strip().replace('\'', '').lower().split(';')
    return [piece.strip() for piece in pieces if piece.strip()]


# `df['fonts']` is a Series; the result is a Series of lists.
font_series = df['fonts'].apply(_split_font_stack)
font_series


0                           [roboto, arial, sans-serif]
1                                   [arial, sans-serif]
2                     [arial, sans-serif, product sans]
3                                   [arial, sans-serif]
4     [twitterchirp, -apple-system, blinkmacsystemfo...
5     [segoe ui, segoeui, helvetica neue, helvetica,...
6                                   [arial, sans-serif]
7     [system-ui, -apple-system, blinkmacsystemfont,...
8     [-apple-system, blinkmacsystemfont, segoe ui, ...
9                        [helvetica, arial, sans-serif]
10                       [helvetica, arial, sans-serif]
11    [inter, roobert, helvetica neue, helvetica, ar...
12    [proximanova, arial, tahoma, pingfangsc, sans-...
13    [sf pro text, sf pro icons, helvetica neue, he...
14                     [almaden sans, helvetica, arial]
15    [cash-market-rounded, helvetica neue, helvetic...
16    [netflix sans, helvetica neue, segoe ui, robot...
17    [gg sans, noto sans, helvetica neue, helve

In [90]:
# Store the preprocessed font lists on df as a new 'formatted fonts' column
# (this modifies df in place).
df['formatted fonts'] = font_series

In [91]:
# Display df to check the 'formatted fonts' column next to the raw 'fonts' column.
df

Unnamed: 0,company,website,fonts,formatted fonts
0,youtube,https://youtube.com,'Roboto;Arial;sans-serif',"[roboto, arial, sans-serif]"
1,google,https://google.com,'arial;sans-serif',"[arial, sans-serif]"
2,gmail,https://gmail.com,arial;sans-serif;Product Sans',"[arial, sans-serif, product sans]"
3,google drive,https://drive.google.com,'arial;sans-serif',"[arial, sans-serif]"
4,twitter,https://twitter.com,'TwitterChirp;-apple-system;BlinkMacSystemFon...,"[twitterchirp, -apple-system, blinkmacsystemfo..."
5,microsoft,https://microsoft.com,'Segoe UI;SegoeUI;Helvetica Neue;Helvetica;Ar...,"[segoe ui, segoeui, helvetica neue, helvetica,..."
6,amazon,https://amazon.com,'Arial;sans-serif',"[arial, sans-serif]"
7,facebook,https://facebook.com,'system-ui;-apple-system;BlinkMacSystemFont;....,"[system-ui, -apple-system, blinkmacsystemfont,..."
8,instagram,https://instagram.com,'-apple-system;BlinkMacSystemFont;Segoe UI;Ro...,"[-apple-system, blinkmacsystemfont, segoe ui, ..."
9,messenger,https://messenger.com,'Helvetica;Arial;sans-serif',"[helvetica, arial, sans-serif]"


In [92]:
# Map each company name to its preprocessed list of fonts.
# Pairing the two columns directly avoids a row-by-row iterrows() loop;
# if a company name appears more than once, the last row wins.
company_font_dict = dict(zip(df['company'], df['formatted fonts']))
print(company_font_dict)
    

{'youtube': ['roboto', 'arial', 'sans-serif'], 'google': ['arial', 'sans-serif'], 'gmail': ['arial', 'sans-serif', 'product sans'], 'google drive': ['arial', 'sans-serif'], 'twitter': ['twitterchirp', '-apple-system', 'blinkmacsystemfont', 'segoe ui', 'roboto', 'helvetica', 'arial', 'sans-serif'], 'microsoft': ['segoe ui', 'segoeui', 'helvetica neue', 'helvetica', 'arial', 'sans-serif'], 'amazon': ['arial', 'sans-serif'], 'facebook': ['system-ui', '-apple-system', 'blinkmacsystemfont', '.sfnstext-regular', 'sans-serif'], 'instagram': ['-apple-system', 'blinkmacsystemfont', 'segoe ui', 'roboto', 'helvetica', 'arial', 'sans-serif'], 'messenger': ['helvetica', 'arial', 'sans-serif'], 'whatsapp': ['helvetica', 'arial', 'sans-serif'], 'twitch': ['inter', 'roobert', 'helvetica neue', 'helvetica', 'arial', 'sans-serif'], 'tiktok': ['proximanova', 'arial', 'tahoma', 'pingfangsc', 'sans-serif'], 'apple': ['sf pro text', 'sf pro icons', 'helvetica neue', 'helvetica', 'arial', 'sans-serif'], 'zoo

In [93]:
# Score every font, then order the scores from highest to lowest.
# sorted() is stable, so fonts with equal scores keep their first-seen order.
font_ranking = dict(
    sorted(
        totally_scientific_ranking(company_font_dict).items(),
        key=lambda font_and_score: font_and_score[1],
        reverse=True,
    )
)
font_ranking

{'arial': 7.078125,
 'helvetica': 3.90625,
 'sans-serif': 3.3828125,
 'helvetica neue': 2.625,
 '-apple-system': 2.0,
 'segoe ui': 1.625,
 'roboto': 1.3125,
 'system-ui': 1.25,
 'twitterchirp': 1.0,
 'blinkmacsystemfont': 1.0,
 'inter': 1.0,
 'proximanova': 1.0,
 'sf pro text': 1.0,
 'almaden sans': 1.0,
 'cash-market-rounded': 1.0,
 'netflix sans': 1.0,
 'gg sans': 1.0,
 'shopifysans': 1.0,
 'ubermove': 1.0,
 'paypalopen-regular': 1.0,
 'segoeui': 0.5,
 'roobert': 0.5,
 'sf pro icons': 0.5,
 'noto sans': 0.5,
 'ubermovetext': 0.5,
 'product sans': 0.25,
 'tahoma': 0.25,
 '.sfnstext-regular': 0.125,
 'pingfangsc': 0.125,
 'lucida grande': 0.125,
 'ubuntu': 0.0625}