# Chrome History
The following process uses an existing Chrome extension to export the Google Chrome browsing history in JSON format.

In [1]:
import json
from datetime import datetime
# This is the Chrome history from 1/15/2018 ~ 4/15/2018.
# Read it through a context manager so the file handle is closed deterministically,
# and decode explicitly as UTF-8 (the titles contain Korean and Japanese text)
# instead of relying on the platform's default encoding.
with open("history.json", encoding="utf-8") as history_file:
    history = json.load(history_file)

In [2]:
# Number of records in the loaded history export.
len(history)

19311

In [3]:
# Collect the title of every history record. Iterating over the records themselves
# (rather than a hard-coded count of 19311) keeps the cell correct for any export size.
titles = [record["title"] for record in history]

### Languages
Categorize titles via regular expression

In [4]:
import re

In [5]:
# Titles that are empty strings (pages recorded without a title).
none = [title for title in titles if not len(title)]
len(none)

5337

In [6]:
# Korean: any non-empty title containing a Hangul syllable or a standalone jamo.
kor = [
    title
    for title in titles
    if len(title) > 0 and re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", title)
]
len(kor)

2977

- I added the range of Chinese characters [一-龥] because Japanese is written with Katakana, Hiragana, and Chinese characters (kanji).
- The other additions were [ㄱ-ㅎ] and [ㅏ-ㅣ], which are standalone consonants and vowels. Although these letters are grammatically meant to be grouped into syllabic blocks, they are also used casually on their own as abbreviations and slang on the internet.

In [7]:
# English (more precisely: everything else): non-empty titles that contain no Hangul,
# no jamo, no kana and no CJK ideograph anywhere in the string.
en = [
    title
    for title in titles
    if len(title) > 0 and re.search(r"^[^가-힣ㄱ-ㅎㅏ-ㅣぁ-んァ-ン一-龥]*$", title)
]
len(en)

10927

- As I checked, the rest are all Japanese content, but filtering by Chinese characters can theoretically include Chinese content as well.

In [8]:
# Japanese (or Chinese): non-empty titles that contain kana or CJK ideographs and no
# Korean at all. Standalone jamo (ㄱ-ㅎ, ㅏ-ㅣ) are excluded here as well, matching the
# `kor` filter, so a title mixing jamo with kanji is not counted as both Korean and Japanese.
jp = [
    item
    for item in titles
    if len(item) > 0
    and re.search(r"^[^가-힣ㄱ-ㅎㅏ-ㅣ]*$", item)
    and re.search(r"[ぁ-んァ-ン一-龥]", item)
]
len(jp)

70

In [9]:
# Everything that is neither English nor Korean: Japanese titles followed by empty titles.
others = [*jp, *none]
len(others)

5407

en : kor = 10927 : 2977

### Counting
Checking if total number matches the sum

In [10]:
# The three groups together should account for every title exactly once.
total_len = sum(map(len, (en, kor, others)))
len(titles) == total_len

True

In [11]:
# Difference between the categorised count and the number of titles (0 when they agree).
total_len - len(titles)

0

### Major services

In [17]:
# Korean titles that mention YouTube (either capitalisation of the Y and T).
kor_youtube = list(filter(re.compile(r"[Yy]ou[Tt]ube").search, kor))
len(kor_youtube)

695

In [18]:
# English titles that mention YouTube (either capitalisation of the Y and T).
en_youtube = list(filter(re.compile(r"[Yy]ou[Tt]ube").search, en))
len(en_youtube)

736

In [19]:
# Korean titles that mention Google.
kor_google = list(filter(re.compile(r"[Gg]oogle").search, kor))
len(kor_google)

281

In [20]:
# English titles that mention Google.
en_google = list(filter(re.compile(r"[Gg]oogle").search, en))
len(en_google)

2396

### What makes up the Korean sites?

In [21]:
# Korean titles containing "나무위키".
kor_namu = list(filter(re.compile(r"나무위키").search, kor))
len(kor_namu)

697

In [22]:
# Korean titles containing "ㅍㅍㅅㅅ".
kor_ppss = list(filter(re.compile(r"ㅍㅍㅅㅅ").search, kor))
len(kor_ppss)

54

In [23]:
# Korean titles that mention Instagram.
kor_insta = list(filter(re.compile(r"[Ii]nstagram").search, kor))
len(kor_insta)

32

In [24]:
# Korean titles containing "네이버".
kor_naver = list(filter(re.compile(r"네이버").search, kor))
len(kor_naver)

519

In [25]:
# Number of Korean titles attributed to one of the six services above
# (for comparison, the recorded len(kor) is 2977).
sum(map(len, (kor_namu, kor_youtube, kor_google, kor_ppss, kor_insta, kor_naver)))

2278

In [26]:
# defined : total = ?% : 100%
# Derive the "defined" count from the service lists instead of pasting the number
# from a previous run, so the percentages stay correct when the data changes.
# NOTE: each list is filtered from `kor` independently, so a title matching two
# services (e.g. both a site name and "YouTube") is counted twice.
kor_defined = len(kor_namu) + len(kor_youtube) + len(kor_google) + len(kor_ppss) + len(kor_insta) + len(kor_naver)
kor_defined_pct = round((kor_defined * 100) / len(kor))
print(str(kor_defined_pct) + "% of Korean titles is defined.\n" + str(100 - kor_defined_pct) + "% of them is undefined.")

77% of Korean titles is defined.
23% of them is undefined.


### How about English?

In [35]:
# len(en) == 10927
# The three lists below are built here, with the same patterns as the dedicated cells
# further down, so that this summary also works on a fresh kernel run top to bottom
# (otherwise en_insta / en_adobe / en_me would not exist yet and raise NameError).
en_insta = [line for line in en if re.search(r"[Ii]nstagram", line)]
en_adobe = [line for line in en if re.search(r"[Aa]dobe", line)]
# blog or portfolio or personal/professional accounts
en_me = [line for line in en if re.search(r"\b(?:Alice Sun|alicehgsun)\b", line)]
len(en_youtube) + len(en_google) + len(en_insta) + len(en_adobe) + len(en_me)

4236

In [29]:
# English titles that mention Instagram.
en_insta = list(filter(re.compile(r"[Ii]nstagram").search, en))
len(en_insta)

95

In [30]:
# English titles that mention Adobe.
en_adobe = list(filter(re.compile(r"[Aa]dobe").search, en))
len(en_adobe)

118

In [31]:
# Titles of the author's own pages: blog, portfolio, personal/professional accounts.
# The pattern matches either name as a whole word.
en_me = list(filter(re.compile(r"\b(?:Alice Sun|alicehgsun)\b").search, en))
len(en_me)

891

In [32]:
# English titles containing "week"/"Week" immediately followed by a digit.
en_class = list(filter(re.compile(r"[Ww]eek\d").search, en))
len(en_class)

70

In [33]:
# English titles that mention LinkedIn.
en_linkedin = list(filter(re.compile(r"[Ll]inked[Ii]n").search, en))
len(en_linkedin)

271

In [34]:
# English titles that mention Naver by its romanised name.
en_naver = list(filter(re.compile(r"[Nn]aver").search, en))
len(en_naver)

2

## Timeline

In [13]:
# The export lists the most recent visit first, so index 0 is the last record and the
# final element is the first one. Index with -1 rather than a hard-coded 19310 so the
# cell does not depend on the exact number of records.
print("Last record is " + str(history[0]["lastVisitTime"]) + "\nFirst record is " + str(history[-1]["lastVisitTime"]))

Last record is 4/15/2018, 9:24:32 PM
First record is 1/15/2018, 9:40:57 PM


In [14]:
# Bucket every title by the calendar month at the start of its "M/D/YYYY, ..." timestamp.
april, march, february, january = [], [], [], []

# (timestamp prefix pattern, destination list), checked in the same order as before.
month_buckets = [
    (r"^4/", april),
    (r"^3/", march),
    (r"^2/", february),
    (r"^1/", january),
]

for elt in history:
    for month_pattern, bucket in month_buckets:
        if re.search(month_pattern, elt["lastVisitTime"]):
            bucket.append(elt["title"])
            break

In [15]:
# Split the January titles by language: "en" holds non-empty titles without any Hangul,
# jamo, kana or CJK character; "kor" holds titles containing Hangul or jamo.
january_en = [item for item in january if re.search(r"^[^가-힣ㄱ-ㅎㅏ-ㅣぁ-んァ-ン一-龥]*$", item) and len(item)>0]
january_kor = [item for item in january if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", item) and len(item)>0]
print("January en:", str(len(january_en)))
print("January kor:", str(len(january_kor)))
# recorded run: en : kor = 2385 : 471

# Share of each language among the classified titles. The guard avoids a
# ZeroDivisionError when the period contains no classified title at all.
january_total = len(january_en) + len(january_kor)
if january_total:
    print(str(round((len(january_en)*100)/january_total)) + "% is English")
    print(str(round((len(january_kor)*100)/january_total)) + "% is Korean")
else:
    print("No English or Korean titles in January")

January en: 2385
January kor: 471
84% is Enlgish
16% is Korean


In [16]:
# Split the February titles by language: "en" holds non-empty titles without any Hangul,
# jamo, kana or CJK character; "kor" holds titles containing Hangul or jamo.
february_en = [item for item in february if re.search(r"^[^가-힣ㄱ-ㅎㅏ-ㅣぁ-んァ-ン一-龥]*$", item) and len(item)>0]
february_kor = [item for item in february if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", item) and len(item)>0]
print("February en:", str(len(february_en)))
print("February kor:", str(len(february_kor)))

# Share of each language among the classified titles. The guard avoids a
# ZeroDivisionError when the period contains no classified title at all.
february_total = len(february_en) + len(february_kor)
if february_total:
    print(str(round((len(february_en)*100)/february_total)) + "% is English")
    print(str(round((len(february_kor)*100)/february_total)) + "% is Korean")
else:
    print("No English or Korean titles in February")

February en: 2916
February kor: 908
76% is Enlgish
24% is Korean


In [17]:
# Split the March titles by language: "en" holds non-empty titles without any Hangul,
# jamo, kana or CJK character; "kor" holds titles containing Hangul or jamo.
march_en = [item for item in march if re.search(r"^[^가-힣ㄱ-ㅎㅏ-ㅣぁ-んァ-ン一-龥]*$", item) and len(item)>0]
march_kor = [item for item in march if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", item) and len(item)>0]
print("March en:", str(len(march_en)))
print("March kor:", str(len(march_kor)))

# Share of each language among the classified titles. The guard avoids a
# ZeroDivisionError when the period contains no classified title at all.
march_total = len(march_en) + len(march_kor)
if march_total:
    print(str(round((len(march_en)*100)/march_total)) + "% is English")
    print(str(round((len(march_kor)*100)/march_total)) + "% is Korean")
else:
    print("No English or Korean titles in March")

March en: 3655
March kor: 1099
77% is Enlgish
23% is Korean


In [18]:
# Split the April titles by language: "en" holds non-empty titles without any Hangul,
# jamo, kana or CJK character; "kor" holds titles containing Hangul or jamo.
april_en = [item for item in april if re.search(r"^[^가-힣ㄱ-ㅎㅏ-ㅣぁ-んァ-ン一-龥]*$", item) and len(item)>0]
april_kor = [item for item in april if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", item) and len(item)>0]
print("April en:", str(len(april_en)))
print("April kor:", str(len(april_kor)))

# Share of each language among the classified titles. The guard avoids a
# ZeroDivisionError when the period contains no classified title at all.
april_total = len(april_en) + len(april_kor)
if april_total:
    print(str(round((len(april_en)*100)/april_total)) + "% is English")
    print(str(round((len(april_kor)*100)/april_total)) + "% is Korean")
else:
    print("No English or Korean titles in April")

April en: 1971
April kor: 499
80% is Enlgish
20% is Korean


In [42]:
# Report how many records fall into each calendar month, newest month first.
monthly_totals = [
    ("April total: ", april),
    ("March total: ", march),
    ("February total: ", february),
    ("January total: ", january),
]
print("\n".join(label + str(len(bucket)) for label, bucket in monthly_totals))

April total: 3292
March total: 6869
February total: 5218
January total: 3932


### 30 days period

In [19]:
# total = 19311
# Three consecutive periods (inclusive bounds):
#   month1: 1/15 - 2/13
#   month2: 2/14 - 3/15
#   month3: 3/16 - 4/15
#
# The date part of "M/D/YYYY, h:mm:ss AM" is parsed into a real date and compared
# against the bounds, instead of encoding each range as a hand-written regex.
# Records outside all three periods are left out, as before.
period_bounds = (
    (datetime(2018, 1, 15), datetime(2018, 2, 13)),
    (datetime(2018, 2, 14), datetime(2018, 3, 15)),
    (datetime(2018, 3, 16), datetime(2018, 4, 15)),
)

month1 = []
month2 = []
month3 = []
for elt in history:
    # Only the date matters here; strptime yields midnight of that day.
    visited = datetime.strptime(elt["lastVisitTime"].split(",")[0], "%m/%d/%Y")
    for bucket, (start, end) in zip((month1, month2, month3), period_bounds):
        if start <= visited <= end:
            bucket.append(elt["title"])
            break

In [20]:
# Every record should belong to exactly one of the three periods.
sum(map(len, (month1, month2, month3))) == len(history)

True

In [21]:
# Split the titles of period 1 by language: "en" holds non-empty titles without any
# Hangul, jamo, kana or CJK character; "kor" holds titles containing Hangul or jamo.
month1_en = [item for item in month1 if re.search(r"^[^가-힣ㄱ-ㅎㅏ-ㅣぁ-んァ-ン一-龥]*$", item) and len(item)>0]
month1_kor = [item for item in month1 if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", item) and len(item)>0]
print("Month 1 en:", str(len(month1_en)))
print("Month 1 kor:", str(len(month1_kor)))

# Share of each language among the classified titles. The guard avoids a
# ZeroDivisionError when the period contains no classified title at all.
month1_total = len(month1_en) + len(month1_kor)
if month1_total:
    print(str(round((len(month1_en)*100)/month1_total)) + "% is English")
    print(str(round((len(month1_kor)*100)/month1_total)) + "% is Korean")
else:
    print("No English or Korean titles in month 1")

Month 1 en: 4006
Month 1 kor: 806
83% is Enlgish
17% is Korean


In [22]:
# Split the titles of period 2 by language: "en" holds non-empty titles without any
# Hangul, jamo, kana or CJK character; "kor" holds titles containing Hangul or jamo.
month2_en = [item for item in month2 if re.search(r"^[^가-힣ㄱ-ㅎㅏ-ㅣぁ-んァ-ン一-龥]*$", item) and len(item)>0]
month2_kor = [item for item in month2 if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", item) and len(item)>0]
print("Month 2 en:", str(len(month2_en)))
print("Month 2 kor:", str(len(month2_kor)))

# Share of each language among the classified titles. The guard avoids a
# ZeroDivisionError when the period contains no classified title at all.
month2_total = len(month2_en) + len(month2_kor)
if month2_total:
    print(str(round((len(month2_en)*100)/month2_total)) + "% is English")
    print(str(round((len(month2_kor)*100)/month2_total)) + "% is Korean")
else:
    print("No English or Korean titles in month 2")

Month 2 en: 3370
Month 2 kor: 1090
76% is Enlgish
24% is Korean


In [23]:
# Split the titles of period 3 by language: "en" holds non-empty titles without any
# Hangul, jamo, kana or CJK character; "kor" holds titles containing Hangul or jamo.
month3_en = [item for item in month3 if re.search(r"^[^가-힣ㄱ-ㅎㅏ-ㅣぁ-んァ-ン一-龥]*$", item) and len(item)>0]
month3_kor = [item for item in month3 if re.search(r"[ㄱ-ㅎㅏ-ㅣ가-힣]", item) and len(item)>0]
print("Month 3 en:", str(len(month3_en)))
print("Month 3 kor:", str(len(month3_kor)))

# Share of each language among the classified titles. The guard avoids a
# ZeroDivisionError when the period contains no classified title at all.
month3_total = len(month3_en) + len(month3_kor)
if month3_total:
    print(str(round((len(month3_en)*100)/month3_total)) + "% is English")
    print(str(round((len(month3_kor)*100)/month3_total)) + "% is Korean")
else:
    print("No English or Korean titles in month 3")

Month 3 en: 3551
Month 3 kor: 1081
77% is Enlgish
23% is Korean


### Weekly

In [24]:
# total = 19311
# Thirteen consecutive 7-day windows starting on 1/15/2018:
#1 1/15 - 1/21
#2 1/22 - 1/28
#3 1/29 - 2/4
#4 2/5 - 2/11
#5 2/12 - 2/18
#6 2/19 - 2/25
#7 2/26 - 3/4
#8 3/5 - 3/11
#9 3/12 - 3/18
#10 3/19 - 3/25
#11 3/26 - 4/1
#12 4/2 - 4/8
#13 4/9 - 4/15
#
# The week of a record is derived arithmetically from its parsed date (whole days since
# the first day, integer-divided by 7) instead of one hand-written regex per week.
week_count = 13
first_day = datetime(2018, 1, 15)

weekly_titles = [[] for _ in range(week_count)]
for elt in history:
    # Only the date part of "M/D/YYYY, h:mm:ss AM" is needed.
    visited = datetime.strptime(elt["lastVisitTime"].split(",")[0], "%m/%d/%Y")
    week_index = (visited - first_day).days // 7
    # Records before the first day (negative index) or after week 13 are left out,
    # as they were when no pattern matched.
    if 0 <= week_index < week_count:
        weekly_titles[week_index].append(elt["title"])

# Keep the individual names that the following cells rely on.
(week1, week2, week3, week4, week5, week6, week7,
 week8, week9, week10, week11, week12, week13) = weekly_titles

In [25]:
# Every record should belong to exactly one of the thirteen weeks.
sum(
    len(week)
    for week in (week1, week2, week3, week4, week5, week6, week7,
                 week8, week9, week10, week11, week12, week13)
) == len(history)

True

In [26]:
# Split each week's titles by language with one loop instead of thirteen copy-pasted
# stanzas. "en" holds non-empty titles without any Hangul, jamo, kana or CJK character;
# "kor" holds titles containing Hangul or jamo.
en_title_pattern = r"^[^가-힣ㄱ-ㅎㅏ-ㅣぁ-んァ-ン一-龥]*$"
kor_title_pattern = r"[ㄱ-ㅎㅏ-ㅣ가-힣]"

weekly_en = []
weekly_kor = []
for week_titles in (week1, week2, week3, week4, week5, week6, week7,
                    week8, week9, week10, week11, week12, week13):
    weekly_en.append([item for item in week_titles if re.search(en_title_pattern, item) and len(item) > 0])
    weekly_kor.append([item for item in week_titles if re.search(kor_title_pattern, item) and len(item) > 0])

# Keep the individual names that the following cells rely on.
(week1_en, week2_en, week3_en, week4_en, week5_en, week6_en, week7_en,
 week8_en, week9_en, week10_en, week11_en, week12_en, week13_en) = weekly_en
(week1_kor, week2_kor, week3_kor, week4_kor, week5_kor, week6_kor, week7_kor,
 week8_kor, week9_kor, week10_kor, week11_kor, week12_kor, week13_kor) = weekly_kor

# Same report as before: for each week, the English count followed by the Korean count.
for week_number, (en_titles, kor_titles) in enumerate(zip(weekly_en, weekly_kor), start=1):
    print("week" + str(week_number) + " en:", str(len(en_titles)))
    print("week" + str(week_number) + " kor:", str(len(kor_titles)))

week1 en: 970
week1 kor: 193
week2 en: 982
week2 kor: 238
week3 en: 891
week3 kor: 134
week4 en: 940
week4 kor: 200
week5 en: 525
week5 kor: 150
week6 en: 680
week6 kor: 360
week7 en: 947
week7 kor: 361
week8 en: 1171
week8 kor: 246
week9 en: 569
week9 kor: 100
week10 en: 753
week10 kor: 212
week11 en: 543
week11 kor: 315
week12 en: 817
week12 kor: 191
week13 en: 1139
week13 kor: 277


In [51]:
# Total number of English titles across all thirteen weeks.
sum(map(len, (week1_en, week2_en, week3_en, week4_en, week5_en, week6_en, week7_en,
              week8_en, week9_en, week10_en, week11_en, week12_en, week13_en)))

10929

In [52]:
# Show only the first few Korean YouTube titles; echoing the whole list floods the
# notebook output and exposes far more browsing history than the analysis needs.
kor_youtube[:20]

['김치련들 다 군대보낼수 있는 방법알려준다 - YouTube',
 '여팬심 자극, 꽃게춤 리액션ㅣ디바제시카(Deeva Jessica) - YouTube',
 "영화 '아가씨' 김민희 메이크업! l 이사배(Risabae Makeup) - YouTube",
 "'애봉이' 웹툰 마음의소리 분장 l 이사배(Risabae Makeup) - YouTube",
 'SOS! 메이크업진단! 눈이 왜 답답해 보일까요? 편 l 이사배(Risabae Makeup) - YouTube',
 '라스 이사배 - YouTube',
 '이사배 라스 크리스티나 성대모사 - YouTube',
 '양세형의 짤방공작소 - 이사배의 커버 메이크업 여기서 끝이 아니다! 20180318 - YouTube',
 '라스 이사배 대박 메이크업 실력+표정묘사(feat.아이유,수지,장첸까지 ㅋㅋ) - YouTube',
 '홍진호 이사배 - YouTube',
 '\'라디오스타\' 홍진호, 이사배에게 메이크업 받아.."이상해" - 엔터테인먼트 - YouTube',
 '이사배, 홍진호 이상형 언급에 "노골적이네요" - YouTube',
 '김태리 리틀 포레스트 - YouTube',
 '리틀 포레스트 2018 . 김태리 류준열 . 임순례 감독 - YouTube',
 '리틀 포레스트2:겨울과 봄 ( Little Forest: Winter&Spring, 2015) - 찐감자샐러드 , 감자빵 - YouTube',
 '[VIDEO] 리틀포레스트 여름편 홀토마토 스파게티 : Little Forest: summer&autumn, spaghetti with tomato : 꿀키 - YouTube',
 '[VIDEO] 리틀포레스트 여름편 홀토마토 스파게티 : Little Forest: summer&autumn, spaghetti with tomato : 꿀키 - YouTube',
 '영화 토스트 (Toast, 2010) - YouTube',
 '고양이는 집사랑 대화가 가능합니다 - 삐약이 쵸비 - YouTube',
 '리틀 포레스트

In [53]:
# Show only the first few English YouTube titles; echoing the whole list floods the
# notebook output and exposes far more browsing history than the analysis needs.
en_youtube[:20]

['Korea Bronze Girl Playing Oriana - YouTube',
 'History - YouTube',
 'YouTube',
 '０７：Daylife - YouTube',
 '０７：Daylife - YouTube',
 'girl who leapt through time painting - YouTube',
 'The Girl Who Leapt Through Time: The Painting (Fandub) - YouTube',
 'girl who leapt through time paused - YouTube',
 'girl who leapt through time stop - YouTube',
 'The Girl Who Leapt Through Time amv - YouTube',
 'Final Scene - Sweeney Todd - YouTube',
 'sweeney todd ending - YouTube',
 'Sweeney Todd - Worst pies in London (HD) - YouTube',
 '"Pretty Women" Sweeney Todd - YouTube',
 '"By the Sea" - Sweeney Todd : The Demon Barber of Fleet Street (2007) - Helena Bonham Carter - YouTube',
 '"Epiphany" - Sweeney Todd : The Demon Barber of Fleet Street (2007) - Johnny Depp - YouTube',
 '"A Little Priest" - Sweeney Todd : The Demon Barber of Fleet Street (2007) - YouTube',
 'Sweeney Todd: The Demon Barber of Fleet Street - Trailer - YouTube',
 'Johnny Depp Goes Off on Amber Heard... Hurls Wine Glass | TMZ - Yo