# Web Stats prologhub.com

## Download data

In [1]:
URL = "https://adrianistanlogs.blob.core.windows.net/prologhub-com/logs.parquet?sp=r&st=2024-10-20T15:49:38Z&se=2025-10-20T23:49:38Z&spr=https&sv=2022-11-02&sr=c&sig=KPosXJYfLwln%2FCt0BaIJshbbURRS90QxlJp%2Bpq%2Bj0fo%3D"

In [2]:
from urllib.request import urlretrieve

urlretrieve(URL, "logs-prologhub.parquet")

('logs-prologhub.parquet', <http.client.HTTPMessage at 0x7def3831c3e0>)

# Analysis

1. Most popular pages
2. Data sent
3. Browsers / Operating systems
4. Referrals
5. Protocols

In [3]:
import duckdb
import pandas as pd

%load_ext sql
conn = duckdb.connect()
%sql conn --alias duckdb
%config SqlMagic.displaylimit = None

In [4]:
%%sql
SELECT * FROM read_parquet('logs-prologhub.parquet')
LIMIT 50

remote_addr,remote_user,time,status,body_bytes_sent,http_referer,http_user_agent,http_x_forwarded_for,method,path,protocol
121.6.209.22,-,2024-10-11 14:08:46+02:00,404,153,-,NetNewsWire (RSS Reader; https://netnewswire.com/),-,GET,/updates.atom,HTTP/2.0
114.119.148.94,-,2024-10-11 14:11:41+02:00,200,6673,https://prologhub.com/?category=Web%20Development,"Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)",-,GET,/?tag=Modules,HTTP/1.1
112.86.225.145,-,2024-10-11 14:13:47+02:00,200,6673,-,Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07),-,GET,/,HTTP/1.1
121.6.209.22,-,2024-10-11 14:24:07+02:00,404,153,-,NetNewsWire (RSS Reader; https://netnewswire.com/),-,GET,/updates.atom,HTTP/2.0
86.245.69.118,-,2024-10-11 14:25:51+02:00,404,153,-,NetNewsWire (RSS Reader; https://netnewswire.com/),-,GET,/updates.atom,HTTP/2.0
65.19.138.34,-,2024-10-11 14:35:46+02:00,404,153,-,Feedly/1.0 (poller),-,GET,/updates.atom,HTTP/1.1
121.6.209.22,-,2024-10-11 14:39:39+02:00,404,153,-,NetNewsWire (RSS Reader; https://netnewswire.com/),-,GET,/updates.atom,HTTP/2.0
66.249.72.135,-,2024-10-11 15:00:45+02:00,301,169,-,"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.6668.89 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",-,GET,/consuming-json-api,HTTP/1.1
66.249.72.136,-,2024-10-11 15:00:46+02:00,200,4908,-,"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.6668.89 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",-,GET,/consuming-json-api/,HTTP/1.1
86.245.69.118,-,2024-10-11 15:35:54+02:00,404,153,-,NetNewsWire (RSS Reader; https://netnewswire.com/),-,GET,/updates.atom,HTTP/2.0


## Most popular pages


In [5]:
%%sql
SELECT path, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE
    time >= DATE '2024-10-01' AND
    time <= DATE '2024-10-31'
GROUP BY path
ORDER BY count DESC
LIMIT 50

path,count
/updates.atom,261
/robots.txt,249
/,248
/using-postgresql-prolog/,128
/archive/,118
/using-postgresql-prolog,110
/scryer-prolog-meetup-2023-notes/,110
/scryer-prolog-meetup-2023-notes,108
/favicon.ico,64
/static/fonts.css,39


## Most popular pages (filtering probably robots)

In [6]:
%%sql
SELECT path, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE
    time >= DATE '2024-10-01' AND
    time <= DATE '2024-10-31' AND
    http_user_agent LIKE 'Mozilla%' AND
    remote_addr NOT IN (
      SELECT DISTINCT remote_addr
      FROM read_parquet('logs-prologhub.parquet')
      WHERE path = '/robots.txt' OR path = '/rss.xml'
    )
GROUP BY path
ORDER BY count DESC
LIMIT 50

path,count
/,126
/static/B612-Regular.ttf,35
/static/fonts.css,34
/static/main.css,34
/wp-login.php,34
/favicon.ico,28
/updates.atom,22
/using-postgresql-prolog/,19
/xmlrpc.php,18
/wp,16


## Data sent (in MB)

In [7]:
%%sql
SELECT SUM(body_bytes_sent)/1000000 FROM read_parquet('logs-prologhub.parquet')

(sum(body_bytes_sent) / 1000000)
16.904777


In [8]:
%%sql
SELECT path, SUM(body_bytes_sent)/1000000 AS data_size
FROM read_parquet('logs-prologhub.parquet')
GROUP BY path
ORDER BY data_size DESC
LIMIT 20

path,data_size
/static/B612-Regular.ttf,4.478346
/,1.073344
/archive/,0.837092
/using-postgresql-prolog/,0.777757
/static/browser2.png,0.417776
/static/terminal3.png,0.281652
/static/Snoopys-Dark-and-Stormy-Night-Second-Line.jpg,0.278612
/static/browser1.png,0.26756
/tips-planning-documenting-and-testing-swi-prolog-project/,0.245232
/static/terminal1.png,0.218932


## Browsers and Operating systems

In [9]:
%%sql
SELECT http_user_agent, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
GROUP BY http_user_agent
ORDER BY count DESC
LIMIT 20

http_user_agent,count
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.6668.89 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",1057
Go-http-client/1.1,554
Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/),260
NetNewsWire (RSS Reader; https://netnewswire.com/),209
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)",187
Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html),178
"Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)",162
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",160
Mozilla/5.0 (compatible; DotBot/1.2; +https://opensiteexplorer.org/dotbot; help@moz.com),99
"Mozlila/5.0 (Linux; Android 7.0; SM-G892A Bulid/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Moblie Safari/537.36",83


In [10]:
%%sql
SELECT 'Firefox' AS browser, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent LIKE '%Firefox%' AND http_user_agent NOT LIKE '%bot%'
UNION
SELECT 'Chrome' AS browser, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent LIKE '%Chrome%' AND http_user_agent NOT LIKE '%bot%'
UNION
SELECT 'Safari' AS browser, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent LIKE '%Safari%' AND http_user_agent NOT LIKE '%bot%'
UNION
SELECT 'Edge' AS browser, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent LIKE '%Edge%' AND http_user_agent NOT LIKE '%bot%'

browser,count
Chrome,536
Safari,625
Firefox,114
Edge,7


In [11]:
%%sql
SELECT 'Linux' AS os, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent LIKE '%Linux%' AND http_user_agent NOT LIKE '%bot%'
UNION
SELECT 'Windows' AS os, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent LIKE '%Windows%' AND http_user_agent NOT LIKE '%bot%'
UNION
SELECT 'macOS' AS os, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent LIKE '%Mac%' AND http_user_agent NOT LIKE '%bot%'
UNION
SELECT 'Android' AS os, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent LIKE '%Android%' AND http_user_agent NOT LIKE '%bot%'
UNION
SELECT 'iOS' AS os, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent LIKE '%iOS%' AND http_user_agent NOT LIKE '%bot%'

os,count
Windows,483
Android,211
macOS,55
Linux,239
iOS,0


In [12]:
%%sql
SELECT http_user_agent, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_user_agent NOT LIKE 'Mozilla%'
GROUP BY http_user_agent
ORDER BY count DESC
LIMIT 20

http_user_agent,count
Go-http-client/1.1,554
NetNewsWire (RSS Reader; https://netnewswire.com/),209
"Mozlila/5.0 (Linux; Android 7.0; SM-G892A Bulid/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Moblie Safari/537.36",83
DuckDuckBot/1.1; (+http://duckduckgo.com/duckduckbot.html),55
Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07),48
facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php),42
meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler),34
CCBot/2.0 (https://commoncrawl.org/faq/),21
Aggregator/2.22.000 (Android/9; MSM8974),11
Go-http-client/2.0,9


## Referrals

In [13]:
%%sql
SELECT http_referer, COUNT(*) count
FROM read_parquet('logs-prologhub.parquet')
WHERE http_referer != '-'
GROUP BY http_referer
ORDER BY count DESC
LIMIT 20

http_referer,count
https://prologhub.com/?category=Web%20Development,122
https://prologhub.com/scryer-prolog-meetup-2023-notes,106
https://prologhub.com/using-postgresql-prolog,105
https://prologhub.com/,53
https://prologhub.com/tips-planning-documenting-and-testing-swi-prolog-project/,40
https://prologhub.com/static/fonts.css,33
http://prologhub.com/,26
https://prologhub.com/favicon.ico,22
http://prologhub.com,20
https://www.google.com/,12


In [18]:
%%sql
SELECT http_referer, COUNT(*) count
FROM read_parquet('logs-prologhub.parquet')
WHERE 
    http_referer != '-' AND
    http_referer NOT LIKE 'https://prologhub.com%' AND
    http_referer NOT LIKE 'http://prologhub.com%' AND
    http_referer NOT LIKE 'http://90.170.32.205%'
GROUP BY http_referer
ORDER BY count DESC
LIMIT 20

http_referer,count
https://www.google.com/,12
https://1pluslocksmith.com/,5
https://yandex.ru/,5
https://google.com/,4
https://learn-anything.xyz/,2
http://www.feiacmr.shop/,2
https://duckduckgo.com/,2
https://web.telegram.org/,1
https://www.bing.com/,1


## Protocols

In [15]:
%%sql
SELECT protocol, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
GROUP BY protocol
ORDER BY count DESC
LIMIT 5

protocol,count
HTTP/1.1,2941
HTTP/2.0,1068
HTTP/1.0,5
,1


In [16]:
%%sql
SELECT method, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
GROUP BY method
ORDER BY count DESC
LIMIT 10

method,count
GET,3838
HEAD,160
POST,16
,1


## Broken pages

In [17]:
%%sql
SELECT path, COUNT(*) AS count
FROM read_parquet('logs-prologhub.parquet')
WHERE
  status = 404
GROUP BY path
ORDER BY count DESC
LIMIT 20

path,count
/updates.atom,257
/robots.txt,208
/favicon.ico,53
/wp-login.php,22
/xmlrpc.php,12
/backup,8
/wp,8
/old,8
/new,8
/bk,8
