diff --git a/.gitignore b/.gitignore index c6127b3..5ba8c42 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +# C # Prerequisites *.d @@ -50,3 +51,149 @@ modules.order Module.symvers Mkfile.old dkms.conf + + +# Python +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + + +# Other +.vscode/ +data/*.txt \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..7ce96d0 --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ +# Pi Search + +从 π 的小数点后 **1,000,000,000** 位中搜索指定的数字串位置 + + Demo: [快速寻找 8 位生日](https://jasongzy.com/pi-search.php) + +## π 的数据来源 + +使用 [y-cruncher](http://www.numberworld.org/y-cruncher/) 生成了包含 10 亿位十进制数字字符的文本文档 `./data/Pi - Dec - Chudnovsky.txt`(约 950 MB,未包含在仓库内) + +为了加快 8 位生日的查询速度,预先遍历了 从 1900-01-01 至 2050-12-31 的所有合法日期,并将它们的位置存入了 SQLite 数据库(`./data/pi.db`)。`date_search_sqlite.py` 和 `pi-search.php` 均调用了此数据库以实现高速查询。 + +## 查询速度 + +如果直接遍历匹配原始的 txt 文件,搜索耗时将与目标出现的位置有关,最长约为 1.6 秒。 + +采用 SQLite 后,8 位生日的查询平均耗时在 1 ms 以下。 + +## 查询到的概率 + +从 N 位随机数中发现 d 位连续数字(N >> d)的概率为: + +$$P \approx 1 - \left(1 - 10^{-d}\right)^{N} \approx 1 - e^{-N/10^{d}}$$ + +因此可以推测,在 π 的前 10 亿位中找到任意 8 位数字的概率约为 99.995%,实测从 1900 年至 2050 年的所有合法 "年月日" 8 位字符串都能从中找到。 + +## Inspired by + +- [The Pi-Search Page](https://www.angio.net/pi/) +- [Irrational Numbers Search Engine](http://www.subidiom.com/pi/) +- [北大数院人 微信公众号](https://mp.weixin.qq.com/s?__biz=MzU3NzA0OTA5Mg==&mid=2247492451&idx=1&sn=e14e794bae706d7c2a4f964325d2e404) diff --git a/create_sqlite.py b/create_sqlite.py new file mode 100644 index 0000000..2c2fc9c --- /dev/null +++ b/create_sqlite.py @@ -0,0 +1,45 @@ +import datetime +import sqlite3 + +from pisearch import get_digits, search_in_Pi + + +def dateRange(start, end, step=1, format=r"%Y%m%d"): + strptime, strftime = datetime.datetime.strptime, datetime.datetime.strftime + days = 
def judge_birthday(number: str):
    """Return True if *number* is a valid ``YYYYMMDD`` date in 1900-2050.

    The string must consist of exactly eight ASCII digits: a four-digit
    year between 1900 and 2050 (inclusive), a two-digit month and a
    two-digit day that exists in that month of that year (Gregorian
    leap-year rules apply to February only).

    Args:
        number: Candidate date string, e.g. ``"20000229"``.

    Returns:
        ``True`` when the string denotes a real calendar date in range,
        ``False`` otherwise. Never raises for ``str`` input.
    """
    # Function-scope import so the block does not depend on the module's
    # top-level import list.
    import calendar

    # Exactly eight ASCII digits. ``str.isdecimal`` alone would also accept
    # full-width / Arabic-Indic digits, and a "shorter than 8" check alone
    # would let longer strings such as "200001011" through.
    if len(number) != 8 or any(ch not in "0123456789" for ch in number):
        return False

    year = int(number[:4])
    month = int(number[4:6])
    day = int(number[6:])

    if not 1900 <= year <= 2050:
        return False
    if not 1 <= month <= 12:
        return False

    # monthrange() returns (weekday_of_first_day, number_of_days); it adds
    # the leap day to February only, never to the other months.
    days_in_month = calendar.monthrange(year, month)[1]
    return 1 <= day <= days_in_month
def query_pi(datebase: str, date: int):
    """Look up the stored position of a birthday in the SQLite database.

    Runs ``SELECT POSITION from BIRTHDAY where DATE=?`` against the
    database file and returns the first matching row's value.

    Args:
        datebase: Path of the SQLite database file (parameter name kept
            as-is for backward compatibility with keyword callers).
        date: The date as an integer, e.g. ``19900101``.

    Returns:
        The ``POSITION`` value of the matching row, or ``None`` when no
        row matches or when any error occurs (the error is printed).
    """
    conn = None
    try:
        conn = sqlite3.connect(datebase)
        row = conn.execute(
            "SELECT POSITION from BIRTHDAY where DATE=?", (date,)
        ).fetchone()
    except Exception as e:
        # Best-effort contract of the original: report the problem and let
        # the caller see ``None`` instead of propagating the exception.
        print(e)
        return None
    finally:
        # Always release the connection -- also on the "not found" and
        # error paths, which previously leaked it.
        if conn is not None:
            conn.close()
    return row[0] if row else None
+
+ Pi Search +
+ @jasongzy + + + + + +
+
+ +
+
+

+ + + + + +

从 π 的前 10 亿位中寻找你的生日!
+

+
+
+ date_range + + +
必须为正确的 8 位年月日格式
+
+


+ +
+ +
+ +

+
+ +

+ 20501231) { + echo '🤩 在你的时间线里,伟大复兴实现了吗?'; + } else { + $start_time = microtime(true); + class MyDB extends SQLite3 + { + function __construct() + { + $this->open('data/pi.db'); + } + } + $db = new MyDB(); + if (!$db) { + echo $db->lastErrorMsg(); + } + $result = @$db->query("SELECT POSITION from BIRTHDAY where DATE='{$date}'"); + if ($row = $result->fetchArray(SQLITE3_ASSOC)) { + echo '' . $date . ' 首次出现在 π 的小数点后第 ' . $row['POSITION'] . ' 位!'; + echo '
'; + echo '想要验证结果是否正确?'; + } else { + echo '🧐 那年的 2 月真的有 29 天吗?'; + } + $db->close(); + $end_time = microtime(true); + echo '

⚡ 查询耗时 ' . round(($end_time - $start_time) * 1000, 2) . ' ms'; + } + } + } + ?> +

+
+
+
+ 👉 任意数字串搜索 / 指定位数上下文显示 / 更多常数(e/√2)查询 +
+
+
+
+
import re  # retained: part of the module's original import list
import sys

# Default location of the y-cruncher output used by the command line mode.
_PI_FILE = r"data/Pi - Dec - Chudnovsky.txt"


def get_digits(path: str):
    """Read the whole digits file into memory.

    Args:
        path: Path of the text file holding the constant (expected to
            start with the integer part and the decimal point, ``"3."``).

    Returns:
        The file content as one string, or ``None`` if the file could not
        be read (the error is printed).
    """
    print('Reading file: "%s" ...' % path)
    try:
        # ``with`` guarantees the handle is closed even if read() fails.
        with open(path, "r") as f:
            return f.read()
    except (OSError, UnicodeError, MemoryError) as e:
        print(e)
        return None


def search_pi(number: str, digits: str):
    """Find the first occurrence of *number* among the decimal places.

    The needle is matched literally (not as a regular expression) and the
    leading ``"3."`` of *digits* is skipped, so only digits after the
    decimal point can match.

    Args:
        number: Digit string to look for.
        digits: Full text of the constant, starting with ``"3."``.

    Returns:
        The 1-based position after the decimal point where the match
        starts, or ``None`` when *number* is empty or does not occur.
    """
    if not number:
        return None
    # Start at index 2 to skip "3."; str.find is a plain substring search.
    index = digits.find(number, 2)
    if index < 0:
        return None
    # minus 1: "3." (-2) and index start from 0 (+1)
    return index - 1


def show_digits(position: int, digits: str, length: int = 20):
    """Return *length* digits starting at 1-based decimal place *position*.

    Args:
        position: 1-based position after the decimal point.
        digits: Full text of the constant, starting with ``"3."``.
        length: Number of characters to return.

    Returns:
        The requested slice, or ``None`` when *position* or *length* is
        not positive or the slice would run past the end of *digits*.
    """
    if position <= 0 or length <= 0:
        return None
    start = position + 1  # +2 for "3.", -1 for the 1-based position
    end = start + length
    if end > len(digits):
        return None
    return digits[start:end]


def _main(argv):
    """Command line entry: one argument searches, two arguments show digits."""
    if len(argv) <= 1:
        print("No parameter is given!")
        return

    if len(argv) == 2:  # search_pi
        number = argv[1]
        if not number.isdecimal():
            print("Invalid digits!")
            return
        digits = get_digits(_PI_FILE)
        if not digits:
            print("Pi file error!")
            return
        position = search_pi(number, digits)
        print(position if position else "Not found!")
        return

    # show_digits
    position, length = argv[1], argv[2]
    if not (position.isdecimal() and length.isdecimal()):
        print("Invalid position!")
        return
    digits = get_digits(_PI_FILE)
    if not digits:
        print("Pi file error!")
        return
    result = show_digits(int(position), digits, int(length))
    print(result if result else "Invalid position!")


if __name__ == "__main__":
    _main(sys.argv)