# Semantic Textual Similarity

Last updated: March 17, 2024

In [11]:
import os

from IPython.display import display
import pandas as pd
from pgvector.psycopg import register_vector
from sentence_transformers import SentenceTransformer
from sqlalchemy import create_engine

In [2]:
# Build the SQLAlchemy engine from the DATABASE_URL environment variable.
# Indexing os.environ (rather than os.getenv) fails fast with a KeyError that
# names the missing variable instead of handing None to create_engine.
# NOTE(review): the search cells call pgvector.psycopg.register_vector on the
# raw driver connection, so the URL presumably selects the psycopg (v3)
# driver -- confirm.
pg_engine = create_engine(os.environ["DATABASE_URL"])

In [3]:
%%time

model = SentenceTransformer("all-mpnet-base-v2")

CPU times: user 1.36 s, sys: 1.98 s, total: 3.34 s
Wall time: 2.14 s


In [12]:
%%time

search = "traffic stops"
search_embedding = model.encode(search)

with pg_engine.connect() as conn:
    register_vector(conn.connection.driver_connection)
    df = pd.read_sql(
        f"""
        SELECT
            statute
            , section_name
            , content
            , content_embedding <=> %(search_embedding)s AS nearest
        FROM statutes
        ORDER BY content_embedding <=> %(search_embedding)s ASC
        """,
        conn,
        params={'search_embedding': search_embedding}
    )
with pd.option_context('display.max_colwidth', 400):
    display(df)

Unnamed: 0,statute,section_name,content,nearest
0,20-158,Vehicle control signs and signals,"§ 20‑158. Vehicle control signs and signals.\n(a)\tThe Department of Transportation, with reference to State highways, and local authorities, with reference to highways under their jurisdiction, are hereby authorized to control vehicles:\n(1)\tAt intersections, by erecting or installing stop signs requiring vehicles to come to a complete stop at the entrance to that portion of the intersectio...",0.454330
1,20-217,evidence of identity of driver,"§ 20‑217. Motor vehicles to stop for properly marked and designated school buses in certain instances; evidence of identity of driver.\n(a)\tWhen a school bus is displaying its mechanical stop signal or flashing red lights and the bus is stopped for the purpose of receiving or discharging passengers, the driver of any other vehicle that approaches the school bus from any direction on the same...",0.461436
2,20-161,removal of vehicles from public highway,"§ 20‑161. Stopping on highway prohibited; warning signals; removal of vehicles from public highway.\n(a)\tNo person shall park or leave standing any vehicle, whether attended or unattended, upon the main‑traveled portion of any highway or highway bridge with the speed limit posted less than 45 miles per hour unless the vehicle is disabled to such an extent that it is impossible to avoid stopp...",0.494937
3,20-142,Certain vehicles must stop at railroad grade crossing,"§ 20‑142.3. Certain vehicles must stop at railroad grade crossing.\n(a)\tBefore crossing at grade any track or tracks of a railroad, the driver of any school bus, any activity bus, any motor vehicle carrying passengers for compensation, any commercial motor vehicle listed in 49 C.F.R. § 392.10, and any motor vehicle with a capacity of 16 or more persons shall stop the vehicle within 50 feet b...",0.499138
4,20-142,Stop when traffic obstructed,"§ 20‑142.5. Stop when traffic obstructed.\nNo driver shall enter an intersection or a marked crosswalk or drive onto any railroad grade crossing unless there is sufficient space on the other side of the intersection, crosswalk, or railroad grade crossing to accommodate the vehicle he is operating without obstructing the passage of other vehicles, pedestrians, or railroad trains or on‑track eq...",0.536066
...,...,...,...,...
8107,15-145,Form of bill for perjury,"§ 15‑145. Form of bill for perjury.\nIn every indictment for willful and corrupt perjury it is sufficient to set forth the substance of the offense charged upon the defendant, and by what court, or before whom, the oath was taken (averring such court or person to have competent authority to administer the same), together with the proper averments to falsify the matter wherein the perjury is a...",1.117534
8108,28A-21-5,Vouchers presumptive evidence,"§ 28A‑21‑5. Vouchers presumptive evidence.\nVouchers, without other proof, are presumptive evidence of disbursement, unless impeached. If lost, the accounting party must, if required, make oath to that fact setting forth the manner of loss, and state the contents and purport of the voucher. (C.C.P., s. 480; Code, s. 1401; Rev., s. 101; C.S., s. 107; 1973, c. 1329, s. 3.)",1.117835
8109,10B-102,Scope of this Article,"§ 10B‑102. (Effective until July 1, 2024) Scope of this Article.\nArticle 1 of this Chapter applies to all acts authorized under this Article unless the provisions of Article 1 directly conflict with the provisions of this Article, in which case provisions of Article 2 shall control. (2005‑391, s. 4.)\n \n§ 10B‑102. (Effective July 1, 2024) Scope of this Article.\n(a)\tArticle 1 of this Cha...",1.121110
8110,10B-134,credential analysis,"§ 10B‑134.11. (Effective July 1, 2024) Verification of identity; identity proofing; credential analysis.\n(a)\tPrior to the remote electronic notarial act, the electronic notary shall verify each remotely located principal's identity through one of the following methods:\n(1)\tThe remotely located principal creating the electronic signature is personally known to the electronic notary.\n(2)\t...",1.121283


CPU times: user 74.4 ms, sys: 98.1 ms, total: 173 ms
Wall time: 330 ms


In [14]:
%%time

search = "internet"
search_embedding = model.encode(search)

with pg_engine.connect() as conn:
    register_vector(conn.connection.driver_connection)
    df = pd.read_sql(
        f"""
        SELECT
            statute
            , section_name
            , content
            , content_embedding <=> %(search_embedding)s AS nearest
        FROM statutes
        ORDER BY content_embedding <=> %(search_embedding)s ASC
        """,
        conn,
        params={'search_embedding': search_embedding}
    )
with pd.option_context('display.max_colwidth', 400):
    display(df)

Unnamed: 0,statute,section_name,content,nearest
0,14-453,Definitions,"Article 60.\nComputer‑Related Crime.\n§ 14‑453. Definitions.\nAs used in this Article, unless the context clearly requires otherwise, the following terms have the meanings specified:\n(1)\t""Access"" means to instruct, communicate with, cause input, cause output, cause data processing, or otherwise make use of any resources of a computer, computer system, or computer network.\n(1a)\t""Authorizat...",0.657100
1,14-208,Definitions,"§ 14‑208.6. Definitions.\nThe following definitions apply in this Article:\n(1a)\tAggravated offense. – Any criminal offense that includes either of the following: (i) engaging in a sexual act involving vaginal, anal, or oral penetration with a victim of any age through the use of force or the threat of serious violence; or (ii) engaging in a sexual act involving vaginal, anal, or oral penetr...",0.657393
2,14-453,Jurisdiction,"§ 14‑453.2. Jurisdiction.\nAny offense under this Article committed by the use of electronic communication may be deemed to have been committed where the electronic communication was originally sent or where it was originally received in this State. ""Electronic communication"" means the same as the term is defined in G.S. 14‑196.3(a). (2002‑157, s. 3.)",0.689741
3,14-456,Denial of government computer services to an authorized user,"§ 14‑456.1. Denial of government computer services to an authorized user.\n(a)\tAny person who willfully and without authorization denies or causes the denial of government computer services is guilty of a Class H felony. For the purposes of this section, the term ""government computer service"" means any service provided or performed by a government computer as defined in G.S. 14‑454.1.\n(b)\t...",0.701635
4,14-196,Cyberstalking,"§ 14‑196.3. Cyberstalking.\n(a)\tThe following definitions apply in this section:\n(1)\tElectronic communication. – Any transfer of signs, signals, writing, images, sounds, data, or intelligence of any nature, transmitted in whole or in part by a wire, radio, computer, electromagnetic, photoelectric, or photo‑optical system.\n(2)\tElectronic mail. – The transmission of information or communic...",0.712097
...,...,...,...,...
8107,22-4,Promise to revive debt of bankrupt,"§ 22‑4. Promise to revive debt of bankrupt.\nNo promise to pay a debt discharged by any decree of a court of competent jurisdiction, in any proceeding in bankruptcy, shall be received in evidence unless such promise is in writing and signed by the party to be charged therewith. (1899, c. 57; Rev., s. 978; C.S., s. 990.)",1.115908
8108,32-60,Effect of provisions in instrument,"§ 32‑60. Effect of provisions in instrument.\nIn those instances where the instrument creating the trust or other fiduciary relationship provides that the compensation of the fiduciary shall be the amount ""provided by law"", the ""maximum amount provided by law"", or other similar language, or references former G.S. 32‑50, this language shall be construed as an intention that the trustee or othe...",1.117999
8109,1-345,Life tenant recovers from remainderman,"§ 1‑345. Life tenant recovers from remainderman.\nIf the plaintiff claims only an estate for life in the land recovered and pays any sum allowed to the defendant for improvements, he or his personal representative may recover at the determination of his estate from the remainderman or reversioner, the value of the said improvements as they then exist, not exceeding the amount as paid by him, ...",1.119118
8110,1-507,Employment and compensation of professionals,"§ 1‑507.31. Employment and compensation of professionals.\n(a)\tEmployment. – To represent or assist the receiver in carrying out the receiver's duties, the receiver may employ attorneys, accountants, appraisers, brokers, agents, auctioneers, or other professionals that do not hold or represent an interest adverse to the receivership.\nA person is not disqualified for employment under this su...",1.127998


CPU times: user 126 ms, sys: 40.2 ms, total: 166 ms
Wall time: 779 ms
