Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

29 validity check email format #30

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .cljfmt.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{:paths ["src" "tests/unit" "tests/integration"]}
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.10.0/envs/dqt
4 changes: 2 additions & 2 deletions README.org
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,11 @@ Run =docker compose up= to have Postgres running
bb test:watch
#+end_src

#+begin_src
#+begin_src shell
$ bin/koacha
#+end_src

#+begin_src
#+begin_src shell
$ bin/koacha --watch
#+end_src

Expand Down
14 changes: 14 additions & 0 deletions bin/dqt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
#
# Runs the dqt uberjar against the example Postgres configuration and the
# example `employees` table definition.
#
# The PostgreSQL JDBC driver is put on the classpath next to the dqt jar.
# Its location is derived from the local Maven repository instead of a
# user-specific absolute path, so the script works on any machine.
# Override M2_REPO to point at a non-default Maven repository.

set -ex

M2_REPO="${M2_REPO:-${HOME}/.m2/repository}"
POSTGRES_JAR="${M2_REPO}/org/postgresql/postgresql/42.3.5/postgresql-42.3.5.jar"
DQT_JAR="./target/dqt-0.1.0-SNAPSHOT.jar"

# Alternative invocations kept for reference:
# export CLASSPATH="${POSTGRES_JAR}"
# java -cp "${DQT_JAR}" dqt.core run -d examples/postgres.edn -t examples/tables/employees.edn
# java -jar "${DQT_JAR}" run -d examples/postgres.edn -t examples/tables/employees.edn

java -cp "${POSTGRES_JAR}:${DQT_JAR}" dqt.core run -d examples/postgres.edn -t examples/tables/employees.edn
142 changes: 142 additions & 0 deletions dev/user.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
(ns user
(:require
[dqt.core :as core]
[honey.sql :as honey]
[honey.sql.helpers :refer [select from where]]
[next.jdbc :as jdbc]
[next.jdbc.sql :as sql]))


(def db
  "Connection spec map handed to next.jdbc by the scratch expressions in
  this namespace: a PostgreSQL database named \"postgres\" on localhost:5432,
  with SSL switched off."
  {:dbtype     "postgresql"
   :host       "localhost"
   :port       5432
   :dbname     "postgres"
   :user       "postgres"
   :password   "postgres"
   :ssl        false
   :sslfactory "org.postgresql.ssl.NonValidatingFactory"})


(def sqlmap
  "Sample HoneySQL query map: selects first-name, last-name and hired-date
  from employees where employee-id equals 100."
  {:from   [:employees]
   :select [:first-name :last-name :hired-date]
   :where  [:= :employee-id 100]})


;; REPL scratch pad.
;;
;; Everything below is wrapped in a `comment` form so that loading this
;; namespace does not run queries against the database, and does not fail to
;; compile on symbols that are not defined in this file. Evaluate the
;; individual forms from an editor-connected REPL instead.
(comment

  ;; information-schema column
  (->> {:select [:column-name :data-type :is-nullable]
        :from :information_schema.columns
        :where [:= :table-name "employees"]}
       honey/format
       (jdbc/execute! db))


  (-> (select [[:count :*]])
      (from :foo)
      honey/format)


  (-> (select :*)
      (select :employee-id)
      (select [[:count :*] :my-count])
      (select [[:avg :salary] :average-salary])
      (from :foo)
      (where [:= :a 1] [:< :b 100])
      honey/format)


  (-> {:select [:id
                [[:over
                  [[:avg :salary]
                   {:partition-by [:department]
                    :order-by [:designation]}
                   :Average]
                  [[:max :salary]
                   :w
                   :MaxSalary]]]]
       :from [:employee]
       :window [:w {:partition-by [:department]}]}
      (honey/format {:pretty true}))


  (-> (select :%count.*) (from :foo) honey/format)

  (-> (select [:%count.*]) (from :foo) honey/format)

  (-> (select [[:count :*]]) (from :foo) honey/format)

  (-> (select :%max.id) (from :foo) honey/format)


  ;; not a list, but vectors or symbols
  (-> (select [[:count :*] :my-count]
              :employee-id
              [[:avg :salary] :average-salary])
      honey/format)


  ;; need a list
  (-> {:select [[[:count :*] :my-count]
                :employee-id
                [[:avg :salary] :average-salary]]}
      honey/format)


  ;; function call with :% notation
  (-> {:select [[:%count.* :my-count]
                :employee-id
                [:%avg.salary :average-salary]]}
      honey/format)


  ;; NOTE(review): `ds` and `count*` are not defined or required anywhere in
  ;; this file; bind them in the REPL before evaluating the two forms below.
  (jdbc/execute! ds
                 (honey/format (count* :employees)))


  (sql/query ds
             (honey/format (count* :employees))))


;; Rich-comment block: the form below is read but never evaluated when this
;; namespace is loaded.
(comment

;; Calls `migratus/create` once per table name, each time with the
;; migration directory "migrations".
;; NOTE(review): the `migratus` alias is not required in this namespace's
;; `ns` form; require it in the REPL before evaluating this definition.
(defn create-files
[]
(migratus/create {:migration-dir "migrations"} "regions")
(migratus/create {:migration-dir "migrations"} "countries")
(migratus/create {:migration-dir "migrations"} "locations")
(migratus/create {:migration-dir "migrations"} "departments")
(migratus/create {:migration-dir "migrations"} "jobs")
(migratus/create {:migration-dir "migrations"} "employees")
(migratus/create {:migration-dir "migrations"} "dependents")))


(defn this-jar
  "Returns the filesystem path of the code source (jar or directory) that a
  class was loaded from, or nil when the class has no code source (for
  example classes loaded by the bootstrap class loader).

  Takes one optional argument:
    - a `Class`, used as-is;
    - a String, treated as a fully qualified class name and resolved with
      `Class/forName` (throws ClassNotFoundException if it cannot be loaded);
    - nothing / nil, in which case `(class *ns*)` is used."
  [& [target]]
  (let [^Class klass (cond
                       (nil? target)    (class *ns*)
                       (string? target) (Class/forName target)
                       :else            target)]
    ;; some-> short-circuits on a nil CodeSource instead of throwing an NPE.
    (some-> klass
            .getProtectionDomain
            .getCodeSource
            .getLocation
            .toURI
            .getPath)))


(defn version-3
  "Asks `this-jar` for the location associated with \"dqt.cli\" and returns
  whatever it yields.

  NOTE(review): confirm that \"dqt.cli\" names a loadable class; a namespace
  name is not necessarily one."
  []
  (let [target "dqt.cli"]
    (this-jar target)))


(defn version-2
  "Prints the code-source location of the compiled `dqt.core/-main` function
  and then the \"Build-number\" main attribute from the manifest of the jar
  at that location. Returns nil.

  Clojure compiles the function `-main` in namespace `dqt.core` to the class
  `dqt.core$_main` (no dot before the `$`), so that is the name looked up.
  Throws ClassNotFoundException when that class is not on the classpath.
  NOTE(review): building a `jar:` URL assumes the class was loaded from a jar
  rather than a classes directory."
  []
  (let [location     (.. (Class/forName "dqt.core$_main")
                         getProtectionDomain
                         getCodeSource
                         getLocation)
        manifest-url (java.net.URL.
                      (str "jar:" location "!/META-INF/MANIFEST.MF"))]
    (println location)
    ;; with-open closes the manifest stream even if parsing fails.
    (with-open [in (.openStream manifest-url)]
      (println "Version "
               (.getValue (.getMainAttributes (java.util.jar.Manifest. in))
                          "Build-number")))))
8 changes: 8 additions & 0 deletions examples/postgres.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{:dbtype "postgresql"
:dbname "postgres"
:host #or [#env DATABASE_HOSTNAME "localhost"]
:user "postgres"
:password "postgres"
:ssl false
:classname "org.postgresql.Driver"
:sslfactory "org.postgresql.ssl.NonValidatingFactory"}
38 changes: 38 additions & 0 deletions examples/tables/employees.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{:table-name :employees
:metrics [:row-count
:missing-count
:missing-percentage
:values-count
:values-percentage
:invalid-count
:invalid-percentage
:valid-count
:valid-percentage
:avg-length
:max-length
:min-length
:avg
:sum
:max
:min
:stddev
:variance]

:tests [[:row-count = 40]
[:avg-length-phone-number = 12]
[:stddev-salary > 4500]
[:sum-salary > 20000]
[:count-manager-id = 39]
[:missing-count-manager-id = 0]
[:missing-percentage-manager-id < 20]
[:values-percentage-manager-id < 30]
[:max-length-email <= 35]]

:columns [:dob
{:valid-format :date
:tests
[[:invalid-percentage == 0]]}
:first-name
{:valid-format :number-percentage
:tests
[[:invalid-percentage == 0]]}]}
3 changes: 3 additions & 0 deletions resources/sql/information-schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SELECT column_name, data_type, is_nullable
FROM information_schema.COLUMNS
WHERE TABLE_NAME = 'employees';
3 changes: 3 additions & 0 deletions resources/sql/row-count.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- :name count
SELECT COUNT(*)
FROM employees;
1 change: 1 addition & 0 deletions src/dqt/config.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(ns dqt.config)
27 changes: 27 additions & 0 deletions src/dqt/driver.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
(ns dqt.driver
(:import
(java.sql
Driver

Check warning on line 4 in src/dqt/driver.clj

View workflow job for this annotation

GitHub Actions / clj-kondo check

src/dqt/driver.clj#L4

[unused-import] Unused import Driver
DriverManager)))

;; (defn register
;; [driver]

;; )
;; Class drvClass = driverLoader.loadClass(driverClassName);
;; Driver driver = drvClass.newInstance();
;; (DriverManager/registerDriver)

(comment
;; for (Enumeration<Driver> e = DriverManager.getDrivers(); e.hasMoreElements();) {
;; Driver driver = e.nextElement();
;; drivers.add(driver.getClass().getName());
;; }
)
(defn- classes
  "Returns the fully qualified class name of `driver` as a String."
  [driver]
  (.getName (.getClass driver)))

(defn drivers
  "Returns a lazy sequence with the class name of every driver reported by
  `DriverManager/getDrivers`."
  []
  (->> (DriverManager/getDrivers)
       enumeration-seq
       (map classes)))
8 changes: 6 additions & 2 deletions src/dqt/query_runner.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@
[next.jdbc :as jdbc]
[next.jdbc.result-set :as rs]))

(defn ->sql
  "Formats the query `q` with `honey/format` and returns the result."
  [q]
  (-> q honey/format))

(defn execute!
"Execute query and builds result set with keys in kebab case"
([db query]
(execute! db query {}))
([db query opts]
(let [formatted-query (honey/format query)]
(let [formatted-query (->sql query)]
(println formatted-query)
(jdbc/execute! db formatted-query
(assoc opts
Expand All @@ -20,7 +24,7 @@
([db query]
(execute-one! db query {}))
([db query opts]
(let [formatted-query (honey/format query)]
(let [formatted-query (->sql query)]
(println formatted-query)
(jdbc/execute-one! db formatted-query
(assoc opts
Expand Down
7 changes: 7 additions & 0 deletions src/dqt/system.clj
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
:columns-metadata (ig/ref ::columns-metadata-enriched))
::calculated-metrics {:columns (ig/ref ::columns-metadata-enriched)
:sql-metrics (ig/ref ::sql-metrics)}
::validity-checks {:columns (ig/ref ::columns-metadata-enriched)}
::test-results {:sql-metrics (ig/ref ::sql-metrics)
:calculated-metrics (ig/ref ::calculated-metrics)
:validity-checks (ig/ref ::validity-checks)
:tests (:tests options)}
::report (ig/ref ::test-results)})

Expand All @@ -39,6 +41,11 @@
[_ {:keys [db table-name columns-metadata]}]
(m/get-metrics db table-name columns-metadata))

(defmethod ig/init-key ::validity-checks
  [_ {:keys [columns]}]
  ;; Placeholder: prints a marker line and then the received columns.
  ;; The component value is nil.
  (run! println ["validity checks" columns]))

(defmethod ig/init-key ::calculated-metrics
  [_ {:keys [columns sql-metrics]}]
  ;; Runs m/calculated-metrics for every column against the same sql-metrics
  ;; and pours the per-column results into a single map.
  (let [metrics-for (fn [column] (m/calculated-metrics column sql-metrics))]
    (into {} (map metrics-for) columns)))
Expand Down
14 changes: 14 additions & 0 deletions src/dqt/validity_checks.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
(ns dqt.validity-checks
(:require [dqt.query-runner :as q]))

Check warning on line 2 in src/dqt/validity_checks.clj

View workflow job for this annotation

GitHub Actions / clj-kondo check

src/dqt/validity_checks.clj#L2

[unused-namespace] namespace dqt.query-runner is required but never used

;; select EmailAddress from FindInvalidEmailAddressDemo
;; -> where EmailAddress NOT LIKE '%_@_%._%';

(def email-regex
  "SQL LIKE pattern that a well-formed email must match: at least one
  character, `@`, at least one character, `.`, at least one character.

  Kept as a plain String (not a java.util.regex.Pattern) because it is sent
  to the database as the right-hand side of a LIKE comparison, where `%` and
  `_` are the wildcards."
  "%_@_%._%")

(defn email
  "Builds a HoneySQL query map that selects the values of `column` in `table`
  that do NOT match `email-regex`, i.e. the invalid emails.

  column - keyword naming the column to check
  table  - table to read from, in any form HoneySQL accepts for :from

  Returns the query map; it is not formatted or executed here."
  [column table]
  {:select [column]
   :from   table
   ;; HoneySQL infix operators go first in the vector: [:not-like lhs rhs].
   :where  [:not-like column email-regex]})
22 changes: 22 additions & 0 deletions tables/department.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
table_name: department
metrics:
- row_count
- missing_count
- missing_percentage
- values_count
- values_percentage
- invalid_count
- invalid_percentage
- valid_count
- valid_percentage
- avg_length
- max_length
- min_length
- avg
- sum
- max
- min
- stddev
- variance
tests:
- row_count > 0
22 changes: 22 additions & 0 deletions tables/employee_department.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
table_name: employee_department
metrics:
- row_count
- missing_count
- missing_percentage
- values_count
- values_percentage
- invalid_count
- invalid_percentage
- valid_count
- valid_percentage
- avg_length
- max_length
- min_length
- avg
- sum
- max
- min
- stddev
- variance
tests:
- row_count > 0
Loading