Skip to content

Commit

Permalink
fetch_method db column (#19)
Browse files Browse the repository at this point in the history
- Add fetch_method column to urls table
- Update db creation sql and surrounding methods
- Change 'create' terminology to 'migrate' in prep to move towards real migrations
- Add "ForMigration" MySQL option, replacing "WithoutSchema" to make usage a little clearer
- Always destroy MySQL db between tests
  • Loading branch information
efixler committed Apr 22, 2024
1 parent e56a6e9 commit 2929f7b
Show file tree
Hide file tree
Showing 13 changed files with 125 additions and 68 deletions.
2 changes: 1 addition & 1 deletion cmd/scrape/main.go
Expand Up @@ -52,7 +52,7 @@ func main() {
if clear {
clearDatabase(dbFactory)
return
} else if dbFlags.Create {
} else if dbFlags.Migrate {
createDatabase(dbFactory)
return
} else if maintain {
Expand Down
17 changes: 11 additions & 6 deletions internal/cmd/database.go
Expand Up @@ -53,10 +53,10 @@ type DatabaseFlags struct {
database *envflags.Value[DatabaseSpec]
username *envflags.Value[string]
password *envflags.Value[string]
Create bool
Migrate bool
}

func AddDatabaseFlags(baseEnv string, flags *flag.FlagSet, createFlag bool) *DatabaseFlags {
func AddDatabaseFlags(baseEnv string, flags *flag.FlagSet, migrateFlag bool) *DatabaseFlags {
dbFlags := &DatabaseFlags{
database: NewDatabaseValue(baseEnv, DefaultDatabase),
username: envflags.NewString(baseEnv+"_USER", ""),
Expand All @@ -65,8 +65,13 @@ func AddDatabaseFlags(baseEnv string, flags *flag.FlagSet, createFlag bool) *Dat
dbFlags.database.AddTo(flags, "database", "Database type:path")
dbFlags.username.AddTo(flags, "db-user", "Database user")
dbFlags.password.AddTo(flags, "db-password", "Database password")
if createFlag {
flags.BoolVar(&dbFlags.Create, "create", false, "Create the database and exit")
if migrateFlag {
flags.BoolVar(
&dbFlags.Migrate,
"migrate",
false,
"Migrate the database to the latest version (creating if necessary)",
)
}
return dbFlags
}
Expand All @@ -76,7 +81,7 @@ func (f DatabaseFlags) String() DatabaseSpec {
}

func (f DatabaseFlags) Database() (store.Factory, error) {
return database(f.database.Get(), f.username.Get(), f.password.Get(), f.Create)
return database(f.database.Get(), f.username.Get(), f.password.Get(), f.Migrate)
}

func (f DatabaseFlags) MustDatabase() store.Factory {
Expand All @@ -100,7 +105,7 @@ func database(spec DatabaseSpec, username string, password string, noSchema bool
mysql.Password(password),
}
if noSchema {
options = append(options, mysql.WithoutSchema())
options = append(options, mysql.ForMigration())
}
return mysql.Factory(options...), nil
default:
Expand Down
4 changes: 2 additions & 2 deletions internal/storage/mysql/create.go
Expand Up @@ -20,8 +20,8 @@ func (s *Store) createSQL() (string, error) {
// The connection we need to use for create must be schema-less so
// that we can create the database, so we need to override that with
// the default schema here.
if conf.DBName == "" {
conf.DBName = dbSchema
if conf.TargetSchema == "" {
return "", errors.New("can't create database, empty target schema")
}
var buf bytes.Buffer
if err := tmpl.Execute(&buf, conf); err != nil {
Expand Down
38 changes: 24 additions & 14 deletions internal/storage/mysql/create.sql
@@ -1,11 +1,9 @@
-- {{.DBName}}
-- {{.TargetSchema}}
BEGIN;
CREATE DATABASE IF NOT EXISTS `{{.DBName}}` DEFAULT CHARACTER SET 'utf8mb4' COLLATE 'utf8mb4_0900_ai_ci' ;
USE {{.DBName}} ;
CREATE DATABASE IF NOT EXISTS `{{.TargetSchema}}` DEFAULT CHARACTER SET 'utf8mb4' COLLATE 'utf8mb4_0900_ai_ci' ;
USE {{.TargetSchema}} ;

DROP TABLE IF EXISTS `urls`;

CREATE TABLE `urls` (
CREATE TABLE IF NOT EXISTS `urls` (
`id` BIGINT UNSIGNED NOT NULL,
`url` VARCHAR(255) CHARACTER SET 'utf8mb4' COLLATE 'utf8mb4_0900_ai_ci' NOT NULL,
`parsed_url` VARCHAR(255) CHARACTER SET 'utf8mb4' COLLATE 'utf8mb4_0900_ai_ci' NOT NULL,
Expand All @@ -15,18 +13,30 @@ CREATE TABLE `urls` (
`content_text` MEDIUMTEXT CHARACTER SET 'utf8mb4' COLLATE 'utf8mb4_0900_ai_ci' NULL,
PRIMARY KEY (`id`));

DROP TABLE IF EXISTS `id_map`;

CREATE TABLE `id_map` (
CREATE TABLE IF NOT EXISTS `id_map` (
`requested_id` BIGINT UNSIGNED NOT NULL,
`canonical_id` BIGINT UNSIGNED NOT NULL,
PRIMARY KEY (`requested_id`)
);
);

CREATE ROLE IF NOT EXISTS scrape_app;
GRANT SELECT, INSERT, UPDATE, DELETE on {{.DBName}}.* to scrape_app;
CREATE ROLE IF NOT EXISTS scrape_admin;
GRANT ALL ON {{.DBName}}.* to scrape_admin;
-- Following two statements are added to support tracking headless
-- fetched state (or other alternate fetch methods)
-- The following cannot be executed idempotently
-- TODO: Goose migrations
ALTER TABLE urls ADD column fetch_method
INT UNSIGNED
NOT NULL DEFAULT 0;

CREATE INDEX fetch_method_expires_index ON urls (
expires DESC,
fetch_method ASC
);


CREATE ROLE IF NOT EXISTS scrape_app;
GRANT SELECT, INSERT, UPDATE, DELETE on {{.TargetSchema}}.* to scrape_app;
CREATE ROLE IF NOT EXISTS scrape_admin;
GRANT ALL ON {{.TargetSchema}}.* to scrape_admin;

COMMIT;
SET AUTOCOMMIT = 1;
19 changes: 15 additions & 4 deletions internal/storage/mysql/create_test.go
Expand Up @@ -10,22 +10,33 @@ import (
// Mysql integration tests assume a running mysql server
// at localhost:3306 with a root login and no password.

func testStore() *Store {
func testDatabaseForCreate(t *testing.T) *Store {
db, _ := New(
Username("root"),
Password(""),
NetAddress("localhost:3306"),
Schema("scrape_test"),
ForMigration(),
)
return db.(*Store)
// todo: enable alternate names when also creating
// the database.
t.Cleanup(func() {
if _, err := db.Exec("DROP DATABASE IF EXISTS scrape_test;"); err != nil {
t.Logf("error dropping test mysql database: %q", err)
}
if err := db.Close(); err != nil {
t.Errorf("Error closing mysql database: %v", err)
}
})
return db // .(*Store)
}

func TestCreate(t *testing.T) {
db := testStore()
db := testDatabaseForCreate(t)
err := db.Open(context.Background())
if err != nil {
t.Errorf("Error opening database: %v", err)
}
defer db.Close()
err = db.Create()
if err != nil {
t.Errorf("Error creating database: %v", err)
Expand Down
1 change: 0 additions & 1 deletion internal/storage/mysql/mysql.go
Expand Up @@ -17,7 +17,6 @@ func Factory(options ...Option) store.Factory {
}

func New(options ...Option) (*Store, error) {

config := defaultConfig()
for _, opt := range options {
if err := opt(&config); err != nil {
Expand Down
9 changes: 6 additions & 3 deletions internal/storage/mysql/options.go
Expand Up @@ -22,7 +22,7 @@ const (
TCP ConnectionType = "tcp"
Unix ConnectionType = "unix"
DefaultPort = 3306
dbSchema = "scrape"
defaultSchema = "scrape"
utf8mb4General Collation = "utf8mb4_general_ci"
utf8mb4Unicode9 Collation = "utf8mb4_0900_ai_ci"
DefaultMaxConnections = 32
Expand Down Expand Up @@ -73,12 +73,14 @@ func Password(password string) Option {
func Schema(name string) Option {
return func(c *Config) error {
c.DBName = name
c.TargetSchema = name
return nil
}
}

func WithoutSchema() Option {
func ForMigration() Option {
return func(c *Config) error {
c.TargetSchema = c.DBName
c.DBName = ""
return nil
}
Expand All @@ -93,6 +95,7 @@ func WithQueryTimeout(timeout time.Duration) Option {

type Config struct {
mysql.Config
TargetSchema string // here for schema-less conns, e.g. for create
queryTimeout time.Duration
maxConns int
connMaxLifetime time.Duration
Expand All @@ -101,7 +104,7 @@ type Config struct {
func defaultConfig() Config {
cfg := mysql.NewConfig()
cfg.Net = string(TCP)
cfg.DBName = dbSchema
cfg.DBName = defaultSchema
cfg.Loc = time.UTC
cfg.Collation = string(utf8mb4Unicode9)
cfg.Timeout = DefaultTimeout // dial timeout
Expand Down
28 changes: 23 additions & 5 deletions internal/storage/mysql/options_test.go
Expand Up @@ -113,12 +113,30 @@ func TestAddress(t *testing.T) {
}
}

func TestWithoutSchema(t *testing.T) {
func TestForMigration(t *testing.T) {
t.Parallel()
c := defaultConfig()
WithoutSchema()(&c)
if c.DBName != "" {
t.Errorf("WithoutSchema: unexpected schema: %s", c.DBName)
tests := []struct {
name string
schema string
expectedTarget string
}{
{"default", "", defaultSchema},
{"explicit default", defaultSchema, defaultSchema},
{"override", "scrape_test", "scrape_test"},
}

for _, test := range tests {
c := defaultConfig()
if test.schema != "" {
Schema(test.schema)(&c)
}
ForMigration()(&c)
if c.DBName != "" {
t.Errorf("Expected empty dsn schema, got %q", c.DBName)
}
if c.TargetSchema != test.expectedTarget {
t.Errorf("ForMigration: expected target schema %q, got %q", test.expectedTarget, c.TargetSchema)
}
}
}

Expand Down
19 changes: 16 additions & 3 deletions internal/storage/mysql_loader_test.go
Expand Up @@ -6,6 +6,7 @@ import (
"bytes"
"context"
_ "embed"
"fmt"
"testing"
"text/template"

Expand All @@ -22,23 +23,28 @@ const (
var createSQL string

type mysqlConfig struct {
DBName string
TargetSchema string
}

var (
createTemplate = template.Must(template.New("create").Parse(createSQL))
dbConfig = mysqlConfig{DBName: testSchema}
dbConfig = mysqlConfig{TargetSchema: testSchema}
)

// Returns a new SQLStorage instance for testing. Each instance returns
// a freshly created db. Since a 'USE' statement is included in the create.sql
// subsequent queries will continue to use the test database.
func db(t *testing.T) *SQLStorage {
func getTestDatabase(t *testing.T) *SQLStorage {
db := New(database.MySQL, dsn)
err := db.Open(context.TODO())
if err != nil {
t.Fatalf("Error opening database: %v", err)
}
t.Cleanup(func() {
if err := db.Close(); err != nil {
t.Errorf("Error closing mysql database: %v", err)
}
})
var buf bytes.Buffer
if err = createTemplate.Execute(&buf, dbConfig); err != nil {
t.Fatalf("Error generating database create sql: %v", err)
Expand All @@ -47,5 +53,12 @@ func db(t *testing.T) *SQLStorage {
if err != nil {
t.Fatalf("Error creating database: %v", err)
}
t.Cleanup(func() {
q := fmt.Sprintf("DROP DATABASE %v;", dbConfig.TargetSchema)
if _, err := db.Exec(q); err != nil {
t.Logf("error dropping mysql test database: %v", err)
}

})
return db
}
22 changes: 15 additions & 7 deletions internal/storage/sqlite/create.sql
@@ -1,5 +1,4 @@
--
-- File generated with SQLiteStudio v3.4.4 on Thu Dec 14 22:50:06 2023
--
-- Text encoding used: UTF-8
--
Expand All @@ -11,9 +10,7 @@ BEGIN TRANSACTION;

-- Table: id_map

DROP TABLE IF EXISTS id_map;

CREATE TABLE id_map (
CREATE TABLE IF NOT EXISTS id_map (
requested_id INTEGER PRIMARY KEY ON CONFLICT REPLACE
NOT NULL,
canonical_id INTEGER NOT NULL
Expand All @@ -23,9 +20,7 @@ STRICT;


-- Table: urls
DROP TABLE IF EXISTS urls;

CREATE TABLE urls (
CREATE TABLE IF NOT EXISTS urls (
id INTEGER PRIMARY KEY ON CONFLICT REPLACE
NOT NULL,
url TEXT NOT NULL
Expand All @@ -39,5 +34,18 @@ CREATE TABLE urls (
WITHOUT ROWID,
STRICT;


-- Following two statements are added to support tracking headless
-- fetched state (or other alternate fetch methods)
-- The following cannot be executed idempotently
-- TODO: Goose migrations
ALTER TABLE urls ADD column fetch_method INTEGER NOT NULL DEFAULT 0;

CREATE INDEX IF NOT EXISTS fetch_method_expires_index ON urls (
expires DESC,
fetch_method ASC
);

COMMIT TRANSACTION;
PRAGMA wal_checkpoint(RESTART);

4 changes: 0 additions & 4 deletions internal/storage/sqlite/stats_test.go
Expand Up @@ -17,10 +17,6 @@ func TestStats(t *testing.T) {
if err != nil {
t.Fatal(err)
}
err = db.Create()
if err != nil {
t.Fatal(err)
}
sany, err := db.Stats()
if err != nil {
t.Fatal(err)
Expand Down
6 changes: 5 additions & 1 deletion internal/storage/sqlite_loader_test.go
Expand Up @@ -18,12 +18,16 @@ const (
//go:embed sqlite/create.sql
var createSQL string

func db(t *testing.T) *SQLStorage {
func getTestDatabase(t *testing.T) *SQLStorage {
db := New(database.SQLite, dsn)
err := db.Open(context.TODO())
if err != nil {
t.Fatalf("Error opening database: %v", err)
}
t.Cleanup(func() {
t.Logf("Cleaning up SQLite test database")
db.Close()
})
_, err = db.DB.Exec(createSQL)
if err != nil {
t.Fatalf("Error creating database: %v", err)
Expand Down

0 comments on commit 2929f7b

Please sign in to comment.