Skip to content

Commit

Permalink
Merge branch 'stage' into production
Browse files Browse the repository at this point in the history
  • Loading branch information
moz-dfeller committed Jan 10, 2024
2 parents 82b9448 + 67c8ddd commit 07f9401
Show file tree
Hide file tree
Showing 12 changed files with 342 additions and 102 deletions.
96 changes: 96 additions & 0 deletions scripts/import-downloaders.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
const mysql = require('mysql')
const path = require('path')
const fs = require('fs/promises')
const { getConfig } = require('../server/js/config-helper')

// Folder (next to this script) whose entries are read one by one by `main`.
const DOWNLOADERS_FOLDER = path.join(__dirname, 'downloaders')

// MySQL connection settings are taken from the server's config helper.
const { MYSQLHOST, MYSQLUSER, MYSQLPASS, MYSQLDBNAME } = getConfig()

// Options object handed to `mysql.createConnection` below.
const dbConfig = {
host: MYSQLHOST,
user: MYSQLUSER,
password: MYSQLPASS,
database: MYSQLDBNAME,
}

// Single connection used by every query this script issues (see `queryDb`).
const db = mysql.createConnection(dbConfig)

// Maximum number of e-mail rows sent in one INSERT statement.
const BATCH_SIZE = 1000

/**
 * Promise wrapper around the callback-based `db.query`.
 *
 * @param query  SQL text, optionally containing `?` placeholders.
 * @param params Values substituted for the placeholders.
 * @returns A promise resolving to the FIRST element of the driver's result
 *          (`results[0]`): the first row of a SELECT, or `undefined` when the
 *          result has no element at index 0.
 *          Rejects with the driver error when the query fails, or with an
 *          `Error('Something went wrong')` when the driver returns no result.
 */
const queryDb = (query, params) =>
  new Promise((resolve, reject) => {
    db.query(query, params, (err, results) => {
      if (err) {
        // Stop here: without the return the code below would dereference the
        // missing results and log a misleading `undefined`.
        reject(err)
        return
      }

      if (results == null) {
        console.log(results)
        reject(new Error('Something went wrong'))
        return
      }

      resolve(results[0])
    })
  })

/**
 * Returns a promise that settles (with no value) once `ms` milliseconds have
 * elapsed.
 */
const sleep = ms =>
  new Promise(wake => {
    setTimeout(wake, ms)
  })

/**
 * Imports the downloader e-mails of one release file into `downloaders`.
 *
 * The file `<DOWNLOADERS_FOLDER>/<filepath>` is parsed as a JSON array whose
 * entries expose `entry.user.email`. Every e-mail is inserted with:
 *   - the locale named 'unknown',
 *   - the dataset whose `release_dir` equals the file name up to `.json`,
 *   - the referrer named 'Hugging Face'.
 * Rows are written in chunks of `BATCH_SIZE`, pausing between chunks.
 *
 * @param filepath File name relative to `DOWNLOADERS_FOLDER`.
 * @throws Error when the referrer, dataset or locale row cannot be found.
 */
const processRelease = async filepath => {
  const PAUSE_BETWEEN_BATCHES_MS = 2000

  const contents = await fs.readFile(path.join(DOWNLOADERS_FOLDER, filepath))
  const users = JSON.parse(contents)
  const userEmails = users.map(entry => entry.user.email)

  // Nothing to insert: skip the lookups entirely.
  if (userEmails.length === 0) return

  const referrer = 'Hugging Face'
  const releaseDir = filepath.split('.json')[0]

  const referrerId = await queryDb(`SELECT id FROM referrers WHERE name = ?`, [
    referrer,
  ])
  const datasetId = await queryDb(
    `SELECT id FROM datasets WHERE release_dir = ?`,
    [releaseDir]
  )
  const unknownLocaleId = await queryDb(
    `SELECT id FROM locales WHERE name = ?`,
    ['unknown']
  )

  // Fail with a readable message instead of a TypeError on `.id` later on.
  if (!referrerId) throw new Error(`Referrer "${referrer}" not found`)
  if (!datasetId) {
    throw new Error(`Dataset with release_dir "${releaseDir}" not found`)
  }
  if (!unknownLocaleId) throw new Error('Locale "unknown" not found')

  for (let start = 0; start < userEmails.length; start += BATCH_SIZE) {
    const end = Math.min(start + BATCH_SIZE, userEmails.length)
    const values = userEmails
      .slice(start, end)
      .map(email => [email, unknownLocaleId.id, datasetId.id, referrerId.id])

    // Report progress without printing the e-mail addresses themselves.
    console.log(
      `${releaseDir}: inserting rows ${start + 1}-${end} of ${userEmails.length}`
    )

    // `VALUES ?` with a nested array expands to a multi-row insert.
    await queryDb(
      `
      INSERT INTO downloaders (email, locale_id, dataset_id, referrer_id)
      VALUES ?
      `,
      [values]
    )

    // Only pause when another batch follows.
    if (end < userEmails.length) {
      await sleep(PAUSE_BETWEEN_BATCHES_MS)
    }
  }
}

/**
 * Entry point: processes every entry of `DOWNLOADERS_FOLDER` sequentially,
 * then closes the database connection so the process can exit.
 */
const main = async () => {
  try {
    const releases = await fs.readdir(DOWNLOADERS_FOLDER)

    for (const release of releases) {
      await processRelease(release)
    }
  } finally {
    // An open mysql connection keeps the event loop alive; end it whether the
    // import succeeded or failed.
    db.end(err => {
      if (err) console.error(err)
    })
  }
}

// Surface failures and signal them through the exit code instead of leaving
// an unhandled promise rejection.
main().catch(err => {
  console.error(err)
  process.exitCode = 1
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { pipe } from 'fp-ts/lib/function'
import { taskEither as TE } from 'fp-ts'
import { queryDb } from '../../../infrastructure/db/mysql'

/**
 * Looks up the e-mail stored in `user_clients` for the given client id.
 *
 * @param id Value matched against `user_clients.client_id`.
 * @returns A TaskEither yielding the e-mail of the first matching row, or a
 *          Left carrying an `Error` when the query fails or no row (or no
 *          e-mail) exists for `id`.
 */
export const fetchUserClientEmailById = (id: string) =>
  pipe(
    queryDb(`
      SELECT email FROM user_clients WHERE client_id = ?
    `)([id]),
    // chainW rather than map: a missing row must become a Left. Throwing
    // inside TE.map would escape the Either channel as a rejected promise.
    TE.chainW(([[row]]: Array<Array<{ email: string }> | Array<any>>) => {
      const email: string | null | undefined = row?.email
      return pipe(
        email,
        TE.fromNullable(new Error(`No user client email found for client ${id}`))
      )
    })
  )
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,30 @@ import {
} from '../../repository/bulk-submission-repository'
import { JobQueue } from '../../../../infrastructure/queues/types/JobQueue'
import { BulkSubmissionUploadJob } from '../../../../infrastructure/queues/types/BulkSubmissionJob'
import { fetchUserClientEmailById } from '../../repository/user-client-repository'

export const addBulkSubmission =
(getLocaleId: (a: string) => TE.TaskEither<Error, number>) =>
(fetchClientEmail: (id: string) => TE.TaskEither<Error, string>) =>
(createBulkSubmissionFilepath: (locale: string, data: string) => string) =>
(jobQueue: JobQueue<BulkSubmissionUploadJob>) =>
(insertIntoDb: (a: BulkSubmission) => TE.TaskEither<Error, boolean>) =>
(cmd: AddBulkSubmissionCommand) => {
return pipe(
TE.Do,
TE.let('filepath', () => createBulkSubmissionFilepath(cmd.locale, cmd.file)),
TE.let('filepath', () =>
createBulkSubmissionFilepath(cmd.locale, cmd.file)
),
TE.bind('localeId', () => getLocaleId(cmd.locale)),
TE.bind('addToQueue', ({ filepath }) =>
TE.bind('userClientEmail', () => fetchClientEmail(cmd.submitter)),
TE.bind('addToQueue', ({ filepath, userClientEmail }) =>
TE.fromTask(
jobQueue.addJob({
filepath,
filename: cmd.filename,
localeName: cmd.locale,
data: cmd.file,
userClientEmail: userClientEmail,
})
)
),
Expand All @@ -50,6 +56,10 @@ export const addBulkSubmission =
)
}

export const addBulkSubmissionCommandHandler = addBulkSubmission(getLocaleIdF)(createBulkSubmissionFilepath)(BulkSubmissionUploadJobQueue)(
insertBulkSubmissionIntoDb
)
export const addBulkSubmissionCommandHandler =
addBulkSubmission
(getLocaleIdF)
(fetchUserClientEmailById)
(createBulkSubmissionFilepath)
(BulkSubmissionUploadJobQueue)
(insertBulkSubmissionIntoDb)
5 changes: 5 additions & 0 deletions server/src/core/bulk-submissions/types/bulk-submission.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@ export type BulkSubmission = {
source: string
}

/** Contact details attached to a bulk submission; currently only an e-mail address. */
export type BulkSubmissionContactInformation = {
// E-mail address of the contact.
email: string
}

/** Fields describing one bulk-submission e-mail. */
export type BulkSubmissionEmailData = {
// Presumably the recipient address (inferred from the name — confirm against the sender).
emailTo: string
// Location of the submitted file; NOTE(review): looks like callers may pass a URL here — confirm.
filepath: string
// Name of the submitted file.
filename: string
// Locale of the submission.
languageLocale: string
// Contact details of the submitter.
contact: BulkSubmissionContactInformation
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ export const processBulkSubmissionUpload =
filepath: downloadUrl,
filename: job.data.filename,
languageLocale: job.data.localeName,
contact: {
email: job.data.userClientEmail
}
})
),
TE.fold(
Expand All @@ -69,10 +72,10 @@ export const processBulkSubmissionUpload =
)()
}

export const bulkSubmissionUploadProcessor = processBulkSubmissionUpload(
uploadToBucket(BULK_SUBMISSION_BUCKET)
)(doesFileExistInBucket(BULK_SUBMISSION_BUCKET))(
makePublicInBucket(BULK_SUBMISSION_BUCKET)
)(getPublicUrlFromBucket(BULK_SUBMISSION_BUCKET))(
sendBulkSubmissionNotificationEmail
)
export const bulkSubmissionUploadProcessor =
processBulkSubmissionUpload
(uploadToBucket(BULK_SUBMISSION_BUCKET))
(doesFileExistInBucket(BULK_SUBMISSION_BUCKET))
(makePublicInBucket(BULK_SUBMISSION_BUCKET))
(getPublicUrlFromBucket(BULK_SUBMISSION_BUCKET))
(sendBulkSubmissionNotificationEmail)
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ export type BulkSubmissionUploadJob = {
filepath: string
filename: string
localeName: string
userClientEmail: string
}
Loading

0 comments on commit 07f9401

Please sign in to comment.