Skip to content

Commit

Permalink
feat: update user-fetch scripts (#30)
Browse files Browse the repository at this point in the history
  • Loading branch information
raisedadead committed Apr 6, 2023
1 parent 3034312 commit 913c7b9
Show file tree
Hide file tree
Showing 11 changed files with 787 additions and 1 deletion.
19 changes: 19 additions & 0 deletions accounts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
## Setup

```
# install dependencies
npm ci
# create the .env file
cp sample.env .env
```

You will need to edit the `.env` file to match your specific setup.

## Usage

### Generating the mailing list

```sh
npm run mailing-list ./emails.csv
```
71 changes: 71 additions & 0 deletions accounts/get-duplicate--emails.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
require('dotenv').config();

const { MongoClient } = require('mongodb');
const ora = require('ora');

const { MONGODB_URI } = process.env;

/**
 * Script entry point: connects to MongoDB and runs the duplicate-email
 * aggregation on that connection.
 *
 * Uses MONGODB_URI when it is set to a non-empty value; otherwise falls back
 * to a local `freecodecamp` database.
 *
 * @returns {Promise<void>} Settles once `getDuplicateEmails` has finished.
 */
async function main() {
  const fallbackUri =
    'mongodb://localhost:27017/freecodecamp?retryWrites=true&w=majority';
  const connectionOptions = {
    useNewUrlParser: true,
    useUnifiedTopology: true,
    poolSize: 500
  };

  const client = new MongoClient(MONGODB_URI || fallbackUri, connectionOptions);
  await client.connect();
  await getDuplicateEmails(client);
}

// Start the script; a rejection from main() is reported through console.error.
main().catch(console.error);

/**
 * Finds email addresses shared by more than one document in the `user`
 * collection and writes the groups to the `duplicateEmails` collection.
 *
 * Each output document uses the email as `_id`, carries a `dups` array of the
 * distinct `{ user_id, username }` pairs that share it, and a `count` of the
 * grouped user documents. The client is closed before this function settles,
 * whether the aggregation succeeded or failed.
 *
 * @param {object} client - A connected MongoDB client.
 * @returns {Promise<void>} Resolves once the aggregation has completed.
 * @throws Rethrows any error raised while running the aggregation.
 */
async function getDuplicateEmails(client) {
  const pipeline = [
    {
      $sort: { email: 1 }
    },
    {
      // The three conditions must live in an explicit $and: repeating the
      // `email` key inside one object literal keeps only the last entry, which
      // silently dropped the empty-string filter.
      $match: {
        $and: [
          { email: { $exists: true } },
          { email: { $ne: '' } },
          { email: { $ne: null } }
        ]
      }
    },
    {
      $group: {
        _id: '$email',
        dups: { $addToSet: { user_id: '$_id', username: '$username' } },
        count: { $sum: 1 }
      }
    },
    {
      // Keep only emails that occur on more than one user document.
      $match: { count: { $gt: 1 } }
    },
    {
      $out: 'duplicateEmails'
    }
  ];

  const options = {
    allowDiskUse: true,
    maxTimeMS: 0,
    // NOTE(review): presumably the default-named index on `email` — confirm it exists.
    hint: 'email_1',
    comment: 'duplicates',
    bypassDocumentValidation: true
  };

  const spinner = ora('Aggregating...');
  spinner.start();

  try {
    // Draining the cursor is what actually executes the pipeline.
    await client
      .db('freecodecamp')
      .collection('user')
      .aggregate(pipeline, options)
      .toArray();
    spinner.succeed('done.');
  } catch (err) {
    // Stop the spinner so the failure is visible, then let the caller report it.
    spinner.fail('Aggregation failed.');
    throw err;
  } finally {
    // Always release the connection so the process can exit.
    await client.close();
  }
}
71 changes: 71 additions & 0 deletions accounts/get-duplicate--usernames.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
require('dotenv').config();

const { MongoClient } = require('mongodb');
const ora = require('ora');

const { MONGODB_URI } = process.env;

/**
 * Script entry point: connects to MongoDB and runs the duplicate-username
 * aggregation on that connection.
 *
 * Uses MONGODB_URI when it is set to a non-empty value; otherwise falls back
 * to a local `freecodecamp` database.
 *
 * @returns {Promise<void>} Settles once `getDuplicateUsernames` has finished.
 */
async function main() {
  const localUri =
    'mongodb://localhost:27017/freecodecamp?retryWrites=true&w=majority';
  const target = MONGODB_URI || localUri;

  const mongoClient = new MongoClient(target, {
    useNewUrlParser: true,
    useUnifiedTopology: true,
    poolSize: 500
  });

  await mongoClient.connect();
  await getDuplicateUsernames(mongoClient);
}

// Start the script; a rejection from main() is reported through console.error.
main().catch(console.error);

/**
 * Finds usernames shared by more than one document in the `user` collection
 * and writes the groups to the `duplicateUsernames` collection.
 *
 * Each output document uses the username as `_id`, carries a `dups` array of
 * the distinct `{ user_id, email }` pairs that share it, and a `count` of the
 * grouped user documents. The client is closed before this function settles,
 * whether the aggregation succeeded or failed.
 *
 * @param {object} client - A connected MongoDB client.
 * @returns {Promise<void>} Resolves once the aggregation has completed.
 * @throws Rethrows any error raised while running the aggregation.
 */
async function getDuplicateUsernames(client) {
  const pipeline = [
    {
      $sort: { username: 1 }
    },
    {
      // The three conditions must live in an explicit $and: repeating the
      // `username` key inside one object literal keeps only the last entry,
      // which silently dropped the empty-string filter.
      $match: {
        $and: [
          { username: { $exists: true } },
          { username: { $ne: '' } },
          { username: { $ne: null } }
        ]
      }
    },
    {
      $group: {
        _id: '$username',
        dups: { $addToSet: { user_id: '$_id', email: '$email' } },
        count: { $sum: 1 }
      }
    },
    {
      // Keep only usernames that occur on more than one user document.
      $match: { count: { $gt: 1 } }
    },
    {
      $out: 'duplicateUsernames'
    }
  ];

  const options = {
    allowDiskUse: true,
    maxTimeMS: 0,
    // NOTE(review): presumably the default-named index on `username` — confirm it exists.
    hint: 'username_1',
    comment: 'duplicates',
    bypassDocumentValidation: true
  };

  const spinner = ora('Aggregating...');
  spinner.start();

  try {
    // Draining the cursor is what actually executes the pipeline.
    await client
      .db('freecodecamp')
      .collection('user')
      .aggregate(pipeline, options)
      .toArray();
    spinner.succeed('done.');
  } catch (err) {
    // Stop the spinner so the failure is visible, then let the caller report it.
    spinner.fail('Aggregation failed.');
    throw err;
  } finally {
    // Always release the connection so the process can exit.
    await client.close();
  }
}
110 changes: 110 additions & 0 deletions accounts/get-emails.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/* eslint-disable no-process-exit */
require('dotenv').config();

const Stream = require('stream');
const fs = require('fs');
const assert = require('assert');
const mongodb = require('mongodb');
const validator = require('validator');
const emailValidator = require('email-validator');
const ora = require('ora');

const MongoClient = mongodb.MongoClient;
// Destination CSV for valid addresses, taken from the first CLI argument.
const filePath = process.argv[2];

// Check the argument before any stream is created: createWriteStream throws
// on a missing path, which would otherwise hide this usage message.
assert(
filePath,
`
This script must be called with a filepath argument like so:
npm run mailing-list -- './emails.csv'
`
);

// Valid addresses go to the requested file; everything else is written to
// ./invalidEmails.csv in the current working directory.
const validOutput = fs.createWriteStream(filePath, { encoding: 'utf8' });
const invalidOutput = fs.createWriteStream('./invalidEmails.csv', {
  encoding: 'utf8'
});

// Both files share the same CSV header.
validOutput.write('email,unsubscribeId\n');
invalidOutput.write('email,unsubscribeId\n');

// Object-mode readable that is fed manually via rs.push(); _read is a no-op
// because data is pushed from the database cursor rather than pulled.
const rs = new Stream.Readable({ objectMode: true });
rs._read = function () {};

// Route each record to the matching CSV. An address counts as valid when
// either validator accepts it.
rs.on('data', ({ email, unsubscribeId }) => {
  const isValid = validator.isEmail(email) || emailValidator.validate(email);
  const output = isValid ? validOutput : invalidOutput;
  output.write(`${email},${unsubscribeId}\n`);
});

// Connection settings are read from the environment.
const {
  MONGO_DB,
  MONGO_PASSWORD,
  MONGO_RS,
  MONGO_USER,
  MONGODB_URI
} = process.env;

// Connect, stream every mailable user through `rs`, then close the client.
MongoClient.connect(
  MONGODB_URI,
  {
    useNewUrlParser: true,
    useUnifiedTopology: true,
    replicaSet: MONGO_RS,
    auth: { user: MONGO_USER, password: MONGO_PASSWORD },
    poolSize: 20
  },
  function (err, client) {
    if (err) {
      throw err;
    }
    const db = client.db(MONGO_DB);

    const stream = db
      .collection('user')
      .find(
        {
          $and: [
            { email: { $exists: true } },
            { email: { $ne: '' } },
            { email: { $ne: null } },
            // Skip addresses containing "test" or "fake" (case-insensitive).
            { email: { $not: /(test|fake)/i } },
            {
              // Include users who opted in or never recorded a preference.
              $or: [
                { sendQuincyEmail: true },
                { sendQuincyEmail: { $exists: false } },
                { sendQuincyEmail: null }
              ]
            }
          ]
        },
        {
          // The second argument of find() is an options object, so the field
          // list has to be nested under `projection` to take effect.
          projection: {
            email: 1,
            unsubscribeId: 1
          }
        }
      )
      .batchSize(100)
      .stream();

    const spinner = ora('Begin querying emails ...');
    spinner.start();

    stream.on('data', ({ email, unsubscribeId }) => {
      const data = { email, unsubscribeId };
      spinner.text = `Getting info for: ${email}\n`;
      rs.push(data);
    });

    // Report a failed cursor explicitly and release the connection instead of
    // dying on an unhandled 'error' event.
    stream.on('error', (streamErr) => {
      rs.push(null);
      client.close();
      spinner.fail(`Failed to compile mailing list: ${streamErr.message}`);
      process.exitCode = 1;
    });

    stream.on('end', () => {
      // Signal end-of-data to the consumer before closing the connection.
      rs.push(null);
      client.close();
      spinner.succeed(`Completed compiling mailing list.`);
    });
  }
);
36 changes: 36 additions & 0 deletions accounts/outdated/get-certified.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Placeholder script: it only prints the notice below. The previous
// implementation is preserved as commented-out code in the rest of the file.
console.log('\n\nThis script needs to be re-written\n\n');

/* eslint-disable no-process-exit */
// require('dotenv').load();
// var mongodb = require('mongodb');
// var MongoClient = mongodb.MongoClient;

// MongoClient.connect(process.env.MONGODB_URI, function(err, database) {
// if (err) {
// throw err;
// }

// database.collection('user').aggregate([
// {
// $match: {
// $and: [
// { isFrontEndCert: true },
// { isBackEndCert: true },
// { isDataVisCert: true }
// ]
// }
// },
// {
// $group: {
// _id: 1,
// usernames: { $addToSet: '$username' }
// }
// }
// ], function(err, results) {
// if (err) { throw err; }

// console.log('\n@' + results[0].usernames.join('\n@'));
// // console.log(results[0].usernames.length);
// process.exit(0);
// });
// });
45 changes: 45 additions & 0 deletions accounts/outdated/get-dup-emails.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Placeholder script: it only prints the notice below. The previous
// implementation is preserved as commented-out code in the rest of the file.
console.log('\n\nThis script needs to be re-written\n\n');

// /* eslint-disable no-process-exit */
// require('dotenv').load();
// var mongodb = require('mongodb');
// var MongoClient = mongodb.MongoClient;

// MongoClient.connect(process.env.MONGODB_URI, function(err, database) {
// if (err) {
// throw err;
// }

// database.collection('user').aggregate([
// {
// $group: {
// _id: '$email',
// count: { $sum: 1 }
// }
// },
// {
// $match: {
// _id: { $ne: null },
// count: { $gt: 1 }
// }
// },
// {
// $project: {
// email: '$_id',
// _id: 0
// }
// },
// {
// $group: {
// _id: 1,
// usernames: { $addToSet: '$email' }
// }
// }
// ], function(err, results) {
// if (err) { throw err; }

// // console.log('\n@' + results[0].usernames.join('\n@'));
// console.log(results[0].usernames.length);
// process.exit(0);
// });
// });
Loading

0 comments on commit 913c7b9

Please sign in to comment.