Skip to content

Commit

Permalink
Sanitise fake email domain
Browse files Browse the repository at this point in the history
In order to more closely conform to RFC 1035 and actually generate valid
fake emails, we only allow alphanumeric characters or hyphens in the
text output.

See: https://datatracker.ietf.org/doc/html/rfc1035#section-2.3.1

Resolves #52
  • Loading branch information
jezen committed May 3, 2024
1 parent 450bcfd commit d48464c
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 18 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog for fakedata

## 1.0.4

* Only generate fake email domains with alphanumeric characters and hyphens. [#53](https://github.com/fakedata-haskell/fakedata/pull/53)

## 1.0.3

* [Make the `Fake` type synonym partially applied](https://github.com/fakedata-haskell/fakedata/pull/45)
Expand Down
2 changes: 1 addition & 1 deletion fakedata.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack

name: fakedata
version: 1.0.3
version: 1.0.4
synopsis: Library for producing fake data
description: Please see the README on GitHub at <https://github.com/psibi/fakedata#readme>
category: Random, Fake, FakeData
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: fakedata
version: 1.0.3
version: 1.0.4
github: "psibi/fakedata"
license: BSD3
author: "Sibi Prabakaran"
Expand Down
37 changes: 21 additions & 16 deletions src/Faker/Company.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ module Faker.Company
) where

import Data.Monoid ((<>))
import Data.Text
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Vector as V
import Faker
import Faker.Internal
Expand All @@ -44,7 +45,7 @@ bs =
Fake
(\settings -> do
vec :: V.Vector (V.Vector Text) <- companyBsProvider settings
let item :: V.Vector (IO Text) = V.map (\v -> rvec settings v) vec
let item :: V.Vector (IO Text) = V.map (rvec settings) vec
item' :: IO (V.Vector Text) = sequence item
items <- item'
let txt = V.foldl1' (\a b -> a <> " " <> b) items
Expand Down Expand Up @@ -72,27 +73,31 @@ domain :: Fake Text
domain = do
suffix <- F.domainSuffix
companyName <- name
pure $ fixupName companyName <> "." <> suffix
pure $ sanitise companyName <> "." <> suffix
where
-- Turns a company name into a single DNS label: spaces become hyphens,
-- every character outside the RFC 1035 "letter / digit / hyphen" set is
-- dropped, and hyphens left at either end are trimmed, because a label
-- must not begin or end with a hyphen.
--
-- The character test is an explicit ASCII range check rather than
-- 'isAlphaNum': 'isAlphaNum' also accepts non-ASCII letters and digits
-- (e.g. 'ü' or '²'), which RFC 1035 does not allow in a label.
--
-- See: https://datatracker.ietf.org/doc/html/rfc1035#section-2.3.1
sanitise :: Text -> Text
sanitise = T.dropAround (== '-') . T.filter isLabelChar . T.replace " " "-"
  where
    -- ASCII letters, ASCII digits and the hyphen only.
    isLabelChar :: Char -> Bool
    isLabelChar c =
      (c >= 'a' && c <= 'z')
        || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9')
        || c == '-'

-- | Generates an email like "jappie_klooster@crazychairauction.com"
--
-- @since 0.8.1
--
email :: Fake Text
email = do
humanName <- F.name
number <- F.fromRange @Int (0, 999999999) -- reasonable uniqueness
domainName <- domain
let numText :: Text
numText = pack $ show number
pure $ fixupName humanName <> "-" <> numText <> "@" <> domainName

-- | Ensures the spaces are replaced by "_",
-- and no special characters are in the name.
-- So "Elizabeth Warder!" becomes "Elizabeth_Warder".
-- Any fancy symbols such as "!@#$" etc are filtered out.
--
-- @since 0.8.1
--
fixupName :: Text -> Text
fixupName = Data.Text.filter (\c -> isAlphaNum c || c == '_') . replace " " "_"
numText = T.pack $ show number
pure $ sanitise humanName <> "-" <> numText <> "@" <> domainName
where
-- Normalises a human name for use in the local part of an email address.
-- Each space is turned into an underscore, after which only alphanumeric
-- characters and underscores are kept; everything else (for example
-- "!@#$") is discarded. So "Elizabeth Warder!" becomes "Elizabeth_Warder".
sanitise :: Text -> Text
sanitise = T.filter permitted . T.map spaceToUnderscore
  where
    -- Characters that survive the filter.
    permitted :: Char -> Bool
    permitted ch = ch == '_' || isAlphaNum ch

    -- Swap a single space for an underscore; leave other characters alone.
    spaceToUnderscore :: Char -> Char
    spaceToUnderscore ch
      | ch == ' ' = '_'
      | otherwise = ch

0 comments on commit d48464c

Please sign in to comment.