From 0f934092d84194b673262fa3cf6db14e1e285807 Mon Sep 17 00:00:00 2001 From: ksaric Date: Wed, 4 Jul 2018 09:28:42 +0200 Subject: [PATCH 1/2] [TSD-47] Create SQLite schema and insert data using HTTP. --- log-classifier.cabal | 2 + schema.sql | 38 +++++++++++ src/CLI.hs | 3 + src/DataSource/DB.hs | 137 ++++++++++++++++++++++++++++++++++------ src/DataSource/Http.hs | 132 +++++++++++++++++++++++++++++++++----- src/DataSource/Types.hs | 81 +++++++++++++++++++++--- src/Lib.hs | 83 ++++++++++++++++++++++-- test/Spec.hs | 17 +++-- 8 files changed, 438 insertions(+), 55 deletions(-) create mode 100644 schema.sql diff --git a/log-classifier.cabal b/log-classifier.cabal index 9520f3e..a02b29a 100644 --- a/log-classifier.cabal +++ b/log-classifier.cabal @@ -41,6 +41,7 @@ library , reflection , regex-tdfa , text + , time , universum , zip-archive , QuickCheck @@ -68,6 +69,7 @@ library MonadFailDesugaring TupleSections StrictData + ExplicitForAll executable log-classifier-exe hs-source-dirs: app diff --git a/schema.sql b/schema.sql new file mode 100644 index 0000000..0fefe6c --- /dev/null +++ b/schema.sql @@ -0,0 +1,38 @@ +-- PRAGMA foreign_keys = "1"; + +CREATE TABLE `ticket_info` ( + `tiId` INTEGER, + `tiRequesterId` INTEGER NOT NULL, + `tiAssigneeId` INTEGER, + `tiUrl` TEXT NOT NULL, + `tiTags` TEXT NOT NULL, + `tiStatus` TEXT NOT NULL, + PRIMARY KEY(tiId) +) WITHOUT ROWID; + +CREATE TABLE `ticket_comment` ( + `id` INTEGER, + `ticket_id` INTEGER NOT NULL, + `body` TEXT NOT NULL, + `is_public` INTEGER NOT NULL, + `author_id` INTEGER NOT NULL, + PRIMARY KEY(id), + FOREIGN KEY(`ticket_id`) REFERENCES ticket_info(tId) +) WITHOUT ROWID; + +CREATE TABLE `comment_attachment` ( + `aId` INTEGER, + `comment_id` INTEGER NOT NULL, + `aURL` TEXT NOT NULL, + `aContentType` TEXT NOT NULL, + `aSize` INTEGER NOT NULL, + PRIMARY KEY(aId), + FOREIGN KEY(`comment_id`) REFERENCES ticket_comment ( ticket_id ) +) WITHOUT ROWID; + +CREATE TABLE `attachment_content` ( + `attachment_id` INTEGER, + `content` BLOB NOT NULL, + PRIMARY KEY(attachment_id), + FOREIGN KEY(`attachment_id`) REFERENCES comment_attachment(aId) +); diff --git a/src/CLI.hs b/src/CLI.hs index 53f08a4..16921e4 100644 --- a/src/CLI.hs +++ b/src/CLI.hs @@ -19,6 +19,7 @@ data CLI | ProcessTicket Int -- ^ Process ticket of an given ticket id | ProcessTickets -- ^ Process all the tickets in Zendesk | ShowStatistics -- ^ Show statistics + | ExportData -- ^ Export data. TODO(ks): From N days before! deriving (Show) -- | Parser for ProcessTicket @@ -42,6 +43,8 @@ cli = hsubparser $ mconcat (progDesc "Process Zendesk ticket of an given ticket id")) , command "show-stats" (info (pure ShowStatistics) (progDesc "Print list of ticket Ids that agent has been assigned")) + , command "export-data" (info (pure ExportData) + (progDesc "Print list of ticket Ids that agent has been assigned")) ] -- | Get CLI arguments from command line diff --git a/src/DataSource/DB.hs b/src/DataSource/DB.hs index af993a3..e45ffc3 100644 --- a/src/DataSource/DB.hs +++ b/src/DataSource/DB.hs @@ -8,14 +8,23 @@ module DataSource.DB ( withDatabase , withProdDatabase , cachedZendeskLayer + -- * For updating local database + , insertTicketInfo + , insertTicketComments + , insertCommentAttachments + -- * Delete data + , deleteAllData + , deleteCommentAttachments + , deleteTicketComments + , deleteTickets ) where import Universum import Data.Text (split) -import Database.SQLite.Simple (FromRow (..), NamedParam (..), SQLData (..), close, field, - open, queryNamed, query_) +import Database.SQLite.Simple (FromRow (..), NamedParam (..), SQLData (..), close, + executeNamed, execute_, field, open, queryNamed, query_) import Database.SQLite.Simple.FromField (FromField (..), ResultError (..), returnError) import Database.SQLite.Simple.Internal (Connection, Field (..)) import Database.SQLite.Simple.Ok (Ok (..)) @@ -33,14 +42,16 @@ import DataSource.Types (Attachment (..), AttachmentContent (..), Atta -- that we need to sync occasionaly. cachedZendeskLayer :: (MonadIO m, MonadReader Config m) => ZendeskLayer m cachedZendeskLayer = ZendeskLayer - { zlGetTicketInfo = liftIO . getTicketInfoByTicketId - , zlListAssignedTickets = liftIO . getAllAssignedTicketsByUser - , zlListRequestedTickets = liftIO . getAllRequestedTicketsByUser - , zlListUnassignedTickets = error "We won't use this for now!" + { zlGetTicketInfo = getTicketInfoByTicketId + , zlListDeletedTickets = error "We won't use this for now." + , zlListAssignedTickets = getAllAssignedTicketsByUser + , zlListRequestedTickets = getAllRequestedTicketsByUser + , zlListUnassignedTickets = getAllUnassignedTicketsByUser , zlListAdminAgents = error "We won't use this for now!" - , zlGetAttachment = liftIO . DataSource.DB.getAttachmentContent - , zlGetTicketComments = liftIO . getTicketComments + , zlGetAttachment = DataSource.DB.getAttachmentContent + , zlGetTicketComments = getTicketComments , zlPostTicketComment = error "We won't use this for now!" + , zlExportTickets = error "This is redundant since we mostly require it for caching!" } -- Database instances @@ -100,11 +111,24 @@ instance ToField UserId where instance ToField CommentId where toField (CommentId commentId) = SQLInteger . fromIntegral $ commentId +instance ToField CommentBody where + toField (CommentBody commentBody) = SQLText $ commentBody + instance ToField AttachmentId where toField (AttachmentId attachmentId) = SQLInteger . fromIntegral $ attachmentId +instance ToField TicketURL where + toField (TicketURL tiUrl) = SQLText $ tiUrl + +instance ToField TicketTags where + toField (TicketTags tiTags) = SQLText . fromString $ intercalate "," $ map toString tiTags + +instance ToField TicketStatus where + toField (TicketStatus tiStatus) = SQLText $ tiStatus -- | A general resource closing function. +-- The issue with this is that we currently can't use any concurrency +-- primitives, but that will be fixed in the future. withDatabase :: forall a. String -> (Connection -> IO a) -> IO a withDatabase dbName dbOperation = bracket @@ -113,28 +137,36 @@ withDatabase dbName dbOperation = dbOperation -- | A production resource closing function. -withProdDatabase :: forall a. (Connection -> IO a) -> IO a -withProdDatabase = withDatabase "./prod.db" +withProdDatabase :: forall m a. (MonadIO m) => (Connection -> IO a) -> m a +withProdDatabase = liftIO . withDatabase "./prod.db" -_getTicketsInfo :: IO [TicketInfo] +------------------------------------------------------------ +-- Query +------------------------------------------------------------ + +_getTicketsInfo :: forall m. (MonadIO m) => m [TicketInfo] _getTicketsInfo = withProdDatabase $ \conn -> query_ conn "SELECT * FROM ticket_info" -getTicketInfoByTicketId :: TicketId -> IO (Maybe TicketInfo) +getTicketInfoByTicketId :: forall m. (MonadIO m) => TicketId -> m (Maybe TicketInfo) getTicketInfoByTicketId ticketId = withProdDatabase $ \conn -> - safeHead <$> queryNamed conn "SELECT * FROM ticket_info WHERE ticket_id = :id" [":id" := ticketId] + safeHead <$> queryNamed conn "SELECT * FROM ticket_info WHERE tId = :id" [":id" := ticketId] -getAllAssignedTicketsByUser :: UserId -> IO [TicketInfo] +getAllAssignedTicketsByUser :: forall m. (MonadIO m) => UserId -> m [TicketInfo] getAllAssignedTicketsByUser userId = withProdDatabase $ \conn -> queryNamed conn "SELECT * FROM ticket_info WHERE assignee_id = :id" [":id" := userId] -getAllRequestedTicketsByUser :: UserId -> IO [TicketInfo] +getAllUnassignedTicketsByUser :: forall m. (MonadIO m) => m [TicketInfo] +getAllUnassignedTicketsByUser = withProdDatabase $ \conn -> + query_ conn "SELECT * FROM ticket_info WHERE assignee_id = NULL" + +getAllRequestedTicketsByUser :: forall m. (MonadIO m) => UserId -> m [TicketInfo] getAllRequestedTicketsByUser userId = withProdDatabase $ \conn -> queryNamed conn "SELECT * FROM ticket_info WHERE requester_id = :id" [":id" := userId] --- | A join would be more performance, but KISS for now. -getTicketComments :: TicketId -> IO [Comment] +-- | A join would be more performant, but KISS for now. +getTicketComments :: forall m. (MonadIO m) => TicketId -> m [Comment] getTicketComments ticketId = do commentsInfo <- getTicketIdComments ticketId @@ -150,15 +182,80 @@ getTicketComments ticketId = do , cAuthor = commentAuthorId } where - getTicketIdComments :: TicketId -> IO [(CommentId, CommentBody, Bool, Integer)] + getTicketIdComments :: TicketId -> m [(CommentId, CommentBody, Bool, Integer)] getTicketIdComments ticketId' = withProdDatabase $ \conn -> queryNamed conn "SELECT tc.id, tc.body, tc.is_public, tc.author_id FROM ticket_comment tc WHERE tc.ticket_id = :id" [":id" := ticketId'] - getCommentAttachments :: CommentId -> IO [Attachment] + getCommentAttachments :: CommentId -> m [Attachment] getCommentAttachments commentId = withProdDatabase $ \conn -> queryNamed conn "SELECT * FROM comment_attachments WHERE comment_id = :id" [":id" := commentId] -getAttachmentContent :: Attachment -> IO (Maybe AttachmentContent) +-- | We use a different database here since a simple calculation shows that +-- this database will be huge. If an average log takes around 10Mb, saving +-- 1000's or 10000's logs in a database is very space intensive. +-- Since most of the time we want to deal with the regular information not +-- relating to attachments, it makes sense to separate the DB's. Another option +-- is to use another database (KV storage/database) for this. +-- TODO(ks): For now, let's delay this decision for a bit. +getAttachmentContent :: forall m. (MonadIO m) => Attachment -> m (Maybe AttachmentContent) getAttachmentContent Attachment{..} = withProdDatabase $ \conn -> safeHead <$> queryNamed conn "SELECT * FROM attachment_content WHERE attachment_id = :id" [":id" := aId] +------------------------------------------------------------ +-- DML +------------------------------------------------------------ + +-- TODO(ks): withTransaction + +insertTicketInfo :: forall m. (MonadIO m) => TicketInfo -> m () +insertTicketInfo TicketInfo{..} = withProdDatabase $ \conn -> + executeNamed conn "INSERT INTO ticket_info (tiId, tiRequesterId, tiAssigneeId, tiUrl, tiTags, tiStatus) VALUES (:tiId, :tiRequesterId, :tiAssigneeId, :tiUrl, :tiTags, :tiStatus)" + [ ":tiId" := tiId + , ":tiRequesterId" := tiRequesterId + , ":tiAssigneeId" := tiAssigneeId + , ":tiUrl" := tiUrl + , ":tiTags" := tiTags + , ":tiStatus" := tiStatus + ] + +insertTicketComments :: forall m. (MonadIO m) => TicketId -> Comment -> m () +insertTicketComments ticketId Comment{..} = withProdDatabase $ \conn -> + executeNamed conn "INSERT INTO ticket_comment (id, ticket_id, body, is_public, author_id) VALUES (:id, :ticket_id, :body, :is_public, :author_id)" + [ ":id" := cId + , ":ticket_id" := ticketId + , ":body" := cBody + , ":is_public" := cPublic + , ":author_id" := cAuthor + ] + +insertCommentAttachments :: forall m. (MonadIO m) => Comment -> Attachment -> m () +insertCommentAttachments Comment{..} Attachment{..} = withProdDatabase $ \conn -> + executeNamed conn "INSERT INTO comment_attachment (aId, comment_id, aURL, aContentType, aSize) VALUES (:aId, :comment_id, :aURL, :aContentType, :aSize)" + [ ":aId" := aId + , ":comment_id" := cId + , ":aURL" := aURL + , ":aContentType" := aContentType + , ":aSize" := aSize + ] + +deleteCommentAttachments :: forall m. (MonadIO m) => m () +deleteCommentAttachments = withProdDatabase $ \conn -> + execute_ conn "DELETE FROM comment_attachment" + +deleteTicketComments :: forall m. (MonadIO m) => m () +deleteTicketComments = withProdDatabase $ \conn -> + execute_ conn "DELETE FROM ticket_comment" + +deleteTickets :: forall m. (MonadIO m) => m () +deleteTickets = withProdDatabase $ \conn -> + execute_ conn "DELETE FROM ticket_info" + +-- | Delete all data. +-- Here we can show that we lack composition, and the step we +-- need to achive it. +deleteAllData :: forall m. (MonadIO m) => m () +deleteAllData = withProdDatabase $ \conn -> do + execute_ conn "DELETE FROM comment_attachment" + execute_ conn "DELETE FROM ticket_comment" + execute_ conn "DELETE FROM ticket_info" + diff --git a/src/DataSource/Http.hs b/src/DataSource/Http.hs index 5bbc0a3..f5cffb0 100644 --- a/src/DataSource/Http.hs +++ b/src/DataSource/Http.hs @@ -11,7 +11,9 @@ module DataSource.Http import Universum +import Control.Concurrent (threadDelay) import Control.Monad.Reader (ask) + import Data.Aeson (FromJSON, ToJSON, Value, encode, parseJSON) import Data.Aeson.Text (encodeToLazyText) import Data.Aeson.Types (Parser, parseEither) @@ -21,12 +23,13 @@ import Network.HTTP.Simple (Request, addRequestHeader, getResponseBody import DataSource.Types (Attachment (..), AttachmentContent (..), Comment (..), CommentBody (..), CommentId (..), Config (..), - FromPageResultList (..), IOLayer (..), PageResultList (..), - Ticket (..), TicketId (..), TicketInfo (..), TicketTag (..), - User, UserId (..), ZendeskAPIUrl (..), ZendeskLayer (..), - ZendeskResponse (..), parseComments, parseTicket, - renderTicketStatus, showURL) + DeletedTicket (..), ExportFromTime (..), FromPageResultList (..), + IOLayer (..), PageResultList (..), Ticket (..), TicketId (..), + TicketInfo (..), TicketTag (..), User, UserId (..), + ZendeskAPIUrl (..), ZendeskLayer (..), ZendeskResponse (..), + parseComments, parseTicket, renderTicketStatus, showURL) +-- ./mitmproxy --mode reverse:https://iohk.zendesk.com -p 4001 -- | The default configuration. defaultConfig :: Config @@ -35,7 +38,7 @@ defaultConfig = { cfgAgentId = 0 , cfgZendesk = "https://iohk.zendesk.com" , cfgToken = "" - , cfgEmail = "daedalus-bug-reports@iohk.io" + , cfgEmail = "kristijan.saric@iohk.io" , cfgAssignTo = 0 , cfgKnowledgebase = [] , cfgNumOfLogsToAnalyze = 5 @@ -45,9 +48,14 @@ defaultConfig = } -- | The basic Zendesk layer. +-- The convention: +-- - get returns a single result (wrapped in @Maybe@) +-- - list returns multiple results +-- - post submits a result (maybe PUT?!) basicZendeskLayer :: (MonadIO m, MonadReader Config m) => ZendeskLayer m basicZendeskLayer = ZendeskLayer { zlGetTicketInfo = getTicketInfo + , zlListDeletedTickets = listDeletedTickets , zlListRequestedTickets = listRequestedTickets , zlListAssignedTickets = listAssignedTickets , zlListUnassignedTickets = listUnassignedTickets @@ -55,6 +63,7 @@ basicZendeskLayer = ZendeskLayer , zlPostTicketComment = postTicketComment , zlGetAttachment = getAttachment , zlGetTicketComments = getTicketComments + , zlExportTickets = getExportedTickets } basicIOLayer :: (MonadIO m, MonadReader Config m) => IOLayer m @@ -68,6 +77,7 @@ basicIOLayer = IOLayer emptyZendeskLayer :: forall m. (Monad m) => ZendeskLayer m emptyZendeskLayer = ZendeskLayer { zlGetTicketInfo = \_ -> error "Not implemented zlGetTicketInfo!" + , zlListDeletedTickets = pure [] , zlListRequestedTickets = \_ -> error "Not implemented zlListRequestedTickets!" , zlListAssignedTickets = \_ -> error "Not implemented zlListAssignedTickets!" , zlListUnassignedTickets = pure [] @@ -75,6 +85,7 @@ emptyZendeskLayer = ZendeskLayer , zlPostTicketComment = \_ -> error "Not implemented zlPostTicketComment!" , zlGetAttachment = \_ -> error "Not implemented zlGetAttachment!" , zlGetTicketComments = \_ -> error "Not implemented zlGetTicketComments!" + , zlExportTickets = \_ -> error "Not implemented zlExportTickets!" } -- | Get single ticket info. @@ -89,6 +100,18 @@ getTicketInfo ticketId = do let req = apiRequest cfg url liftIO $ Just <$> apiCall parseTicket req +-- | Return list of deleted tickets. +listDeletedTickets + :: forall m. (MonadIO m, MonadReader Config m) + => m [DeletedTicket] +listDeletedTickets = do + cfg <- ask + + let url = showURL $ DeletedTicketsURL + let req = apiRequest cfg url + + iteratePages req + -- | Return list of ticketIds that has been requested by config user. listRequestedTickets :: forall m. (MonadIO m, MonadReader Config m) @@ -127,7 +150,9 @@ listUnassignedTickets = do iteratePages req -listAdminAgents :: forall m. (MonadIO m, MonadReader Config m) => m [User] +listAdminAgents + :: forall m. (MonadIO m, MonadReader Config m) + => m [User] listAdminAgents = do cfg <- ask let url = showURL AgentGroupURL @@ -135,27 +160,83 @@ listAdminAgents = do iteratePages req +-- | Export tickets from Zendesk - https://developer.zendesk.com/rest_api/docs/core/incremental_export +-- NOTE: If count is less than 1000, then stop paginating. +-- Otherwise, use the next_page URL to get the next page of results. +getExportedTickets + :: forall m. (MonadIO m, MonadReader Config m) + => ExportFromTime + -> m [TicketInfo] +getExportedTickets time = do + cfg <- ask + let url = showURL $ ExportDataByTimestamp time + putTextLn $ "/api/v2" <> url + let req = apiRequestAbsolute cfg url + putTextLn "OUT!" + iterateExportedTicketsWithDelay req + where + + iterateExportedTicketsWithDelay + :: Request + -> m [TicketInfo] + iterateExportedTicketsWithDelay req = do + cfg <- ask + + let go :: [TicketInfo] -> Text -> IO [TicketInfo] + go list' nextPage' = do + threadDelay $ 10 * 1000000 -- Wait, Zendesk allows for 10 per minute. + + let req' = apiRequestAbsolute cfg nextPage' + (PageResultList pagen nextPagen count) <- apiCall parseJSON req' + case nextPagen of + Just nextUrl -> if maybe False (>= 1000) count + then go (list' <> pagen) nextUrl + else pure (list' <> pagen) + + Nothing -> pure (list' <> pagen) + + + (PageResultList page0 nextPage _) <- liftIO $ apiCall parseJSON req + putTextLn $ show page0 + putTextLn $ show nextPage + case nextPage of + Just nextUrl -> liftIO $ go page0 nextUrl + Nothing -> pure page0 + -- | Iterate all the ticket pages and combine into a result. iteratePages :: forall m a. (MonadIO m, MonadReader Config m, FromPageResultList a) => Request -> m [a] -iteratePages req = do +iteratePages req = iteratePagesWithDelay 0 req + +-- | Iterate all the ticket pages and combine into a result. Wait for +-- some time in-between the requests. +iteratePagesWithDelay + :: forall m a. (MonadIO m, MonadReader Config m, FromPageResultList a) + => Int + -> Request + -> m [a] +iteratePagesWithDelay seconds req = do cfg <- ask let go :: [a] -> Text -> IO [a] go list' nextPage' = do - let req' = apiRequestAbsolute cfg nextPage' - (PageResultList pagen nextPagen) <- apiCall parseJSON req' - case nextPagen of - Just nextUrl -> go (list' <> pagen) nextUrl - Nothing -> pure (list' <> pagen) + -- Wait for @Int@ seconds. + threadDelay $ seconds * 1000000 - (PageResultList page0 nextPage) <- liftIO $ apiCall parseJSON req + let req' = apiRequestAbsolute cfg nextPage' + (PageResultList pagen nextPagen _) <- apiCall parseJSON req' + case nextPagen of + Just nextUrl -> go (list' <> pagen) nextUrl + Nothing -> pure (list' <> pagen) + + (PageResultList page0 nextPage _) <- liftIO $ apiCall parseJSON req case nextPage of Just nextUrl -> liftIO $ go page0 nextUrl Nothing -> pure page0 + -- | Send API request to post comment postTicketComment :: (MonadIO m, MonadReader Config m) @@ -211,7 +292,13 @@ getTicketComments tId = do let url = showURL $ TicketCommentsURL tId let req = apiRequest cfg url - liftIO $ apiCall parseComments req + result <- apiCallSafe parseComments req + + -- TODO(ks): For now return empty if there is an exception. + -- After we have exception handling, we propagate this up. + case result of + Left _ -> pure [] + Right r -> pure r ------------------------------------------------------------ -- HTTP utility @@ -222,14 +309,27 @@ addJsonBody :: ToJSON a => a -> Request -> Request addJsonBody body req = setRequestBodyJSON body $ setRequestMethod "PUT" req -- | Make an api call +-- TODO(ks): Switch to @Either@. apiCall :: FromJSON a => (Value -> Parser a) -> Request -> IO a apiCall parser req = do + putTextLn $ show req v <- getResponseBody <$> httpJSON req case parseEither parser v of Right o -> pure o Left e -> error $ "couldn't parse response " <> toText e <> "\n" <> decodeUtf8 (encode v) +-- | Make a safe api call. +apiCallSafe + :: forall m a. (MonadIO m, FromJSON a) + => (Value -> Parser a) + -> Request + -> m (Either String a) +apiCallSafe parser req = do + putTextLn $ show req + v <- getResponseBody <$> httpJSON req + pure $ parseEither parser v + -- | General api request function apiRequest :: Config -> Text -> Request apiRequest Config{..} u = setRequestPath (encodeUtf8 path) $ @@ -240,7 +340,7 @@ apiRequest Config{..} u = setRequestPath (encodeUtf8 path) $ parseRequest_ (toString (cfgZendesk <> path)) where path :: Text - path = "/api/v2/" <> u + path = "/api/v2" <> u -- | Api request but use absolute path apiRequestAbsolute :: Config -> Text -> Request diff --git a/src/DataSource/Types.hs b/src/DataSource/Types.hs index 91da4ff..b39ce3b 100644 --- a/src/DataSource/Types.hs +++ b/src/DataSource/Types.hs @@ -13,6 +13,7 @@ module DataSource.Types , CommentOuter (..) , PageResultList (..) , RequestType (..) + , DeletedTicket (..) , Ticket (..) , TicketId (..) , TicketURL (..) @@ -27,6 +28,7 @@ module DataSource.Types , UserEmail (..) , ZendeskResponse (..) , FromPageResultList (..) + , ExportFromTime (..) , parseComments , parseTicket , renderTicketStatus @@ -48,9 +50,10 @@ module DataSource.Types import Universum import Data.Aeson (FromJSON, ToJSON, Value (Object), object, parseJSON, toJSON, - withObject, (.:), (.=)) + withObject, (.:), (.:?), (.=)) import Data.Aeson.Types (Parser) import qualified Data.Text as T +import Data.Time.Clock.POSIX (POSIXTime) import LogAnalysis.Types (Knowledge) import Test.QuickCheck (Arbitrary (..), elements, listOf1, vectorOf) @@ -127,11 +130,13 @@ tokenPath = "./tmp-secrets/token" assignToPath :: FilePath assignToPath = "./tmp-secrets/assign_to" + -- | The Zendesk API interface that we want to expose. -- We don't want anything to leak out, so we expose only the most relevant information, -- anything relating to how it internaly works should NOT be exposed. data ZendeskLayer m = ZendeskLayer { zlGetTicketInfo :: TicketId -> m (Maybe TicketInfo) + , zlListDeletedTickets :: m [DeletedTicket] , zlListRequestedTickets :: UserId -> m [TicketInfo] , zlListAssignedTickets :: UserId -> m [TicketInfo] , zlListUnassignedTickets :: m [TicketInfo] @@ -139,6 +144,7 @@ data ZendeskLayer m = ZendeskLayer , zlGetTicketComments :: TicketId -> m [Comment] , zlGetAttachment :: Attachment -> m (Maybe AttachmentContent) , zlPostTicketComment :: ZendeskResponse -> m () + , zlExportTickets :: ExportFromTime -> m [TicketInfo] } -- | The IOLayer interface that we can expose. @@ -164,8 +170,12 @@ instance ToURL UserId where instance ToURL TicketId where toURL (TicketId ticketId) = show ticketId +instance ToURL ExportFromTime where + toURL (ExportFromTime time) = show . floor . toRational $ time + data ZendeskAPIUrl = AgentGroupURL + | DeletedTicketsURL | UserRequestedTicketsURL UserId | UserAssignedTicketsURL UserId | UserUnassignedTicketsURL @@ -173,10 +183,12 @@ data ZendeskAPIUrl | TicketAgentURL TicketId | UserInfoURL | TicketCommentsURL TicketId + | ExportDataByTimestamp ExportFromTime deriving (Eq, Generic) showURL :: ZendeskAPIUrl -> Text -showURL AgentGroupURL = "users.json?role%5B%5D=admin&role%5B%5D=agent" +showURL AgentGroupURL = "/users.json?role%5B%5D=admin&role%5B%5D=agent" +showURL DeletedTicketsURL = "/deleted_tickets.json" showURL (UserRequestedTicketsURL userId) = "/users/" <> toURL userId <> "/tickets/requested.json" showURL (UserAssignedTicketsURL userId) = "/users/" <> toURL userId <> "/tickets/assigned.json" showURL (UserUnassignedTicketsURL) = "/search.json?query=" <> urlEncode "type:ticket assignee:none" <> "&sort_by=created_at&sort_order=asc" @@ -184,6 +196,7 @@ showURL (TicketsURL ticketId) = "/tickets/" <> toURL ticketId <> " showURL (TicketAgentURL ticketId) = "https://iohk.zendesk.com/agent/tickets/" <> toURL ticketId showURL (UserInfoURL) = "/users/me.json" showURL (TicketCommentsURL ticketId) = "/tickets/" <> toURL ticketId <> "/comments.json" +showURL (ExportDataByTimestamp time) = "https://iohk.zendesk.com/api/v2/incremental/tickets.json?start_time=" <> toURL time -- | Plain @Text@ to @Text@ encoding. -- https://en.wikipedia.org/wiki/Percent-encoding @@ -360,11 +373,21 @@ data User = User , uEmail :: !UserEmail -- ^ Email of the user } deriving (Eq, Show, Generic) +-- Ideally, we might add more fields here, for now it's good enough. +data DeletedTicket = DeletedTicket + { dtId :: !TicketId + } deriving (Eq, Show, Generic) + data PageResultList a = PageResultList { prlResults :: ![a] , prlNextPage :: !(Maybe Text) + , prlCount :: !(Maybe Int) } +newtype ExportFromTime = ExportFromTime + { getExportFromTime :: POSIXTime + } deriving (Eq, Show, Ord, Generic) + ------------------------------------------------------------ -- Arbitrary instances ------------------------------------------------------------ @@ -463,6 +486,18 @@ instance Arbitrary User where , uName = userName , uEmail = userEmail } + +instance Arbitrary POSIXTime where + arbitrary = do + randomInt <- arbitrary + pure . abs . fromInteger $ randomInt + +instance Arbitrary ExportFromTime where + arbitrary = ExportFromTime <$> arbitrary + +instance Arbitrary DeletedTicket where + arbitrary = DeletedTicket <$> arbitrary + ------------------------------------------------------------ -- FromJSON instances ------------------------------------------------------------ @@ -501,33 +536,58 @@ instance FromJSON Comment where class FromPageResultList a where fromPageResult :: Value -> Parser (PageResultList a) +-- TODO(ks): Okey, we should probably move this into separate types... +-- The confusing thing is - they _ARE_ the same type, I would say ZenDesk API +-- is not properly constructed. instance FromPageResultList TicketInfo where fromPageResult obj = asum [ ticketListParser obj , resultsTicketsParser obj + , exportTicketsParser obj ] where -- | The case when we have the simple parser from tickets. ticketListParser :: Value -> Parser (PageResultList TicketInfo) ticketListParser = withObject "ticketList" $ \o -> PageResultList - <$> o .: "tickets" - <*> o .: "next_page" + <$> o .: "tickets" + <*> o .: "next_page" + <*> o .:? "count" -- | The case when we get results back from a query using the -- search API. resultsTicketsParser :: Value -> Parser (PageResultList TicketInfo) resultsTicketsParser (Object o) = PageResultList - <$> o .: "results" - <*> o .: "next_page" + <$> o .: "results" + <*> o .: "next_page" + <*> o .:? "count" resultsTicketsParser _ = fail "Cannot parse PageResultList from search API." + -- | The case when we get results back from a query using the + -- export API. + exportTicketsParser :: Value -> Parser (PageResultList TicketInfo) + exportTicketsParser (Object o) = + PageResultList + <$> o .: "tickets" + <*> o .: "next_page" + <*> o .:? "count" + exportTicketsParser _ = fail "Cannot parse PageResultList from search API." + + instance FromPageResultList User where fromPageResult = withObject "userList" $ \o -> PageResultList - <$> o .: "users" - <*> o .: "next_page" + <$> o .: "users" + <*> o .: "next_page" + <*> o .:? "count" + +instance FromPageResultList DeletedTicket where + fromPageResult = withObject "deleted_tickets" $ \o -> + PageResultList + <$> o .: "deleted_tickets" + <*> o .: "next_page" + <*> o .:? "count" instance (FromPageResultList a) => FromJSON (PageResultList a) where parseJSON = fromPageResult @@ -564,6 +624,10 @@ instance FromJSON User where , uEmail = userEmail } +instance FromJSON DeletedTicket where + parseJSON (Object o) = DeletedTicket <$> o .: "id" + parseJSON _ = fail "Cannot parse deleted ticket API." + ------------------------------------------------------------ -- ToJSON instances ------------------------------------------------------------ @@ -633,3 +697,4 @@ renderTicketStatus AnalyzedByScriptV1_0 = "analyzed-by-script-v1.0" renderTicketStatus AnalyzedByScriptV1_1 = "analyzed-by-script-v1.1" renderTicketStatus NoKnownIssue = "no-known-issues" renderTicketStatus NoLogAttached = "no-log-files" + diff --git a/src/Lib.hs b/src/Lib.hs index 277a36a..a669be6 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -16,16 +16,20 @@ module Lib import Universum import Data.Attoparsec.Text.Lazy (eitherResult, parse) +import Data.List (nub) import Data.Text (isInfixOf, stripEnd) +import Data.Time +import Data.Time.Clock.POSIX import CLI (CLI (..), getCliArgs) import DataSource (App, Attachment (..), AttachmentContent (..), Comment (..), - CommentBody (..), Config (..), IOLayer (..), - TicketId (..), TicketInfo (..), TicketStatus (..), TicketTag (..), - TicketTags (..), User (..), UserId (..), ZendeskLayer (..), - ZendeskResponse (..), asksIOLayer, asksZendeskLayer, assignToPath, - defaultConfig, knowledgebasePath, renderTicketStatus, - runApp, tokenPath) + CommentBody (..), Config (..), DeletedTicket (..), ExportFromTime (..), + IOLayer (..), TicketId (..), TicketInfo (..), TicketStatus (..), + TicketTag (..), TicketTags (..), User (..), UserId (..), + ZendeskLayer (..), ZendeskResponse (..), asksIOLayer, asksZendeskLayer, + assignToPath, defaultConfig, deleteAllData, insertCommentAttachments, + insertTicketComments, insertTicketInfo, knowledgebasePath, + renderTicketStatus, runApp, tokenPath) import LogAnalysis.Classifier (extractErrorCodes, extractIssuesFromLogs, prettyFormatAnalysis, prettyFormatLogReadError, prettyFormatNoIssues, prettyFormatNoLogs) @@ -63,6 +67,73 @@ runZendeskMain = do (ProcessTicket ticketId) -> void $ runApp (processTicket (TicketId ticketId)) cfg ProcessTickets -> void $ runApp processTickets cfg ShowStatistics -> runApp showStatistics cfg + ExportData -> runApp exportZendeskDataToLocalDB cfg + +-- | The function for exporting Zendesk data to our local DB so +-- we can have faster analysis and runs. +-- Test scenarios: +-- - Zendesk returns duplicate issues (yeah, that happens) +-- - Check termination on count <= 1000 +exportZendeskDataToLocalDB :: App () +exportZendeskDataToLocalDB = do + + let day = fromMaybe (error "Invalid date!") $ fromGregorianValid 2017 6 28 + let utctime = UTCTime day 0 + let posixTime = utcTimeToPOSIXSeconds utctime + + let exportFromTime :: ExportFromTime + exportFromTime = ExportFromTime posixTime + + getTicketComments <- asksZendeskLayer zlGetTicketComments + listDeletedTickets <- asksZendeskLayer zlListDeletedTickets + + exportTickets <- asksZendeskLayer zlExportTickets + -- Yeah, not sure why this happens. Very weird. + exportedTickets <- nub <$> exportTickets exportFromTime + + -- Yeah, there is a strange behaviour when we export tickets, + -- we might get deleted tickets as well. + deletedTickets <- listDeletedTickets + + -- Yep, some more strange behaviour. Not deleted, just not found. + -- TODO(ks): We can remove this once we have exception handling. + let deletedAndMissingTickets = + deletedTickets <> + [ DeletedTicket { dtId = TicketId 18317 } + , DeletedTicket { dtId = TicketId 18316 } + , DeletedTicket { dtId = TicketId 18263 } + , DeletedTicket { dtId = TicketId 18258 } + , DeletedTicket { dtId = TicketId 18256 } + ] + + let notDeletedExportedTickets = + sort $ filter (not . isDeletedTicket deletedAndMissingTickets) exportedTickets + + -- first, let's delete any data that's left here + deleteAllData + + void $ forM notDeletedExportedTickets $ \exportedTicket -> do + let ticketId = tiId exportedTicket + + -- First fetch comments, FAIL-FAST + ticketComments <- getTicketComments ticketId + insertTicketInfo exportedTicket + + -- For all the ticket comments + forM ticketComments $ \ticketComment -> do + insertTicketComments ticketId ticketComment + + -- For all the ticket comment attachments + forM (cAttachments ticketComment) $ \ticketAttachment -> + insertCommentAttachments ticketComment ticketAttachment + + -- TODO(ks): We currently don't download the attachment since that + -- would take a ton of time and would require probably up to 100Gb of space! + where + -- Filter for deleted tickets. + isDeletedTicket :: [DeletedTicket] -> TicketInfo -> Bool + isDeletedTicket dTickets TicketInfo{..} = + tiId `elem` map dtId dTickets -- TODO(hs): Remove this function since it's not used collectEmails :: App () diff --git a/test/Spec.hs b/test/Spec.hs index 680c5e9..2419637 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -4,13 +4,14 @@ import Universum import Test.Hspec (Spec, describe, hspec, it, pending, shouldBe) import Test.Hspec.QuickCheck (modifyMaxSuccess) -import Test.QuickCheck (Gen, arbitrary, forAll, listOf, listOf1, property, elements) +import Test.QuickCheck (Gen, arbitrary, elements, forAll, listOf, listOf1, property) import Test.QuickCheck.Monadic (assert, monadicIO, pre, run) -import DataSource (App, Comment (..), Config (..), IOLayer (..), TicketId (..), - TicketInfo (..), TicketStatus (..), TicketTags (..), User, UserId (..), - ZendeskAPIUrl (..), ZendeskLayer (..), ZendeskResponse (..), - basicIOLayer, defaultConfig, emptyZendeskLayer, runApp, showURL) +import DataSource (App, Comment (..), Config (..), ExportFromTime (..), IOLayer (..), + TicketId (..), TicketInfo (..), TicketStatus (..), TicketTags (..), + User, UserId (..), ZendeskAPIUrl (..), ZendeskLayer (..), + ZendeskResponse (..), basicIOLayer, defaultConfig, emptyZendeskLayer, + runApp, showURL) import Lib (filterAnalyzedTickets, listAndSortTickets, processTicket) -- TODO(ks): What we are really missing is a realistic @Gen ZendeskLayer m@. @@ -260,6 +261,12 @@ validShowURLSpec = let typedURL = showURL $ TicketCommentsURL ticketId untypedURL = "/tickets/" <> show (getTicketId ticketId) <> "/comments.json" in typedURL == untypedURL + it "returns valid ExportDataByTimestamp" $ do + property $ \timestamp -> + let typedURL = showURL $ ExportDataByTimestamp timestamp + untypedURL = "/incremental/tickets.json?start_time=" <> (show . floor . toRational $ getExportFromTime timestamp) + in typedURL == untypedURL + filterAnalyzedTicketsSpec :: Spec filterAnalyzedTicketsSpec = From 146fc502b636cb9a447be00e49c4b0d0ea5a301b Mon Sep 17 00:00:00 2001 From: ksaric Date: Thu, 5 Jul 2018 09:56:42 +0200 Subject: [PATCH 2/2] [TSD-47] Fix review comments. --- src/DataSource/Http.hs | 6 +----- src/Lib.hs | 4 ++-- test/Spec.hs | 15 ++++++++------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/DataSource/Http.hs b/src/DataSource/Http.hs index 326c16f..b2adb23 100644 --- a/src/DataSource/Http.hs +++ b/src/DataSource/Http.hs @@ -40,7 +40,7 @@ defaultConfig = { cfgAgentId = 0 , cfgZendesk = "https://iohk.zendesk.com" , cfgToken = "" - , cfgEmail = "kristijan.saric@iohk.io" + , cfgEmail = "daedalus-bug-reports@iohk.io" , cfgAssignTo = 0 , cfgKnowledgebase = [] , cfgNumOfLogsToAnalyze = 5 @@ -173,9 +173,7 @@ getExportedTickets getExportedTickets time = do cfg <- ask let url = showURL $ ExportDataByTimestamp time - putTextLn $ "/api/v2" <> url let req = apiRequestAbsolute cfg url - putTextLn "OUT!" iterateExportedTicketsWithDelay req where @@ -200,8 +198,6 @@ getExportedTickets time = do (PageResultList page0 nextPage _) <- liftIO $ apiCall parseJSON req - putTextLn $ show page0 - putTextLn $ show nextPage case nextPage of Just nextUrl -> liftIO $ go page0 nextUrl Nothing -> pure page0 diff --git a/src/Lib.hs b/src/Lib.hs index 151e232..5e3e1ba 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -18,8 +18,8 @@ import Universum import Data.Attoparsec.Text.Lazy (eitherResult, parse) import Data.List (nub) import Data.Text (isInfixOf, stripEnd) -import Data.Time -import Data.Time.Clock.POSIX +import Data.Time (UTCTime (..), fromGregorianValid) +import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds) import CLI (CLI (..), getCliArgs) import DataSource (App, Attachment (..), AttachmentContent (..), Comment (..), diff --git a/test/Spec.hs b/test/Spec.hs index 6e9b510..221a0be 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -7,12 +7,12 @@ import Test.Hspec.QuickCheck (modifyMaxSuccess) import Test.QuickCheck (Gen, arbitrary, elements, forAll, listOf, listOf1, property) import Test.QuickCheck.Monadic (assert, monadicIO, pre, run) -import DataSource (App, Attachment (..), Comment (..), Config (..), IOLayer (..), - Ticket (..), TicketId (..), TicketInfo (..), TicketStatus (..), - TicketTags (..), User, UserId (..), ZendeskAPIUrl (..), - ZendeskLayer (..), ZendeskResponse (..), basicIOLayer, - createResponseTicket, defaultConfig, emptyZendeskLayer, runApp, - showURL) +import DataSource (App, Attachment (..), Comment (..), Config (..), ExportFromTime (..), + IOLayer (..), Ticket (..), TicketId (..), TicketInfo (..), + TicketStatus (..), TicketTags (..), User, UserId (..), + ZendeskAPIUrl (..), ZendeskLayer (..), ZendeskResponse (..), + basicIOLayer, createResponseTicket, defaultConfig, emptyZendeskLayer, + runApp, showURL) import Lib (filterAnalyzedTickets, listAndSortTickets, processTicket) import Statistics (filterTicketsByStatus, filterTicketsWithAttachments, @@ -317,7 +317,8 @@ validShowURLSpec = it "returns valid ExportDataByTimestamp" $ do property $ \timestamp -> let typedURL = showURL $ ExportDataByTimestamp timestamp - untypedURL = "/incremental/tickets.json?start_time=" <> (show . floor . toRational $ getExportFromTime timestamp) + untypedURL = "https://iohk.zendesk.com/api/v2/incremental/tickets.json?start_time=" + <> (show . floor . toRational $ getExportFromTime timestamp) in typedURL == untypedURL