------------

In [2]:
{-# LANGUAGE OverloadedStrings #-}

import Data.Aeson
import qualified Data.ByteString.Lazy as B
import Data.Text.Prettyprint.Doc

-- | One location record as it appears in the JSON input.
-- The 'FromJSON' instance fills every field from the JSON key that has the
-- same name as the field.
data Location = Location
  { database         :: String
  , dbdesc           :: String
  , host             :: String
  , loc_name         :: String
  , loc_class        :: String
  , loc_directory    :: String
  , loc_remote_node  :: String
  , loc_remote_login :: String
  , loc_remote_pwd   :: String
  , loc_remote_port  :: Int
  , loc_db_name      :: String
  , loc_db_user      :: String
  , loc_description  :: String
  } deriving (Show)

-- | Multi-line rendering: a @Location {@ line, one @name = value@ line per
-- field (indented by two columns, in declaration order) and a closing brace.
-- NOTE(review): 'loc_remote_pwd' is rendered verbatim, like every other field.
instance Pretty Location where
  pretty loc = vsep ("Location {" : map (indent 2) rows ++ ["}"])
    where
      rows =
        [ row "database" (database loc)
        , row "dbdesc" (dbdesc loc)
        , row "host" (host loc)
        , row "loc_name" (loc_name loc)
        , row "loc_class" (loc_class loc)
        , row "loc_directory" (loc_directory loc)
        , row "loc_remote_node" (loc_remote_node loc)
        , row "loc_remote_login" (loc_remote_login loc)
        , row "loc_remote_pwd" (loc_remote_pwd loc)
        , row "loc_remote_port" (loc_remote_port loc)
        , row "loc_db_name" (loc_db_name loc)
        , row "loc_db_user" (loc_db_user loc)
        , row "loc_description" (loc_description loc)
        ]

      -- One "label = value" line; polymorphic because the port is an Int
      -- while every other field is a String.
      row :: Pretty a => Doc ann -> a -> Doc ann
      row label value = label <> " = " <> pretty value

-- | Decode a JSON object into a 'Location'. Every field is read with '.:'
-- from the key of the same name, in declaration order, so the first missing
-- or ill-typed key makes the whole object fail to parse.
instance FromJSON Location where
  parseJSON = withObject "Location" $ \obj -> do
    vDatabase    <- obj .: "database"
    vDbdesc      <- obj .: "dbdesc"
    vHost        <- obj .: "host"
    vName        <- obj .: "loc_name"
    vClass       <- obj .: "loc_class"
    vDirectory   <- obj .: "loc_directory"
    vRemoteNode  <- obj .: "loc_remote_node"
    vRemoteLogin <- obj .: "loc_remote_login"
    vRemotePwd   <- obj .: "loc_remote_pwd"
    vRemotePort  <- obj .: "loc_remote_port"
    vDbName      <- obj .: "loc_db_name"
    vDbUser      <- obj .: "loc_db_user"
    vDescription <- obj .: "loc_description"
    pure Location
      { database         = vDatabase
      , dbdesc           = vDbdesc
      , host             = vHost
      , loc_name         = vName
      , loc_class        = vClass
      , loc_directory    = vDirectory
      , loc_remote_node  = vRemoteNode
      , loc_remote_login = vRemoteLogin
      , loc_remote_pwd   = vRemotePwd
      , loc_remote_port  = vRemotePort
      , loc_db_name      = vDbName
      , loc_db_user      = vDbUser
      , loc_description  = vDescription
      }

-- Read the raw JSON bytes from the file "data2.json" as a lazy ByteString.
jsonData <- B.readFile "data2.json"
  
-- Decode the bytes as a list of Location records with eitherDecode.
-- The result is an Either: Left carries the decoder's error message,
-- Right carries the decoded list.
let parsedData = eitherDecode jsonData :: Either String [Location]


In [3]:
-- | Normalised view of a 'Location'. Compared with 'Location' it has no
-- password and no raw @loc_db_name@ field, and it adds 'db_node_name' and
-- 'db_instance'.
data Location' = Location'
  { database'         :: String
  , dbdesc'           :: String
  , host'             :: String
  , loc_name'         :: String
  , loc_class'        :: String
  , loc_directory'    :: String
  , loc_remote_node'  :: String  -- ^ agent used
  , loc_remote_login' :: String  -- ^ agent user
  , loc_remote_port'  :: Int     -- ^ agent port
  , loc_db_user'      :: String
  , loc_description'  :: String
  , db_node_name      :: String  -- ^ infra location where the data is stored
  , db_instance       :: String  -- ^ logical location
  } deriving (Show)
  
-- | Multi-line rendering with aligned @=@ signs. The first four rows
-- (database, dbdesc, host, loc_class) are indented by two columns and the
-- remaining rows by three, exactly as in the hand-written layout.
instance Pretty Location' where
  pretty loc =
    vsep ("Location {" : map (indent 2) outerRows ++ map (indent 3) innerRows ++ ["}"])
    where
      outerRows =
        [ row "database         = " (database' loc)
        , row "dbdesc           = " (dbdesc' loc)
        , row "host             = " (host' loc)
        , row "loc_class        = " (loc_class' loc)
        ]

      innerRows =
        [ row "loc_name         = " (loc_name' loc)
        , row "loc_directory    = " (loc_directory' loc)
        , row "loc_remote_node  = " (loc_remote_node' loc)
        , row "loc_remote_login = " (loc_remote_login' loc)
        , row "loc_remote_port  = " (loc_remote_port' loc)
        , row "loc_db_user      = " (loc_db_user' loc)
        , row "loc_description  = " (loc_description' loc)
        , row "db_node_name     = " (db_node_name loc)
        , row "db_instance      = " (db_instance loc)
        ]

      -- The label already contains its padding and the " = " separator.
      row :: Pretty a => Doc ann -> a -> Doc ann
      row label value = label <> pretty value

In [4]:
-- | Build a 'Location'' from a parsed 'Location'.
--
-- * @x@           – the source record; most fields are copied unchanged.
-- * @classSuffix@ – appended to @loc_class x@ to form the new class name.
-- * third argument – @[dbUser, nodeName, dbInstance]@, in that order.
--
-- The function is total: if the list has fewer than three elements the
-- missing positions are filled with the placeholder @"###"@ (the same value
-- 'parseLoc' uses for unknown classes), and elements after the third are
-- ignored. Previously any list that was not exactly three elements long
-- caused a pattern-match failure.
loc2Location :: Location -> String -> [String] -> Location'
loc2Location x classSuffix extras =
  Location'
    { database'         = database x
    , dbdesc'           = dbdesc x
    , host'             = host x
    , loc_name'         = loc_name x
    , loc_class'        = loc_class x ++ classSuffix
    , loc_directory'    = loc_directory x
    , loc_remote_node'  = loc_remote_node x
    , loc_remote_login' = loc_remote_login x
    , loc_remote_port'  = loc_remote_port x
    , loc_db_user'      = dbUser -- open question: is it agent user or db user?
    , loc_description'  = loc_description x
    , db_node_name      = nodeName
    , db_instance       = dbInstance
    }
  where
    placeholder = "###"

    -- Padding with an infinite tail guarantees the first alternative matches;
    -- the second one only exists to keep the case expression exhaustive.
    (dbUser, nodeName, dbInstance) =
      case extras ++ repeat placeholder of
        (u : n : i : _) -> (u, n, i)
        _               -> (placeholder, placeholder, placeholder)
                                         

In [5]:
import Text.Read (readMaybe) 
import Data.Maybe (fromMaybe )
import Data.List.Split (splitOn)
import Data.List (intercalate)

-- | Extract @[dbUser, dbNode, dbName]@ from a Redshift location.
--
-- * @dbUser@ is the part of @loc_db_user@ before the first @'/'@.
-- * @loc_db_name@ is expected to look like @PREFIX=[["key","value"],...]@;
--   everything after the first @'='@ is read as a list of string lists and
--   the values of the keys @"db_node"@ and @"db_name"@ are looked up.
--
-- The result always has exactly three elements, in the fixed order above,
-- independent of the order of the keys in the payload. A key that is missing
-- (or a payload that cannot be read at all) yields the placeholder @"###"@.
-- Rows that are not exactly @[key, value]@ are ignored instead of crashing.
parseRedshift :: Location -> [String]
parseRedshift l = [dbUser, valueOf "db_node", valueOf "db_name"]
  where
    dbUser = takeWhile (/= '/') (loc_db_user l)

    -- Text after the first '=' of loc_db_name.
    payload = drop 1 (dropWhile (/= '=') (loc_db_name l))

    rows :: [[String]]
    rows = fromMaybe [] (readMaybe payload)

    -- A failed pattern match in a list comprehension skips the element.
    settings = [(key, value) | [key, value] <- rows]

    valueOf key = fromMaybe "###" (lookup key settings)
              
-- | Convert a 'Location' into a 'Location'', choosing how to derive
-- @[dbUser, nodeName, dbInstance]@ from the location class:
--
-- * @redshift@ – delegated to 'parseRedshift'.
-- * @file@ – split by the prefix of @loc_directory@: a leading @"/"@ gives
--   class suffix @"_locdir"@, @"s3s:"@ gives @"_s3s"@ and @"sftp:"@ gives
--   @"_sftp"@. For the last two the text after the last @'@'@ is split at
--   its first @'/'@ into node and folder.
-- * @mysql@ / @greenplum@ / @postgresql@ – @loc_db_name@ split on @"~"@.
-- * @sqlserver@ – @loc_db_name@ split on a backslash.
-- * @teradata@ – @loc_db_name@ used for both node and instance.
-- * @salesforce@ – node from @loc_directory@ (after the last @"//"@),
--   user and instance from the two sides of @'@'@ in the user name.
-- * anything else – three @"###"@ placeholders.
--
-- Prefix tests use 'take', so a @file@ location with an empty
-- @loc_directory@ no longer crashes on 'head'; it falls through to the
-- placeholder branch.
parseLoc :: Location -> Location'
parseLoc l
    | cls == "redshift" = build "" (parseRedshift l)
    | cls == "file" && dirStartsWith "/" =
        build "_locdir" [loc_remote_login l, loc_remote_node l, dir]
    | cls == "file" && dirStartsWith "s3s:" = build "_s3s" ["###", getBacket, getPrefix]
    | cls == "file" && dirStartsWith "sftp:" = build "_sftp" ["###", getsftpNode, getsftpFolder]
    | cls `elem` ["mysql", "greenplum", "postgresql"] =
        build "" [dbUser, firstPart "~" dbName, lastPart "~" dbName]
    | cls == "sqlserver" = build "" [dbUser, firstPart "\\" dbName, lastPart "\\" dbName]
    | cls == "teradata" = build "" [dbUser, dbName, dbName]
    | cls == "salesforce" = build "" [getUserSf, getNodeSf, getInstSf]
    | otherwise = build "" ["###", "###", "###"]
    where
    build  = loc2Location l
    cls    = loc_class l
    dir    = loc_directory l
    dbName = loc_db_name l

    dirStartsWith :: String -> Bool
    dirStartsWith prefix = take (length prefix) dir == prefix

    -- splitOn never returns an empty list, so head/last/tail below are safe.
    firstPart, lastPart :: String -> String -> String
    firstPart sep = head . splitOn sep
    lastPart sep  = last . splitOn sep

    -- s3 / sftp: "scheme://credentials@node/folder/..." style directories
    afterAt       = lastPart "@" dir
    getBacket     = firstPart "/" afterAt
    getPrefix     = intercalate "/" (tail (splitOn "/" afterAt))
    getsftpNode   = getBacket
    getsftpFolder = getPrefix

    -- database user: text before the first '/' of loc_db_user
    dbUser = firstPart "/" (loc_db_user l)

    -- salesforce
    getNodeSf = lastPart "//" dir
    getInstSf = lastPart "@" dbUser
    getUserSf = firstPart "@" dbUser

In [26]:
import Data.Aeson
import qualified Data.Vector as V
import Data.List (intercalate)
import Data.Text (Text, unpack)
import Data.List.Split (splitOn)
import qualified Data.ByteString.Lazy.Char8 as LBS

-- | Decode a lazy ByteString as an arbitrary JSON 'Value'.
-- Returns 'Nothing' when the input cannot be decoded.
valueFromString :: LBS.ByteString -> Maybe Value
valueFromString = decode

-- | Unwrap a 'Just'. Calling it on 'Nothing' is a programming error and
-- fails with a message naming this function (instead of an anonymous
-- pattern-match failure).
unjust :: Maybe a -> a
unjust (Just a) = a
unjust Nothing  = error "unjust: expected Just, got Nothing"

-- | Extract the text of a JSON string value as a 'String'.
-- Any other kind of JSON value is a usage error and fails with a message
-- that shows the offending value.
strfromString :: Value -> String
strfromString (String x) = unpack x
strfromString other      = error ("strfromString: expected a JSON string, got " ++ show other)

-- | Return the elements of a JSON array as a list.
-- Any other kind of JSON value is a usage error and fails with a message
-- that shows the offending value.
unArray :: Value -> [Value]
unArray (Array x) = V.toList x
unArray other     = error ("unArray: expected a JSON array, got " ++ show other)

-- | Turn a two-element @[key, value]@ row into @[key, valueText]@.
-- The key must be a JSON string; the value is either a JSON string (used
-- as is) or a JSON array (rendered by 'listFromArray').
-- Any other shape is a usage error and fails with a message that shows the
-- offending row.
unString :: [Value] -> [String]
unString [String x, String y] = [unpack x, unpack y]
unString [String x, Array y]  = [unpack x, listFromArray (Array y)]
unString other                =
  error ("unString: expected [string, string-or-array], got " ++ show other)

-- | Render a JSON array of strings as one string, elements joined by ";".
listFromArray :: Value -> String
listFromArray arr = intercalate ";" (map strfromString (unArray arr))

-- | Strip the @PREFIX=@ part from a @loc_db_name@ such as @JSON=[[...]]@
-- and return the payload as a lazy ByteString.
--
-- The cut is made at the FIRST @'='@, so an @'='@ inside the payload (for
-- example in a path or an encrypted value) no longer truncates the JSON.
-- A string without any @'='@ is returned unchanged.
getKafkaString :: String -> LBS.ByteString
getKafkaString raw =
  LBS.pack $ case break (== '=') raw of
    (_, _ : payload) -> payload
    (whole, [])      -> whole

-- | Decode a JSON payload of the form @[["key", value], ...]@ into
-- @[[key, valueText], ...]@. A value is either a JSON string (used as is)
-- or a JSON array of strings (joined with ";").
--
-- The function is total: input that is not valid JSON or whose top level
-- is not an array yields @[]@, and rows that do not have the expected shape
-- are skipped. Previously each of these cases raised a pattern-match
-- failure.
parseKafkaString :: LBS.ByteString -> [[String]]
parseKafkaString raw =
  case decode raw of
    Just (Array rows) -> [pair | Just pair <- map toPair (V.toList rows)]
    _                 -> []
  where
    toPair :: Value -> Maybe [String]
    toPair (Array cells) =
      case V.toList cells of
        [String key, String value] -> Just [unpack key, unpack value]
        [String key, Array values] ->
          (\items -> [unpack key, intercalate ";" items])
            <$> traverse asString (V.toList values)
        _ -> Nothing
    toPair _ = Nothing

    asString :: Value -> Maybe String
    asString (String t) = Just (unpack t)
    asString _          = Nothing

-- | Look up @key@ in a list of @[key, value]@ rows.
--
-- Returns the value of the single matching row, or @""@ when no row
-- matches. More than one match is reported with 'error'.
-- Rows that are not exactly two elements long are ignored; previously such
-- a row crashed the lookup even when it was unrelated to @key@.
getval :: String -> [[String]] -> String
getval key records =
    -- A failed pattern match in a list comprehension skips the element.
    case [value | [k, value] <- records, k == key] of
        []      -> ""  -- Return an empty string when no match is found
        [value] -> value
        _       -> error "Multiple matches found"  -- Handle this case as needed

-- | Look up several keys at once; the result has one entry per key, in the
-- order of @keys@, each produced by 'getval'.
getvals :: [String] -> [[String]] -> [String]
getvals keys lst = [getval key lst | key <- keys]

-- | From a kafka @loc_db_name@ string, return the values stored under the
-- keys "urls" and "ssl_key", in that order.
parseKafka :: String -> [String]
parseKafka raw = getvals ["urls", "ssl_key"] (parseKafkaString (getKafkaString raw))


-- Sample kafka loc_db_name value used to try out parseKafka.
-- NOTE(review): the literal contains an "ssl_key_pwd" entry that looks like a
-- real (encrypted) credential -- consider replacing it with a dummy value.
let kafkaStr = "JSON=[[\"urls\",[\"b-1.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094\",\"b-2.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094\",\"b-3.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094\"]],[\"broker_ca\",\"/data/kafka_certs/MSKPOCKey.pem\"],[\"ssl_cert\",\"/data/kafka_devcerts/kafka/signed-certificate-from-acm\"],[\"ssl_key\",\"/data/kafka_devcerts/kafka/key.pem\"],[\"ssl_key_pwd\",\"!{Itbo2aix.D3x7/Yp}!\"]]"


-- Evaluate the parser on the sample.
parseKafka kafkaStr

["b-1.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094;b-2.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094;b-3.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094","/data/kafka_devcerts/kafka/key.pem"]

------------

In [14]:
-- Text after the last '=' in loc_db_name of the second kafka location.
last (splitOn "=" (loc_db_name (lll !! 1)))
last (splitOn "=" (loc_db_name (ll !! 1)))

: 

In [3]:
-------------

In [13]:
import Data.Either
-- All decoded locations; a decoding failure (Left) collapses to the empty list.
l = either (const []) id parsedData
length l

2596

In [34]:
-- parseKafka applied to loc_db_name of every kafka location.
map (parseKafka . loc_db_name) ll

[["b-1.odp-us-prod-daas-msk.2mp3k5.c7.kafka.us-east-1.amazonaws.com:9094;b-3.odp-us-prod-daas-msk.2mp3k5.c7.kafka.us-east-1.amazonaws.com:9094;b-2.odp-us-prod-daas-msk.2mp3k5.c7.kafka.us-east-1.amazonaws.com:9094","/data/kafka_prod/kafka_prod/key.pem"],["b-1.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094;b-2.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094;b-3.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094","/home/hvr2/hvr_home/lib/cert/key.pem"],["b-1.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094;b-2.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094;b-3.us-dev-msk-cluster.x8853p.c1.kafka.us-east-1.amazonaws.com:9094","/data/kafka_devcerts/kafka/key.pem"],["b-1.odp-us-innovation-daas.kifwzy.c8.kafka.us-east-1.amazonaws.com:9094;b-2.odp-us-innovation-daas.kifwzy.c8.kafka.us-east-1.amazonaws.com:9094;b-3.odp-us-innovation-daas.kifwzy.c8.kafka.us-east-1.amazonaws.com:9094","/data/kafka_devcerts/kafka/key.pem"

In [14]:
-- Keep only the locations whose class is "kafka"
-- (lll is the same list under a second name).
ll = [loc | loc <- l, loc_class loc == "kafka"]
lll = ll
map pretty ll

[Location {
  database = ushvr04
  dbdesc = US hvr prod
  host = odp-us-prod-hvr-metadata-db.cjyhx859wwhg.us-east-1.rds.amazonaws.com
  loc_name = kafpr
  loc_class = kafka
  loc_directory = 
  loc_remote_node = 10.242.109.196
  loc_remote_login = hvr
  loc_remote_pwd = !{.wruWtPV}!
  loc_remote_port = 4343
  loc_db_name = JSON=[["urls",["b-1.odp-us-prod-daas-msk.2mp3k5.c7.kafka.us-east-1.amazonaws.com:9094","b-3.odp-us-prod-daas-msk.2mp3k5.c7.kafka.us-east-1.amazonaws.com:9094","b-2.odp-us-prod-daas-msk.2mp3k5.c7.kafka.us-east-1.amazonaws.com:9094"]],["broker_ca","/data/kafka_prod/MSKPOCKey.pem"],["ssl_cert","/data/kafka_prod/kafka_prod/signed-certificate-from-acm"],["ssl_key","/data/kafka_prod/kafka_prod/key.pem"],["ssl_key_pwd","!{KQzkyRXnkSraWtnX}!"]]
  loc_db_user = 
  loc_description = kafka prod
},Location {
  database = ushvr04
  dbdesc = US hvr prod
  host = odp-us-prod-hvr-metadata-db.cjyhx859wwhg.us-east-1.rds.amazonaws.com
  loc_name = kafk
  loc_class = kafka
  loc_directo

In [9]:
--pretty $ l!!6
--loc2Location (l!!6) "_RED" ["db_user","node_name","node_instance"]
--loc2Location (l!!6) "_RED"  $ parseRedshift (l!!6)

In [10]:
--l6 =  l!!6
--pretty l6
--l6' = loc2Location l6 "_RED" ["db_user","node_name","node_instance"]
--pretty l6'

In [10]:
-- Every decoded location converted with parseLoc.
l' = [parseLoc loc | loc <- l]

In [19]:
 [pretty loc | loc <- l', loc_class' loc == "redshift"]

[Location {
  loc_class         = redshift
   loc_name         = dsred
   loc_directory    = 
   loc_remote_node  = 10.242.112.153
   loc_remote_login = hvr2
   loc_remote_port  = 9090
   loc_db_user      = 502830612
   loc_description  = Redshift data sharing cluster
   db_node_name     = redshift-data-shared-poc.c8ziwm1qxh67.us-east-1.redshift.amazonaws.com
   db_instance      = redshiftpocdb
},Location {
  loc_class         = redshift
   loc_name         = redst
   loc_directory    = 
   loc_remote_node  = 10.242.112.153
   loc_remote_login = gehc_hvr
   loc_remote_port  = 4343
   loc_db_user      = redshiftuser
   loc_description  = redshift development
   db_node_name     = us-innovation-redshift.c8ziwm1qxh67.us-east-1.redshift.amazonaws.com
   db_instance      = redshift_hvr
},Location {
  loc_class         = redshift
   loc_name         = rdite
   loc_directory    = 
   loc_remote_node  = 10.242.112.153
   loc_remote_login = hvr2
   loc_remote_port  = 9090
   loc_db_user      = 