-
Notifications
You must be signed in to change notification settings - Fork 1
/
OutputToTGF.sql
158 lines (124 loc) · 5.86 KB
/
OutputToTGF.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
USE GraphDemo
GO
--parameters values for this database: https://docs.microsoft.com/en-us/sql/relational-databases/graphs/sql-graph-sample?view=sql-server-ver15
--list of nodes in format schema.nodetable.nameForLabel;schema.nodetable.nameForLabel;
--done this way because it is a lot easier to manually edit
DECLARE @NodeList nvarchar(4000) = 'dbo.person.name;dbo.Restaurant.name;dbo.City.name'
DECLARE @NodeList nvarchar(4000) = 'dbo.person.name;dbo.city.name'
--list of edges in format schema.edgeTable
DECLARE @EdgeList nvarchar(4000) = 'dbo.friendOf;dbo.likes;dbo.livesIn;dbo.locatedIn;'
--used to determine formatting of name in output
DECLARE @DefaultNodeType nvarchar(100) = '?' --I want to output them all
DECLARE @DefaultEdgeType nvarchar(100) = '?'
DECLARE @LabelNonDefaultEdgeFlag bit = 1
--if node or edge type doesn't match this value exactly, the node will be named NameForLabel (NodeType) and the edge will
--not have a label if it matches
DECLARE @NodeTableList table (SchemaName sysname, TableName sysname, NodeNameColumn sysname PRIMARY KEY (SchemaName, TableName))
DECLARE @EdgeTableList table (SchemaName sysname, TableName sysname, EdgeNameColumn sysname NULL PRIMARY KEY (SchemaName, TableName))
SET NOCOUNT ON;
DECLARE @crlf nvarchar(2) = CHAR(13) + CHAR(10)
--parse the two strings and put into tables
INSERT INTO @NodeTableList(SchemaName, TableName, NodeNameColumn)
SELECT PARSENAME(value,3), PARSENAME(value,2), PARSENAME(value,1)
FROM STRING_SPLIT(@NodeList,';')
WHERE PARSENAME(value,1) IS NOT NULL
INSERT INTO @EdgeTableList(SchemaName, TableName)
SELECT PARSENAME(value,2), PARSENAME(value,1)
FROM STRING_SPLIT(@EdgeList,';')
WHERE PARSENAME(value,1) IS NOT NULL
--create table to hold the nodes and edges. Nodes and edges each have their own id
--sequence, but we need them to be unique. Hence I added an identity column to node
DROP TABLE IF EXISTS #NodeOutput, #EdgeOutput
CREATE TABLE #NodeOutput
(
NodeOutputId int IDENTITY PRIMARY KEY,
NodeSchema sysname,
NodeTable sysname,
NodeId int,
NodeName varchar(100),
UNIQUE (NodeSchema, NodeTable, NodeId)
)
CREATE TABLE #EdgeOutput
(
EdgeSchema varchar(1000),
EdgeTable varchar(1000),
FromNodeOutputId int NULL,
ToNodeOutputId int NULL,
EdgeName varchar(100)
)
DECLARE @NodeCursor CURSOR,
@EdgeCursor CURSOR,
@NodeName sysname,
@EdgeName sysname,
@SchemaName sysname,
@NodeNameColumn sysname,
@SQLQuery nvarchar(MAX)
--cursoring over the different nodes and adding them with dynamic SQL
SET @NodeCursor = CURSOR FOR (SELECT SchemaName,TableName,NodeNameColumn FROM @NodeTableList)
OPEN @NodeCursor
WHILE 1=1
BEGIN
FETCH NEXT FROM @NodeCursor INTO @SchemaName, @NodeName,@NodeNameColumn
IF @@FETCH_STATUS <> 0
BREAK
--fetching the id from the JSON for the pseudocolumn $node_id
SELECT @SQLQuery = 'INSERT INTO #NodeOutput (NodeSchema, NodeTable, NodeId, NodeName)' + @crlf +
'SELECT ''' + REPLACE(@SchemaName,'''','''''') + ''', '''+ REPLACE(@NodeName,'''','''''') + ''', JSON_VALUE(CAST($node_id AS nvarchar(1000)),''$.id''), '
+ QUOTENAME(@NodeNameColumn) + ' + ' + CASE WHEN @DefaultNodeType = @NodeName OR @LabelNonDefaultEdgeFlag = 0 THEN '''''' ELSE ''' (' + REPLACE(@NodeName,'''','''''') + ')''' END +
' FROM ' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@NodeName)
EXEC (@SQLQuery)
END;
SET @EdgeCursor = CURSOR FOR (SELECT SchemaName, TableName FROM @EdgeTableList)
OPEN @EdgeCursor
WHILE 1=1
BEGIN
FETCH NEXT FROM @edgeCursor INTO @SchemaName, @EdgeName
IF @@FETCH_STATUS <> 0
BREAK
--fetching the id from the JSON for the pseudocolumn $from and $to_id and using those values
--to join to the #Node table I created to get the surrogate key for the output
--
SELECT @SQLQuery = 'WITH Parts AS (
SELECT JSON_VALUE(CAST($from_id AS nvarchar(1000)),''$.schema'') AS FromNodeSchema,
JSON_VALUE(CAST($from_id AS nvarchar(1000)),''$.table'') AS FromNodeTable,
JSON_VALUE(CAST($from_id AS nvarchar(1000)),''$.id'') AS FromNodeId,
JSON_VALUE(CAST($To_id AS nvarchar(1000)),''$.schema'') AS ToNodeSchema,
JSON_VALUE(CAST($to_id AS nvarchar(1000)),''$.table'') AS ToNodeTable,
JSON_VALUE(CAST($to_id AS nvarchar(1000)),''$.id'') AS ToNodeId,
CASE WHEN ''' + REPLACE(@EdgeName,'''','''''') +''' <> ''' + REPLACE(@DefaultEdgeType,'''','''''') + ''' THEN ''' + REPLACE(@EdgeName,'''','''''') + ''' ELSE '''' END AS EdgeName
FROM ' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@EdgeName) + '
)
INSERT INTO #EdgeOutput(EdgeSchema, EdgeTable, FromNodeOutputId, ToNodeOutputId, EdgeName)
SELECT ''' + REPLACE(@SchemaName,'''','''''') + ''' AS EdgeSchema,
''' + REPLACE(@EdgeName,'''','''''') + ''' as EdgeName,
FromNodeOutput.NodeOutputId AS FromNodeOutputId,
ToNodeOutput.NodeOutputId AS ToNodeOutputId,
EdgeName
FROM Parts
JOIN #NodeOutput AS FromNodeOutput
ON FromNodeOutput.NodeSchema = Parts.FromNodeSchema
AND FromNodeOutput.NodeTable = Parts.FromNodeTable
AND FromNodeOutput.NodeId = Parts.FromNodeId
JOIN #NodeOutput AS ToNodeOutput
ON ToNodeOutput.NodeSchema = Parts.ToNodeSchema
AND ToNodeOutput.NodeTable = Parts.ToNodeTable
AND ToNodeOutput.NodeId = Parts.ToNodeId'
EXEC (@SQLQuery)
END;
GO
--after this, build the output into a temp table for ordering purposes
DECLARE @Output table (Ordering int IDENTITY, outputValue nvarchar(1000))
--get the nodes
INSERT INTO @Output(outputValue)
SELECT CONCAT(#NodeOutput.NodeOutputId, ' ', NodeName) FROM #NodeOutput
--add the separator
INSERT INTO @Output(outputValue)
SELECT '#'
--get the edges and their names
INSERT INTO @Output(outputValue)
SELECT CONCAT(#EdgeOutput.FromNodeOutputId, ' ', #EdgeOutput.ToNodeOutputId, ' ',#EdgeOutput.EdgeName) FROM #EdgeOutput
--return the output value, which I am pasting into a file for simplicity sake. Could easily be automated, but this was
--easy enough
SELECT [@Output].outputValue
FROM @Output
ORDER BY [@Output].Ordering