## Working with making schemas from JSONs

In [1]:
import os
import sys

notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, ".."))
print(project_root)
sys.path.insert(0, project_root)

from src.DataCreator.DataSets.DataSetGenStandard import DataSetGenStandard
from pyspark.sql import SparkSession
from pyspark.sql.types import *

spark = SparkSession.builder \
    .appName("Test Data Set Generation") \
    .enableHiveSupport() \
    .master("local[*]") \
    .getOrCreate()

c:\Users\dalej\Documents\_Coding\DragonRegen


In [2]:
from src.DataCreator.SchemaGenerators.SchemaSpark import SchemaSpark
from src.MetaFort.AILoggingTables import AILoggingTables

s_sql_col = SchemaSpark.generate_schema_sql(AILoggingTables.d_system_tables)
print(s_sql_col)

{'actions': '    [action_id] IntegerType() NOT NULL,\n    [name] StringType() NOT NULL,\n    [description] StringType() NULL,\n    [created_at] TimestampType() NOT NULL,\n    [updated_at] TimestampType() NOT NULL,\n    [status] StringType() NOT NULL,\n    [error_code] StringType() NULL,\n    [error_message] StringType() NULL,\n    [error_timestamp] TimestampType() NULL,\n    [metadata] StringType() NULL,\n    CONSTRAINT [PK_actions] PRIMARY KEY CLUSTERED ([action_id])', 'sub_actions': '    [sub_action_id] IntegerType() NOT NULL,\n    [action_id] IntegerType() NOT NULL,\n    [name] StringType() NOT NULL,\n    [description] StringType() NULL,\n    [sequence_number] IntegerType() NOT NULL,\n    [created_at] TimestampType() NOT NULL,\n    [updated_at] TimestampType() NOT NULL,\n    [status] StringType() NOT NULL,\n    [error_code] StringType() NULL,\n    [error_message] StringType() NULL,\n    [error_timestamp] TimestampType() NULL,\n    [metadata] StringType() NULL,\n    CONSTRAINT [PK_su

In [3]:
sturct_col = SchemaSpark.generate_schema(AILoggingTables.d_system_tables)
print(sturct_col)
print(sturct_col['metrics'])

{'metrics': StructType([StructField('metric_id', IntegerType(), False), StructField('action_id', IntegerType(), True), StructField('sub_action_id', IntegerType(), True), StructField('request_id', IntegerType(), True), StructField('metric_type', StringType(), False), StructField('metric_name', StringType(), False), StructField('value', FloatType(), False), StructField('unit', StringType(), True), StructField('timestamp', TimestampType(), False), StructField('dimensions', StringType(), True)])}
StructType([StructField('metric_id', IntegerType(), False), StructField('action_id', IntegerType(), True), StructField('sub_action_id', IntegerType(), True), StructField('request_id', IntegerType(), True), StructField('metric_type', StringType(), False), StructField('metric_name', StringType(), False), StructField('value', FloatType(), False), StructField('unit', StringType(), True), StructField('timestamp', TimestampType(), False), StructField('dimensions', StringType(), True)])


In [4]:
for i in sturct_col['metrics']:
    print(i)

StructField('metric_id', IntegerType(), False)
StructField('action_id', IntegerType(), True)
StructField('sub_action_id', IntegerType(), True)
StructField('request_id', IntegerType(), True)
StructField('metric_type', StringType(), False)
StructField('metric_name', StringType(), False)
StructField('value', FloatType(), False)
StructField('unit', StringType(), True)
StructField('timestamp', TimestampType(), False)
StructField('dimensions', StringType(), True)


In [5]:
df_test_metrics = spark.createDataFrame([],schema=sturct_col['metrics'])
df_test_metrics.show(truncate=False)
df_test_metrics.printSchema()

+---------+---------+-------------+----------+-----------+-----------+-----+----+---------+----------+
|metric_id|action_id|sub_action_id|request_id|metric_type|metric_name|value|unit|timestamp|dimensions|
+---------+---------+-------------+----------+-----------+-----------+-----+----+---------+----------+
+---------+---------+-------------+----------+-----------+-----------+-----+----+---------+----------+

root
 |-- metric_id: integer (nullable = false)
 |-- action_id: integer (nullable = true)
 |-- sub_action_id: integer (nullable = true)
 |-- request_id: integer (nullable = true)
 |-- metric_type: string (nullable = false)
 |-- metric_name: string (nullable = false)
 |-- value: float (nullable = false)
 |-- unit: string (nullable = true)
 |-- timestamp: timestamp (nullable = false)
 |-- dimensions: string (nullable = true)



# MS SQL 

In [6]:
import pyodbc
driver = "ODBC Driver 17 for SQL Server"
server = 'localhost\\SQLEXPRESS' 
# server = "Andrew=PC\\SQLEXPRESS"
database = "MetaFort"
conn_str = (
    f"DRIVER={driver};"
    f"SERVER={server};"
    f"DATABASE={database};"
    f"Trusted_Connection=yes;"
)
connection = pyodbc.connect(conn_str)
cursor = connection.cursor()
print(f"Connected to MS SQL Server database: {database}")

Connected to MS SQL Server database: MetaFort


In [7]:
from src.DataCreator.SchemaGenerators.SchemaMSSQL import SchemaMSSQL
from src.MetaFort.AILoggingTables import AILoggingTables

s_sql_col = SchemaMSSQL.generate_schema_sql(AILoggingTables.d_system_tables)
print(s_sql_col)


{'actions': '    [action_id] INT NOT NULL,\n    [name] NVARCHAR(255) NOT NULL,\n    [description] NVARCHAR(255) NULL,\n    [created_at] DATETIME2 NOT NULL,\n    [updated_at] DATETIME2 NOT NULL,\n    [status] NVARCHAR(255) NOT NULL,\n    [error_code] NVARCHAR(255) NULL,\n    [error_message] NVARCHAR(255) NULL,\n    [error_timestamp] DATETIME2 NULL,\n    [metadata] NVARCHAR(MAX) NULL,\n    CONSTRAINT [PK_actions] PRIMARY KEY CLUSTERED ([action_id])', 'sub_actions': '    [sub_action_id] INT NOT NULL,\n    [action_id] INT NOT NULL,\n    [name] NVARCHAR(255) NOT NULL,\n    [description] NVARCHAR(255) NULL,\n    [sequence_number] INT NOT NULL,\n    [created_at] DATETIME2 NOT NULL,\n    [updated_at] DATETIME2 NOT NULL,\n    [status] NVARCHAR(255) NOT NULL,\n    [error_code] NVARCHAR(255) NULL,\n    [error_message] NVARCHAR(255) NULL,\n    [error_timestamp] DATETIME2 NULL,\n    [metadata] NVARCHAR(MAX) NULL,\n    CONSTRAINT [PK_sub_actions] PRIMARY KEY CLUSTERED ([sub_action_id])', 'requests':

In [8]:
output = SchemaMSSQL.create_tables_from_dict(db_engine=cursor, d_tables=s_sql_col)


        if OBJECT_ID('dbo.actions', 'U') IS NULL
        CREATE TABLE dbo.actions (    [action_id] INT NOT NULL,
    [name] NVARCHAR(255) NOT NULL,
    [description] NVARCHAR(255) NULL,
    [created_at] DATETIME2 NOT NULL,
    [updated_at] DATETIME2 NOT NULL,
    [status] NVARCHAR(255) NOT NULL,
    [error_code] NVARCHAR(255) NULL,
    [error_message] NVARCHAR(255) NULL,
    [error_timestamp] DATETIME2 NULL,
    [metadata] NVARCHAR(MAX) NULL,
    CONSTRAINT [PK_actions] PRIMARY KEY CLUSTERED ([action_id]))
        ;

        if OBJECT_ID('dbo.sub_actions', 'U') IS NULL
        CREATE TABLE dbo.sub_actions (    [sub_action_id] INT NOT NULL,
    [action_id] INT NOT NULL,
    [name] NVARCHAR(255) NOT NULL,
    [description] NVARCHAR(255) NULL,
    [sequence_number] INT NOT NULL,
    [created_at] DATETIME2 NOT NULL,
    [updated_at] DATETIME2 NOT NULL,
    [status] NVARCHAR(255) NOT NULL,
    [error_code] NVARCHAR(255) NULL,
    [error_message] NVARCHAR(255) NULL,
    [error_timestamp] DA

### Close Connection when done

In [9]:
cursor.close()
connection.close()