In [1]:
# Import needed libraries (standard library first, then third-party).
import os
import urllib
import urllib.parse  # explicit: `import urllib` alone does not guarantee the `parse` submodule is loaded

import pandas as pd
import pyodbc
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

In [2]:
#Connecting to MS SQL Server database
# Reads the connection settings from the environment (optionally populated
# from a local .env file) and builds the SQLAlchemy engine `mssql_engine`.
load_dotenv()

#get parameters from environment variables
# NOTE: pwd / uid are read but not used below, because the ODBC string asks
# for Windows integrated authentication (Trusted_Connection=yes).
pwd = os.getenv('mssql_pwd')
uid = os.getenv('mssql_uid')
#sql db details
driver = os.getenv('mssql_driver')
server = os.getenv('mssql_server')
database = os.getenv("mssql_database")

# Fail fast: an unset variable would otherwise be interpolated as the text
# "None" and only surface later as an opaque ODBC driver error.
missing_mssql_vars = [
    name
    for name, value in (
        ('mssql_driver', driver),
        ('mssql_server', server),
        ('mssql_database', database),
    )
    if not value
]
if missing_mssql_vars:
    raise RuntimeError(
        'Missing required environment variable(s): ' + ', '.join(missing_mssql_vars)
    )

# The raw ODBC connection string must be URL-encoded before it is embedded
# in the SQLAlchemy URL as the `odbc_connect` query parameter.
params = urllib.parse.quote_plus(f"DRIVER={driver};"
                            f"SERVER={server};"
                            f"DATABASE={database};"
                            "Trusted_Connection=yes;"
                            "TrustServerCertificate=yes")

mssql_engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

In [3]:
#Extracting table names to be loaded into postgres database
# Builds one fully qualified, bracket-quoted name ([catalog].[schema].[name])
# per row of INFORMATION_SCHEMA.TABLES. That view lists views as well as base
# tables, so both are included; add "WHERE TABLE_TYPE = 'BASE TABLE'" to the
# query if only real tables should be copied.
#
# QUOTENAME is used instead of manual '[' + name + ']' concatenation so that
# identifiers containing ']' are escaped correctly. Column names are spelled
# in their canonical upper case so the query also works on case-sensitive
# collations.
table_names_df = pd.read_sql_query(
    """
    SELECT QUOTENAME(TABLE_CATALOG) + '.' + QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME) AS table_name
    FROM INFORMATION_SCHEMA.TABLES
    """,
    mssql_engine,
)

list_of_tables = table_names_df['table_name'].tolist()
list_of_tables

['[AdventureWorks2019].[HumanResources].[EmployeePayHistory]',
 '[AdventureWorks2019].[Sales].[SalesOrderHeaderSalesReason]',
 '[AdventureWorks2019].[Sales].[SalesPerson]',
 '[AdventureWorks2019].[Production].[Illustration]',
 '[AdventureWorks2019].[HumanResources].[JobCandidate]',
 '[AdventureWorks2019].[Production].[Location]',
 '[AdventureWorks2019].[Person].[Password]',
 '[AdventureWorks2019].[Sales].[SalesPersonQuotaHistory]',
 '[AdventureWorks2019].[Person].[Person]',
 '[AdventureWorks2019].[Sales].[SalesReason]',
 '[AdventureWorks2019].[Sales].[SalesTaxRate]',
 '[AdventureWorks2019].[Sales].[PersonCreditCard]',
 '[AdventureWorks2019].[Person].[vAdditionalContactInfo]',
 '[AdventureWorks2019].[Person].[PersonPhone]',
 '[AdventureWorks2019].[HumanResources].[vEmployee]',
 '[AdventureWorks2019].[Sales].[SalesTerritory]',
 '[AdventureWorks2019].[HumanResources].[vEmployeeDepartment]',
 '[AdventureWorks2019].[Person].[PhoneNumberType]',
 '[AdventureWorks2019].[HumanResources].[vEmplo

In [4]:
len(list_of_tables)     # Number of entries returned by the table-listing query (one per object to copy)

91

In [5]:
#Establishing connection to postgres database
# Reads the PostgreSQL settings from the environment and builds the
# SQLAlchemy engine `pg_engine`.
# NOTE: pg_driver is read but not used below; the URL uses the plain
# "postgresql" dialect, i.e. SQLAlchemy's default PostgreSQL driver.
pg_driver = os.getenv('postgre_driver')
pg_database = os.getenv('postgre_database')
pg_server = os.getenv('postgre_server')
pg_port = os.getenv('postgre_port')
pg_uid = os.getenv('postgre_uid')
pg_pwd = os.getenv('postgre_pwd')

# Fail fast: an unset variable would otherwise end up in the URL as the
# text "None" and only fail on first use of the engine.
missing_pg_vars = [
    name
    for name, value in (
        ('postgre_database', pg_database),
        ('postgre_server', pg_server),
        ('postgre_uid', pg_uid),
    )
    if not value
]
if missing_pg_vars:
    raise RuntimeError(
        'Missing required environment variable(s): ' + ', '.join(missing_pg_vars)
    )

# URL.create takes each component unescaped, so a password containing
# characters such as '@', '/' or ':' cannot corrupt the URL the way raw
# f-string interpolation does. An unset port falls back to the driver default.
pg_url = URL.create(
    drivername='postgresql',
    username=pg_uid,
    password=pg_pwd,
    host=pg_server,
    port=int(pg_port) if pg_port else None,
    database=pg_database,
)
pg_engine = create_engine(pg_url)


In [6]:
#Loading tables to postgres
# Copies every object in `list_of_tables` from SQL Server to PostgreSQL,
# one whole table at a time. A failure on one table does not stop the run:
# it is recorded in `errors` as "<exception text> <table name>" and the loop
# moves on to the next table.
errors = []

for tbl in list_of_tables:
    try:
        #query and load data into a dataframe
        # `tbl` comes from the database's own metadata, not from user input,
        # so interpolating it into the statement is acceptable here.
        df = pd.read_sql_query(f'select * FROM {tbl}', mssql_engine)

        # Destination name = source name without the leading "[catalog].",
        # e.g. "[Sales].[SalesPerson]". Splitting on the first "].[" works
        # for any database name, unlike a fixed-width slice.
        _, separator, schema_and_table = tbl.partition('].[')
        target_table = f'[{schema_and_table}' if separator else tbl

        df.to_sql(target_table, pg_engine, if_exists='replace', index=False)
        # Each table is loaded in a single batch, so the range always starts at 0.
        print(f'importing rows 0 to {len(df)}... for table {tbl}')
        print("Data imported successfully!")
    except Exception as e:
        # Keep going, but make the failure visible and record it for later review.
        print(f'FAILED to import table {tbl} (details recorded in errors)')
        error =  str(e) + ' ' + str(tbl)
        errors.append(error)

importing rows 0 to 316... for table [AdventureWorks2019].[HumanResources].[EmployeePayHistory]
Data imported successfully!
importing rows 0 to 27647... for table [AdventureWorks2019].[Sales].[SalesOrderHeaderSalesReason]
Data imported successfully!
importing rows 0 to 17... for table [AdventureWorks2019].[Sales].[SalesPerson]
Data imported successfully!
importing rows 0 to 5... for table [AdventureWorks2019].[Production].[Illustration]
Data imported successfully!
importing rows 0 to 13... for table [AdventureWorks2019].[HumanResources].[JobCandidate]
Data imported successfully!
importing rows 0 to 14... for table [AdventureWorks2019].[Production].[Location]
Data imported successfully!
importing rows 0 to 19972... for table [AdventureWorks2019].[Person].[Password]
Data imported successfully!
importing rows 0 to 163... for table [AdventureWorks2019].[Sales].[SalesPersonQuotaHistory]
Data imported successfully!
importing rows 0 to 19972... for table [AdventureWorks2019].[Person].[Person]

In [7]:
errors # One "<exception text> <table name>" string per table that failed during the extract-load loop

["(pyodbc.ProgrammingError) ('ODBC SQL type -151 is not yet supported.  column-index=1  type=-151', 'HY106')\n(Background on this error at: https://sqlalche.me/e/14/f405) [AdventureWorks2019].[Production].[ProductDocument]",
 "(pyodbc.ProgrammingError) ('ODBC SQL type -151 is not yet supported.  column-index=6  type=-151', 'HY106')\n(Background on this error at: https://sqlalche.me/e/14/f405) [AdventureWorks2019].[Person].[Address]",
 "(pyodbc.ProgrammingError) ('ODBC SQL type -151 is not yet supported.  column-index=0  type=-151', 'HY106')\n(Background on this error at: https://sqlalche.me/e/14/f405) [AdventureWorks2019].[Production].[Document]",
 "(pyodbc.ProgrammingError) ('ODBC SQL type -151 is not yet supported.  column-index=3  type=-151', 'HY106')\n(Background on this error at: https://sqlalche.me/e/14/f405) [AdventureWorks2019].[HumanResources].[Employee]"]

In [8]:
#Extracting table name for transformations
# Each entry in `errors` was built as "<exception text> <table name>", so the
# failing table is recovered by finding which known table name the entry ends
# with. This avoids slicing at a fixed character offset, which only works
# while every exception message has exactly the same length.
tables_to_transform = [
    tbl
    for err in errors
    for tbl in list_of_tables
    if err.endswith(' ' + tbl)
]

tables_to_transform

['[AdventureWorks2019].[Production].[ProductDocument]',
 '[AdventureWorks2019].[Person].[Address]',
 '[AdventureWorks2019].[Production].[Document]',
 '[AdventureWorks2019].[HumanResources].[Employee]']

In [9]:
#Transforming Data
# Production.ProductDocument failed in the bulk load because pyodbc cannot
# read its DocumentNode column (ODBC SQL type -151). Casting that column to
# VARBINARY on the SQL Server side lets pandas read it as raw bytes.
# NOTE(review): only ProductDocument is handled here; the other tables that
# failed in the bulk load still need an equivalent cast for their own
# unsupported columns.

sql = """   SELECT ProductID
            ,CAST(DocumentNode AS VARBINARY(4000)) AS DocumentNode
            ,ModifiedDate
            FROM [AdventureWorks2019].[Production].[ProductDocument];
                """

df = pd.read_sql(sql, mssql_engine)
# Print explicitly: a bare `df.dtypes` in the middle of a cell is evaluated
# and thrown away, because only the last expression of a cell is displayed.
print(df.dtypes)
df.to_sql('[Production].[ProductDocument]', pg_engine, if_exists='replace', index=False)

32

In [10]:
# Release the connections held by both engines now that the transfer is done
# (PostgreSQL first, then SQL Server).
for finished_engine in (pg_engine, mssql_engine):
    finished_engine.dispose()