In [2]:
import pyodbc
import numpy as np

In [3]:
# Show the ODBC driver names pyodbc reports; useful when choosing the "driver" value below.
pyodbc.drivers()

['SQL Server',
 'PostgreSQL ANSI(x64)',
 'PostgreSQL Unicode(x64)',
 'SQL Server Native Client 11.0',
 'SQL Server Native Client RDA 11.0',
 'ODBC Driver 17 for SQL Server',
 'Microsoft Access Driver (*.mdb, *.accdb)',
 'Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)',
 'Microsoft Access dBASE Driver (*.dbf, *.ndx, *.mdx)',
 'Microsoft Access Text Driver (*.txt, *.csv)']

In [3]:
# Connection settings for the AdventureWorks2019 database.
# NOTE(review): the username and password are hardcoded in the notebook; consider
# loading them from the environment (or a prompt) before sharing this file.
c = {
    "driver": "SQL SERVER",
    # Raw string: the backslash in the server name must stay literal. In a normal
    # string "\S" is an invalid escape sequence and triggers a warning on newer Pythons.
    "server": r"LT-W1828-HH\SQLSERVER22019",
    "database": "AdventureWorks2019",
    "username": "sandbox",
    "password": "mytestpassword",
}

# ODBC connection string; the tripled braces render literal {...} around the driver name.
connection_string = f"DRIVER={{{c['driver']}}}; SERVER={c['server']}; Database={c['database']}; UID={c['username']}; PWD={c['password']};"


# Query 1
This query gets all of the UnitPrices in the PurchaseOrderDetail table

In [4]:
# Fetch every column of every row in Purchasing.PurchaseOrderDetail;
# only the unit price is used by the cells that follow.
query = """
    SELECT *
    FROM AdventureWorks2019.Purchasing.PurchaseOrderDetail;
"""

In [5]:
# Run the current `query` and keep all result rows in `data`.
# The connection is closed explicitly: pyodbc's `with` support only commits the
# transaction on exit and leaves the connection open.
conx = pyodbc.connect(connection_string)
try:
    cursor = conx.cursor()
    cursor.execute(query)
    data = cursor.fetchall()
finally:
    # Always release the connection, even if the query fails.
    conx.close()

OperationalError: ('08001', '[08001] [Microsoft][ODBC SQL Server Driver][DBMSLPCN]SQL Server does not exist or access denied. (17) (SQLDriverConnect); [08001] [Microsoft][ODBC SQL Server Driver][DBMSLPCN]ConnectionOpen (Connect()). (2)')

In [13]:
# Collect the UnitPrice column as a float array for the summary statistics.
# The column is read by name rather than by position so this cell does not
# depend on the column order returned by SELECT *.
unitPrices = np.array([float(row.UnitPrice) for row in data])

In [14]:
# Report the centre and spread of the unit prices: median, then the
# 25th and 75th percentiles.
median_price = np.median(unitPrices)
print(f"The median is {median_price}")

quartiles = np.quantile(unitPrices, [0.25, 0.75])
print(f"The lower and upper quartiles are {quartiles}")

The median is 39.2805
The lower and upper quartiles are [25.4205 45.444 ]


# Query 2
This query gets all of the UnitPrices in the PurchaseOrderDetail table that are less than $80, in order to eliminate outliers.

In [47]:
# Same table as Query 1, restricted to rows whose UnitPrice is below 80.
query = """
    SELECT *
    FROM AdventureWorks2019.Purchasing.PurchaseOrderDetail
    WHERE UnitPrice < 80;
"""

In [48]:
# Execute the current `query` and load every result row into `data`.
# NOTE(review): the connection is not closed explicitly here; a later cell calls
# `cursor.columns(...)` on this cursor, so closing it would require that cell to
# open its own connection first.
with pyodbc.connect(connection_string) as conx:
    cursor = conx.cursor()
    data = cursor.execute(query).fetchall()

In [49]:
# Collect the UnitPrice column of the filtered rows as a float array.
# The column is read by name rather than by position so this cell does not
# depend on the column order returned by SELECT *.
unitPrices = np.array([float(row.UnitPrice) for row in data])

In [51]:
# Same summary as for Query 1, now on the filtered prices: median, then the
# 25th and 75th percentiles.
median_price = np.median(unitPrices)
print(f"The median is {median_price}")

quartiles = np.quantile(unitPrices, [0.25, 0.75])
print(f"The lower and upper quartiles are {quartiles}")

The median is 39.2385
The lower and upper quartiles are [25.4205 45.423 ]


In [54]:
# Display the prices in ascending order to see the smallest and largest remaining values.
np.sort(unitPrices)

array([ 0.21 ,  0.21 ,  0.21 , ..., 63.693, 63.693, 63.693])

## ----------------------------------------------------------------------

In [42]:
# Column metadata reported for the Currency table, read through the `cursor`
# created in an earlier cell.
list(cursor.columns(table="Currency"))  # Where is "Name"?

[('AdventureWorks2019', 'Sales', 'Currency', 'CurrencyCode', -8, 'nchar', 3, 6, None, None, 0, None, None, -8, None, 6, 1, 'NO', 47),
 ('AdventureWorks2019', 'Sales', 'Currency', 'ModifiedDate', 93, 'datetime', 23, 16, 3, None, 0, None, '(getdate())', 9, 3, None, 3, 'NO', 61)]

This query will get only the United States Dollar and Colombian Peso from the database.

In [73]:
# Select the Sales.Currency rows whose Name is 'US Dollar' or 'Colombian Peso'.
query = """
    SELECT *
    FROM AdventureWorks2019.Sales.Currency
    WHERE Name IN ('US Dollar', 'Colombian Peso');
"""

In [74]:
# Run the current `query` and keep all result rows in `data`.
# The connection is closed explicitly: pyodbc's `with` support only commits the
# transaction on exit and leaves the connection open.
conx = pyodbc.connect(connection_string)
try:
    cursor = conx.cursor()
    cursor.execute(query)
    data = cursor.fetchall()
finally:
    # Always release the connection, even if the query fails.
    conx.close()

In [75]:
# Preview at most the first five rows returned by the currency query.
data[:5]

[('COP', 'Colombian Peso', datetime.datetime(2008, 4, 30, 0, 0)),
 ('USD', 'US Dollar', datetime.datetime(2008, 4, 30, 0, 0))]