In [1]:
%run setup_notebook.ipynb

### Save the query result to a Python variable `query_result`

In [2]:
%%sql query_result << SELECT c.Country, 
    -- NOTE(review) the AVG below is the mean of (invoice Total / CustomerCount) over
    -- the joined invoice rows, which is not the same as TotalRevenue / CustomerCount.
    -- Confirm this is the intended meaning of AvgRevenuePerCustomer.
    AVG(i.Total / t.CustomerCount) AS AvgRevenuePerCustomer,
    t.TotalRevenue
FROM customers c
JOIN (
    -- Per billing country, the distinct customer count and the summed invoice
    -- totals (truncated to 2 decimals), kept only when there are more than 3 customers.
    SELECT BillingCountry, 
        COUNT(DISTINCT CustomerId) AS CustomerCount, 
        TRUNCATE(SUM(Total), 2) AS TotalRevenue
    FROM invoices
    GROUP BY BillingCountry
    HAVING COUNT(DISTINCT CustomerId) > 3
) t ON c.Country = t.BillingCountry
JOIN invoices i ON c.CustomerId = i.CustomerId
GROUP BY c.Country
ORDER BY AvgRevenuePerCustomer DESC;

Returning data to local variable query_result


## OPTION 1: 

#### ***`Using the ResultSet object`***

In [3]:
# let's see what is saved inside query_result
query_result

Country,AvgRevenuePerCustomer,TotalRevenue
Germany,1.397142857142857,156.48
France,1.1148571428571432,195.09
Brazil,1.0862857142857143,190.09
Canada,0.6784821428571428,303.95
USA,0.442147083685545,523.06


In [4]:
# List the attribute names of query_result, keeping only the names that contain
# no underscore at all. This hides dunder/private names, but it also hides any
# public snake_case name, and the list is not limited to methods (dir() returns
# every attribute name).
methods = dir(query_result)
methods = [ method for method in methods if '_' not in method]
methods

['DataFrame',
 'append',
 'bar',
 'clear',
 'config',
 'copy',
 'count',
 'csv',
 'dict',
 'dicts',
 'extend',
 'index',
 'insert',
 'keys',
 'pie',
 'plot',
 'pop',
 'pretty',
 'remove',
 'reverse',
 'sort']

In [5]:
# get headers
query_result.keys

RMKeyView(['Country', 'AvgRevenuePerCustomer', 'TotalRevenue'])

In [6]:
# Pull just the column names out of the key view as a plain Python list
headers = list(query_result.keys)
headers

['Country', 'AvgRevenuePerCustomer', 'TotalRevenue']

In [7]:
# get row values
# NOTE: `dict` is referenced here without parentheses, so this cell displays the
# bound method object itself instead of calling it.
query_result.dict

<bound method ResultSet.dict of [('Germany', 1.397142857142857, 156.48), ('France', 1.1148571428571432, 195.09), ('Brazil', 1.0862857142857145, 190.09), ('Canada', 0.6784821428571428, 303.95), ('USA', 0.442147083685545, 523.06)]>

In [8]:
# NOTE: `dicts` is referenced without parentheses, so only the bound method
# object is displayed; the method is not called.
query_result.dicts

<bound method ResultSet.dicts of [('Germany', 1.397142857142857, 156.48), ('France', 1.1148571428571432, 195.09), ('Brazil', 1.0862857142857145, 190.09), ('Canada', 0.6784821428571428, 303.95), ('USA', 0.442147083685545, 523.06)]>

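### Referenced without parentheses, ***dict*** and ***dicts*** only display the bound methods, so no row values are extracted this way (they would have to be called, e.g. `query_result.dict()`).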

In [9]:
# Iterating over query_result yields one row at a time; collect them all in a list
rows = list(query_result)
rows

[('Germany', 1.397142857142857, 156.48),
 ('France', 1.1148571428571432, 195.09),
 ('Brazil', 1.0862857142857145, 190.09),
 ('Canada', 0.6784821428571428, 303.95),
 ('USA', 0.442147083685545, 523.06)]

In [10]:
from tabulate import tabulate 

def format_sql_result(query_result):
    """
    Print the rows of a SQL result set as a Markdown ("pipe") table using the tabulate library.

    Note: a second function with the same name is defined later in this notebook;
    once that cell runs, the name `format_sql_result` refers to the later definition.

    Parameters:
    query_result (ResultSet): A ResultSet object containing the results of a SQL query.
        Column names are read from its `keys` attribute and the rows are obtained
        by iterating over the object.

    Returns:
    None: The function prints the formatted table to the console using the `print` function.

    Example Usage:
        >>> format_sql_result(query_result)
        | Country   |   AvgRevenuePerCustomer |   TotalRevenue |
        |:----------|------------------------:|---------------:|
        | Germany   |                1.39714  |         156.48 |
        | France    |                1.11486  |         195.09 |
        | Brazil    |                1.08629  |         190.09 |
        | Canada    |                0.678482 |         303.95 |
        | USA       |                0.442147 |         523.06 |
    """
    # Hand tabulate plain lists: the key view is copied into a list of column
    # names (the explicit-list form of `headers`), and the rows are materialised
    # once by iterating over the result set.
    headers = list(query_result.keys)
    rows = list(query_result)

    # Printing is the whole purpose of the helper; it intentionally returns None.
    print(tabulate(rows, headers=headers, tablefmt='pipe'))

In [11]:
format_sql_result(query_result)

| Country   |   AvgRevenuePerCustomer |   TotalRevenue |
|:----------|------------------------:|---------------:|
| Germany   |                1.39714  |         156.48 |
| France    |                1.11486  |         195.09 |
| Brazil    |                1.08629  |         190.09 |
| Canada    |                0.678482 |         303.95 |
| USA       |                0.442147 |         523.06 |


## OPTION 2: 

#### ***Creating a DataFrame using the `DataFrame()` method***

In [12]:
%%sql df_input << SELECT c.Country, 
    -- NOTE(review) the AVG below is the mean of (invoice Total / CustomerCount) over
    -- the joined invoice rows, which is not the same as TotalRevenue / CustomerCount.
    -- Confirm this is the intended meaning of AvgRevenuePerCustomer.
    AVG(i.Total / t.CustomerCount) AS AvgRevenuePerCustomer,
    t.TotalRevenue
FROM customers c
JOIN (
    -- Per billing country, the distinct customer count and the summed invoice
    -- totals (truncated to 2 decimals), kept only when there are more than 3 customers.
    SELECT BillingCountry, 
        COUNT(DISTINCT CustomerId) AS CustomerCount, 
        TRUNCATE(SUM(Total), 2) AS TotalRevenue
    FROM invoices
    GROUP BY BillingCountry
    HAVING COUNT(DISTINCT CustomerId) > 3
) t ON c.Country = t.BillingCountry
JOIN invoices i ON c.CustomerId = i.CustomerId
GROUP BY c.Country
ORDER BY AvgRevenuePerCustomer DESC;

Returning data to local variable df_input


In [13]:
df = df_input.DataFrame()
df 

Unnamed: 0,Country,AvgRevenuePerCustomer,TotalRevenue
0,Germany,1.397143,156.48
1,France,1.114857,195.09
2,Brazil,1.086286,190.09
3,Canada,0.678482,303.95
4,USA,0.442147,523.06


In [14]:
list(df.columns)

['Country', 'AvgRevenuePerCustomer', 'TotalRevenue']

In [15]:
df.values

array([['Germany', 1.397142857142857, 156.48],
       ['France', 1.1148571428571432, 195.09],
       ['Brazil', 1.0862857142857145, 190.09],
       ['Canada', 0.6784821428571428, 303.95],
       ['USA', 0.442147083685545, 523.06]], dtype=object)

In [16]:
rows = df.values.tolist()
rows

[['Germany', 1.397142857142857, 156.48],
 ['France', 1.1148571428571432, 195.09],
 ['Brazil', 1.0862857142857145, 190.09],
 ['Canada', 0.6784821428571428, 303.95],
 ['USA', 0.442147083685545, 523.06]]

In [17]:
def format_sql_result(sql_variable):
    """
    Convert a SQL result to a Pandas DataFrame and print it as a Markdown ("pipe") table using tabulate.

    Note: this function has the same name as the ResultSet-based helper defined
    earlier in this notebook, so running this cell rebinds `format_sql_result`
    to this DataFrame-based version.

    Parameters:
    -----------
    sql_variable : object
        A variable containing the result of a SQL query. It must provide a
        `DataFrame()` method, which is called to obtain the data.

    Returns:
    --------
    None.
        Prints the formatted text table to the console.
    """
    frame = sql_variable.DataFrame()

    # Give tabulate a plain list of column names rather than the pandas Index
    # object: an explicit list is the documented form of `headers`.
    headers = list(frame.columns)
    rows = frame.values.tolist()

    # Printing is the whole purpose of the helper; it intentionally returns None.
    print(tabulate(rows, headers=headers, tablefmt='pipe'))

In [18]:
format_sql_result(df_input)

| Country   |   AvgRevenuePerCustomer |   TotalRevenue |
|:----------|------------------------:|---------------:|
| Germany   |                1.39714  |         156.48 |
| France    |                1.11486  |         195.09 |
| Brazil    |                1.08629  |         190.09 |
| Canada    |                0.678482 |         303.95 |
| USA       |                0.442147 |         523.06 |


### `Paste the output into a Markdown cell.`


| Country   |   AvgRevenuePerCustomer |   TotalRevenue |
|:----------|------------------------:|---------------:|
| Germany   |                1.39714  |         156.48 |
| France    |                1.11486  |         195.09 |
| Brazil    |                1.08629  |         190.09 |
| Canada    |                0.678482 |         303.95 |
| USA       |                0.442147 |         523.06 |