In [24]:
from numpy import char
from datetime import date
import pandas as pd
import numpy as np
import time
# Format floats in displayed frames by passing each value through str().
pd.set_option('display.float_format', str)

In [25]:
# Orders table: column names, dtypes and load.

# Header names are written in upper case and lower-cased once here so they
# match the keys of the dtype mapping below.
o_columnnames = [
    name.lower()
    for name in (
        "O_ORDERKEY",
        "O_CUSTKEY",
        "O_ORDERSTATUS",
        "O_TOTALPRICE",
        "O_ORDERDATE",
        "O_ORDERPRIORITY",
        "O_CLERK",
        "O_SHIPPRIORITY",
        "O_COMMENT",
    )
]

# Explicit dtype per column; o_orderdate is absent on purpose because it is
# handled through parse_dates instead.
o_data_types = dict(
    o_orderkey=int,
    o_custkey=int,
    o_orderstatus=str,
    o_totalprice=float,
    o_orderpriority=str,
    o_clerk=str,
    o_shippriority=int,
    o_comment=str,
)

o_parse_dates = ['o_orderdate']

# No index column is set: every field stays a regular column so it can be
# reached with ordinary pandas column selection.
orders = pd.read_table(
    "../data/orders.tbl.csv",
    sep="|",
    names=o_columnnames,
    dtype=o_data_types,
    parse_dates=o_parse_dates,
)

In [26]:
# Per-customer aggregation of `orders`, combined into two derived metrics
# ('fun_aggregate' and 'massive_query').
#
# Elapsed time is taken with time.perf_counter(): it is a monotonic,
# high-resolution clock meant for measuring short durations, whereas
# time.time() is a wall clock that can jump if the system time is adjusted.
start_time = time.perf_counter()

df_filter_1 = orders[['o_orderkey', 'o_custkey', 'o_orderstatus', 'o_totalprice', 'o_orderdate', 'o_orderpriority', 'o_clerk', 'o_shippriority', 'o_comment']]

# Step 1: compute every aggregate the two expressions need, one named
# column per (source column, aggregation) pair.
df_group_1 = (
    df_filter_1
    .groupby(['o_custkey'])
    .agg(
        count_o_custkey=("o_custkey", "count"),
        mean_o_totalprice=("o_totalprice", "mean"),
        sum_o_orderkey=("o_orderkey", "sum"),
        min_o_shippriority=("o_shippriority", "min"),
        max_o_custkey=("o_custkey", "max"),
        min_o_totalprice=("o_totalprice", "min"),
        max_o_orderkey=("o_orderkey", "max"),
        mean_o_shippriority=("o_shippriority", "mean"),
    )
)

# Step 2: combine the per-group aggregates arithmetically.
df_group_1['fun_aggregate'] = (((df_group_1.count_o_custkey + df_group_1.mean_o_totalprice) / (df_group_1.sum_o_orderkey + df_group_1.min_o_shippriority)) * 25)
df_group_1['massive_query'] = (((df_group_1.max_o_custkey * df_group_1.min_o_totalprice) * (df_group_1.max_o_orderkey - df_group_1.mean_o_shippriority)) + 5)

# Step 3: keep only the two derived columns; the helper aggregates are dropped.
df_group_1 = df_group_1[['fun_aggregate', 'massive_query']]

# The timer stops before printing so that rendering is not measured.
end_time = time.perf_counter()
print(df_group_1)

print("--- %s seconds ---" % (end_time - start_time))

                 fun_aggregate          massive_query
o_custkey                                            
1          0.12810308393643452        277457229149.34
2           0.2728328523217036     131517134132.36002
4         0.060140050380629084          25393471717.0
5           1.0575971082868225         179142719014.1
7          0.09635563593962534     1129452816091.3499
...                        ...                    ...
149993      0.2114397542112407     6986911012645099.0
149995    0.054745827654903836     1835363180785452.0
149996     0.14529664144308146  5.241800038924327e+16
149998     0.04995259185356739 1.7912452741612304e+16
149999    0.055579803733751744     9985699992814908.0

[99996 rows x 2 columns]
--- 0.34902334213256836 seconds ---


In [29]:
# Pandas code for:
#   SELECT (COUNT ( o_custkey * (o_totalprice / -1) ) + AVG ( o_totalprice ))
#   FROM orders GROUP BY o_custkey;
#
# Elapsed time is taken with time.perf_counter(): it is a monotonic,
# high-resolution clock meant for measuring short durations, whereas
# time.time() is a wall clock that can jump if the system time is adjusted.
start_time = time.perf_counter()

# The expression inside COUNT(...) is materialised as the 'before' column.
# .assign() builds it on a new frame rather than item-assigning into a
# selection taken from `orders`, which is the pattern that raises
# SettingWithCopyWarning once the selection becomes a real subset.
df_filter_1 = orders[['o_orderkey', 'o_custkey', 'o_orderstatus', 'o_totalprice', 'o_orderdate', 'o_orderpriority', 'o_clerk', 'o_shippriority', 'o_comment']].assign(
    before=lambda frame: frame.o_custkey * (frame.o_totalprice / -1)
)

# Per-customer aggregates: "count" counts the non-null values of 'before'.
df_group_1 = (
    df_filter_1
    .groupby(['o_custkey'])
    .agg(
        count_before=("before", "count"),
        mean_o_totalprice=("o_totalprice", "mean"),
    )
)

# The SELECT expression has no alias, so the result column keeps the
# placeholder name '?content?'.
df_group_1['?content?'] = (df_group_1.count_before + df_group_1.mean_o_totalprice)

df_group_1 = df_group_1[['?content?']]

# The timer stops before printing so that rendering is not measured.
end_time = time.perf_counter()
print(df_group_1)

print("--- %s seconds ---" % (end_time - start_time))


                   ?content?
o_custkey                   
1                  97966.485
2         146903.20428571428
4                132446.8395
5                  171245.32
7                184882.3225
...                      ...
149993    169943.62833333333
149995    119301.31052631578
149996             160294.37
149998    145278.90120000002
149999    171159.29727272727

[99996 rows x 1 columns]
--- 0.19543218612670898 seconds ---
