In [51]:
%%capture
# The %%capture magic above silences the output of every command in this cell.
# Install the PostgreSQL server and start the service
!sudo apt-get -y -qq update
!sudo apt-get -y -qq install postgresql
!sudo service postgresql start

# Set the password `postgres` for the database user `postgres`
!sudo -u postgres psql -U postgres -c "ALTER USER postgres PASSWORD 'postgres';"

# (Re)create an empty database named `db`: drop it if it exists, then create it
!sudo -u postgres psql -U postgres -c 'DROP DATABASE IF EXISTS db;'
!sudo -u postgres psql -U postgres -c 'CREATE DATABASE db;'

In [52]:
import pandas as pd
from sqlalchemy import create_engine
import psycopg2
from psycopg2 import Error

In [53]:
# Connection settings for the PostgreSQL server running on this machine.
# NOTE(review): `post` holds the port number; the name is kept as-is because
# other cells may reference it. Credentials are hardcoded for this throwaway
# database only.
user, password = 'postgres', 'postgres'
host, post = 'localhost', 5432
name = 'db'

# SQLAlchemy URL layout: postgresql://<user>:<password>@<host>:<port>/<database>
point = f'postgresql://{user}:{password}@{host}:{post}/{name}'

# Engine used by every later cell that reads from or writes to the database.
con = create_engine(point)

In [54]:
# Load the CSV into a DataFrame, then write it to the `sales` table.
df_sql = pd.read_csv('/content/data_two_columns.csv')
df_sql.to_sql(
    name="sales",
    con=con,
    if_exists='replace',  # replace the table when it already exists
    index=False,          # do not store the DataFrame index as a column
    method='multi',       # send several rows per INSERT statement
)

In [55]:
def select_postgresql(sql):
    """Execute ``sql`` through the notebook-level engine ``con``.

    Parameters
    ----------
    sql
        Query handed unchanged to ``pandas.read_sql``.

    Returns
    -------
    Whatever ``pandas.read_sql`` returns for the query (the query result
    loaded into pandas).
    """
    result = pd.read_sql(sql=sql, con=con)
    return result

In [56]:
# Preview query: at most 10 rows from the `sales` table.
sql = "select s.* from sales as s limit 10"

In [57]:
# Run the preview query and display its first five rows.
select_postgresql(sql).head(n=5)

Unnamed: 0,invoiceno,stockcode
0,536365,85123A
1,536365,71053
2,536365,84406B
3,536365,84029G
4,536365,84029E


In [58]:
# Top-10 most frequent pairs of stock codes that share an invoice
# (array-based formulation).
#   1. tbl_no_duplicates: one row per distinct (invoiceno, stockcode),
#      obtained with GROUP BY on both columns.
#   2. tbl_list_code_combinations: for each invoice, the invoice's stock codes
#      are aggregated into an array, unnested twice and cross-joined; only
#      combinations with first code < second code are kept (each pair appears
#      once, no self-pairs) and stored as "code1, code2" strings in `agg`.
#   3. Final select: unnest the per-invoice arrays, count how often each pair
#      string occurs, and return the 10 highest counts.
sql = """with tbl_no_duplicates as (select s.invoiceno, 
                                           s.stockcode 
                                    from sales as s 
                                    group by s.invoiceno, 
                                             s.stockcode),
      
              tbl_list_code_combinations as (select t.invoiceno, (select array_agg(concat(cast(t1.* as text),', ',cast(t2.* as text)))
                                                                  from unnest(array_agg(t.stockcode)) as t1 
                                                                                     cross join unnest(array_agg(t.stockcode)) as t2
                                                                  where t1.* < t2.*) as agg
                                             from tbl_no_duplicates as t
                                            group by t.invoiceno)

select unnest(t.agg) as couple_stockcode, count(*)
from tbl_list_code_combinations as t
group by  unnest(t.agg) 
order by count(*) desc
limit 10"""

In [59]:
%%time
select_postgresql(sql).head(10)

CPU times: user 279 ms, sys: 38.1 ms, total: 317 ms
Wall time: 50.8 s


Unnamed: 0,couple_stockcode,count
0,"22386, 85099B",833
1,"22697, 22699",784
2,"21931, 85099B",733
3,"22411, 85099B",683
4,"20725, 22383",663
5,"20725, 20727",648
6,"22726, 22727",646
7,"22697, 22698",644
8,"22698, 22699",614
9,"20725, 22384",613


In [62]:
# Top-10 most frequent pairs of stock codes that share an invoice
# (self-join formulation).
#   1. tbl_no_duplicates: one row per distinct (invoiceno, stockcode),
#      obtained with GROUP BY on both columns.
#   2. tbl_list_code_combinations: joins tbl_no_duplicates with itself on equal
#      invoiceno and keeps rows where t1.stockcode < t2.stockcode, so each
#      pair within an invoice appears once as (code1, code2).
#   3. Final select: count rows per (code1, code2) and return the 10 highest
#      counts.
# NOTE(review): in the recorded run this variant had a longer wall time than
# the array-based query; timings will vary by environment.
sql = """with tbl_no_duplicates as (select s.invoiceno, 
                                           s.stockcode 
                                    from sales as s 
                                    group by s.invoiceno, 
                                             s.stockcode),
      
              tbl_list_code_combinations as (select t1.stockcode as code1, t2.stockcode as code2
                                             from tbl_no_duplicates as t1, tbl_no_duplicates as t2
                                             where (t1.stockcode < t2.stockcode) and (t1.invoiceno = t2.invoiceno))

select t.code1, t.code2, count(*)
from tbl_list_code_combinations as t
 group by  t.code1, t.code2
 order by count(*) desc
 limit 10"""

In [63]:
%%time
select_postgresql(sql).head(10)

CPU times: user 773 ms, sys: 70.6 ms, total: 844 ms
Wall time: 2min 19s


Unnamed: 0,code1,code2,count
0,22386,85099B,833
1,22697,22699,784
2,21931,85099B,733
3,22411,85099B,683
4,20725,22383,663
5,20725,20727,648
6,22726,22727,646
7,22697,22698,644
8,22698,22699,614
9,20725,22384,613
