# hstore data
- for storing key-value pairs in a single value.
- useful for semi-structured data or rows with many attributes
- ++ keys must be unique: you cannot store duplicate keys.
- ++ keys and values are stored as text strings ONLY
<br><br>
----------------------------------------------------------------------------
Starting DATA<br>

| Employer      | Year_begin | Year_end | Dictionary                                                                 |
|---------------|------------|----------|----------------------------------------------------------------------------|
| Mr Han        | 1977       | 2019     | {"first_name":"Chewie", "last_name":"Wookie","occupation":"thrill seeker"} |
| Self employed | 1983       | 1985     | {"first_name":"Ewok","last_name":"Endor", "occupation":"forest dweller"}   |
| self          | 1999       | 2011     | {"first_name":"Harry","last_name":"P", "occupation":"magic"}               |
----------------------------------------------------------------------------
<br>
End Result PSQL<br><br>

| id | Employer      | Year_begin | Year_end | H_store                                                                      |
|----|---------------|------------|----------|------------------------------------------------------------------------------|
| 1  | Mr Han        | 1977       | 2019     | "last_name"=>"Wookie", "first_name"=>"Chewie", "occupation"=>"thrill seeker" |
| 2  | Self employed | 1983       | 1985     | "last_name"=>"Endor", "first_name"=>"Ewok", "occupation"=>"forest dweller"   |
| 3  | self          | 1999       | 2011     | "last_name"=>"P", "first_name"=>"Harry", "occupation"=>"magic"               |

In [1]:
import psycopg2           
import pandas as pd   
import seaborn as sns
from config import config# Import the 'config' function from the config.py file:

# Get the config params: config() comes from the local config.py; its result
# is unpacked below as keyword arguments for psycopg2.connect().
params_ = config()
# Connect to the Postgres_DB:
conn = psycopg2.connect(**params_)
# Create a cursor, used to send SQL statements to Postgres from Python:
cur = conn.cursor()
# Autocommit: every statement is committed as soon as it runs, which is why
# the cells below never call conn.commit().
conn.autocommit = True

In [2]:
# Create a table to store the data (the hstore extension must be enabled before the hstore type can be used)

def create_staging_table(cursor):
    """(Re)create the ``h_dct_prac`` staging table.

    Sends a single multi-statement script through ``cursor.execute``. The
    script enables the hstore extension when it is missing, drops any
    existing ``h_dct_prac`` table and creates it again as an UNLOGGED table
    whose ``attr`` column has the hstore type.

    cursor: object exposing ``execute(sql)``; in this notebook a psycopg2
        cursor.
    """
    staging_ddl = """
        CREATE EXTENSION IF NOT EXISTS hstore; /*enable the hstore extension*/
        DROP TABLE IF EXISTS h_dct_prac;
        CREATE UNLOGGED TABLE  h_dct_prac (
            ID serial NOT NULL PRIMARY KEY,
            employer text, 
            yr_begin INT, 
            yr_end INT, 
            attr hstore);"""
    cursor.execute(staging_ddl)

# Send the schema to PSQL on a dedicated cursor; the with-block closes that
# cursor on exit (the shared `cur` stays open for the cells below).
with conn.cursor() as cursor:
    create_staging_table(cursor)   

# Insert one hstore row & multiple hstore rows

In [3]:
# Insert ONE

# Parameterised INSERT: psycopg2 fills the four %s placeholders from the
# sequence passed to execute(). The id column is left out, so the serial
# default assigns it.
sql_="""
INSERT INTO h_dct_prac (employer,yr_begin,yr_end,attr) VALUES (%s,%s,%s,%s)
"""
# The last element is the hstore value written as text in
# "key"=>"value" form, pairs separated by commas.
nemo=['everyone',
        1677,
        2020,
        '"first_name"=>"Santa","last_name"=>"Clause","occupation"=>"gift giver"']
cur.execute(sql_, nemo)

# Read the table back to check the inserted row.
sq="""select * from h_dct_prac"""
cur.execute(sq)
cur.fetchall()

[(1,
  'everyone',
  1677,
  2020,
  '"last_name"=>"Clause", "first_name"=>"Santa", "occupation"=>"gift giver"')]

In [4]:
# Insert MANY
# Data
# One tuple per row: (employer, yr_begin, yr_end, attributes dict that will
# become the hstore value).
names_occup = [
    (
        "Mr Han",
        1977,
        2019,
        {
            "first_name": "Chewie",
            "last_name": "Wookie",
            "occupation": "thrill seeker",
        },
    ),
    (
        "self_empl",
        1983,
        1985,
        {
            "first_name": "Ewok",
            "last_name": "Endor",
            "occupation": "forest dweller",
        },
    ),
    (
        "self",
        1999,
        2011,
        {
            "first_name": "Harry",
            "last_name": "P",
            "occupation": "magic",
        },
    ),
]

In [5]:
# Change dic to hstore format
def _hstore_quote(text):
    """Return ``text`` as a double-quoted hstore token.

    The argument is converted with ``str()``. Inside a quoted hstore key or
    value, a backslash or double quote has to be preceded by a backslash, so
    both are escaped here (backslashes first, so the escapes added for the
    quotes are not doubled).
    """
    return '"' + str(text).replace("\\", "\\\\").replace('"', '\\"') + '"'


def _dict_to_hstore(attrs):
    """Serialise the dict ``attrs`` as hstore text: "k1"=>"v1", "k2"=>"v2".

    Keys are always quoted. A value of ``None`` is written as the unquoted
    hstore ``NULL``; every other value is quoted. Pairs follow the dict's
    iteration order and are separated by ", ".
    """
    return ", ".join(
        _hstore_quote(key) + "=>" + ("NULL" if value is None else _hstore_quote(value))
        for key, value in attrs.items()
    )


# Build the parameter rows for the INSERT: the first three fields are kept
# and the dict in position 3 is replaced by its hstore text. Each key/value
# is quoted explicitly instead of rewriting str(dict) with .replace(), which
# would mangle any value containing quotes, apostrophes, braces or ": ".
final_ = [
    [row[0], row[1], row[2], _dict_to_hstore(row[3])]
    for row in names_occup
]

In [6]:
# Insert
sql_="""
INSERT INTO h_dct_prac (employer,yr_begin,yr_end,attr) VALUES (%s,%s,%s,%s)
"""

# Run the parameterised INSERT once for every prepared row in final_.
cur.executemany(sql_, final_)

# Read everything back to check the inserted rows.
sql="""
select * from h_dct_prac
"""
cur.execute(sql)
cur.fetchall()

[(1,
  'everyone',
  1677,
  2020,
  '"last_name"=>"Clause", "first_name"=>"Santa", "occupation"=>"gift giver"'),
 (2,
  'Mr Han',
  1977,
  2019,
  '"last_name"=>"Wookie", "first_name"=>"Chewie", "occupation"=>"thrill seeker"'),
 (3,
  'self_empl',
  1983,
  1985,
  '"last_name"=>"Endor", "first_name"=>"Ewok", "occupation"=>"forest dweller"'),
 (4,
  'self',
  1999,
  2011,
  '"last_name"=>"P", "first_name"=>"Harry", "occupation"=>"magic"')]

# Query

In [7]:
# Update existing key-value pair
## when id=1, change first name to "Mr_Santa"

# hstore || hstore concatenates the two values; for a key present on both
# sides the right-hand value is kept, so this overwrites "first_name" and
# leaves the other keys untouched.
sq_santa = """
UPDATE h_dct_prac SET attr = attr || '"first_name"=>"Mr_Santa"' 
WHERE id=1;
"""
cur.execute(sq_santa)

# No ORDER BY here, so row order is not guaranteed (in the output below the
# updated row id=1 is listed last).
cur.execute('''select * from h_dct_prac''')
cur.fetchall()

[(2,
  'Mr Han',
  1977,
  2019,
  '"last_name"=>"Wookie", "first_name"=>"Chewie", "occupation"=>"thrill seeker"'),
 (3,
  'self_empl',
  1983,
  1985,
  '"last_name"=>"Endor", "first_name"=>"Ewok", "occupation"=>"forest dweller"'),
 (4,
  'self',
  1999,
  2011,
  '"last_name"=>"P", "first_name"=>"Harry", "occupation"=>"magic"'),
 (1,
  'everyone',
  1677,
  2020,
  '"last_name"=>"Clause", "first_name"=>"Mr_Santa", "occupation"=>"gift giver"')]

In [8]:
# Query - first name only

# hstore -> 'key' returns the value stored under that key; the result column
# is aliased f_n.
q = """SELECT attr -> 'first_name' AS f_n
FROM h_dct_prac;"""
cur.execute(q)
cur.fetchall()

[('Chewie',), ('Ewok',), ('Harry',), ('Mr_Santa',)]

In [9]:
# Query2 - select employer and attr when occupation is magic
# Filters by extracting the 'occupation' value with -> and comparing it to
# the text 'magic'.
q = """SELECT employer, attr 
FROM h_dct_prac
WHERE attr -> 'occupation' = 'magic';"""
cur.execute(q)
cur.fetchall()

[('self', '"last_name"=>"P", "first_name"=>"Harry", "occupation"=>"magic"')]

In [10]:
# Query3 - select employer when occupation is magic with (@>) operator
# left @> right is true when the left hstore contains every key/value pair
# of the right one; here the right side is the single pair occupation=>magic.
q = """
SELECT
employer
FROM
h_dct_prac
WHERE
attr @> '"occupation"=>"magic"';"""
cur.execute(q)
cur.fetchall()

[('self',)]

In [11]:
# All keys:

# akeys(hstore) returns the keys of each row's hstore as an array; psycopg2
# hands each array back as a Python list (see the output below).
s='''
SELECT
akeys (attr)
FROM
h_dct_prac;
'''
cur.execute(s)
cur.fetchall()


[(['last_name', 'first_name', 'occupation'],),
 (['last_name', 'first_name', 'occupation'],),
 (['last_name', 'first_name', 'occupation'],),
 (['last_name', 'first_name', 'occupation'],)]

In [12]:
# All values:
# avals(hstore) returns the values of each row's hstore as an array, one
# list per row in the fetched result.
s_v='''
SELECT
avals (attr)
FROM
h_dct_prac;
'''
cur.execute(s_v)
cur.fetchall()

[(['Wookie', 'Chewie', 'thrill seeker'],),
 (['Endor', 'Ewok', 'forest dweller'],),
 (['P', 'Harry', 'magic'],),
 (['Clause', 'Mr_Santa', 'gift giver'],)]

# Convert Hstore to Json

In [14]:
# Show the table before the conversion: attr is still hstore and is fetched
# as a plain string.
cur.execute('''select * from h_dct_prac''')
cur.fetchall()

[(2,
  'Mr Han',
  1977,
  2019,
  '"last_name"=>"Wookie", "first_name"=>"Chewie", "occupation"=>"thrill seeker"'),
 (3,
  'self_empl',
  1983,
  1985,
  '"last_name"=>"Endor", "first_name"=>"Ewok", "occupation"=>"forest dweller"'),
 (4,
  'self',
  1999,
  2011,
  '"last_name"=>"P", "first_name"=>"Harry", "occupation"=>"magic"'),
 (1,
  'everyone',
  1677,
  2020,
  '"last_name"=>"Clause", "first_name"=>"Mr_Santa", "occupation"=>"gift giver"')]

In [15]:
# hstore to json
# Change the column type in place; the USING clause tells Postgres how to
# convert the existing hstore values (here: a cast to jsonb).
hstore_to_json = """
ALTER TABLE h_dct_prac ALTER COLUMN attr TYPE jsonb USING CAST(attr AS jsonb)"""
cur.execute(hstore_to_json)

# After the change attr is fetched as a Python dict instead of a string
# (see the output below).
cur.execute('''select * from h_dct_prac''')
cur.fetchall()

[(2,
  'Mr Han',
  1977,
  2019,
  {'last_name': 'Wookie',
   'first_name': 'Chewie',
   'occupation': 'thrill seeker'}),
 (3,
  'self_empl',
  1983,
  1985,
  {'last_name': 'Endor',
   'first_name': 'Ewok',
   'occupation': 'forest dweller'}),
 (4,
  'self',
  1999,
  2011,
  {'last_name': 'P', 'first_name': 'Harry', 'occupation': 'magic'}),
 (1,
  'everyone',
  1677,
  2020,
  {'last_name': 'Clause',
   'first_name': 'Mr_Santa',
   'occupation': 'gift giver'})]

# Convert Json to Hstore

In [17]:
def jsonb_to_hstore(cursor):
    """Install the SQL helper ``simple_jsonb_to_hstore(jdata jsonb)``.

    This does not convert any data itself: it sends one
    ``create or replace function`` statement through ``cursor.execute``.
    The SQL function expands ``jdata`` with ``jsonb_each_text`` and builds an
    hstore from the aggregated key and value arrays.

    cursor: object exposing ``execute(sql)``; in this notebook a psycopg2
        cursor.
    """
    create_function_sql = """
            create or replace function simple_jsonb_to_hstore(jdata jsonb)
            returns hstore language sql immutable
            as $$
                select hstore(array_agg(key), array_agg(value))
                from jsonb_each_text(jdata)
            $$;"""
    cursor.execute(create_function_sql)

# Create the simple_jsonb_to_hstore SQL function in the database, using a
# dedicated cursor that the with-block closes on exit.
with conn.cursor() as cursor:
    jsonb_to_hstore(cursor)  

# Switch the column back to hstore; the USING clause converts each existing
# jsonb value through the helper function created above.
to_hstore = """ALTER TABLE h_dct_prac ALTER COLUMN attr TYPE hstore USING simple_jsonb_to_hstore(attr);"""
cur.execute(to_hstore)
# attr is fetched as an hstore string again (see the output below).
cur.execute('''select * from h_dct_prac''')
cur.fetchall()

[(2,
  'Mr Han',
  1977,
  2019,
  '"last_name"=>"Wookie", "first_name"=>"Chewie", "occupation"=>"thrill seeker"'),
 (3,
  'self_empl',
  1983,
  1985,
  '"last_name"=>"Endor", "first_name"=>"Ewok", "occupation"=>"forest dweller"'),
 (4,
  'self',
  1999,
  2011,
  '"last_name"=>"P", "first_name"=>"Harry", "occupation"=>"magic"'),
 (1,
  'everyone',
  1677,
  2020,
  '"last_name"=>"Clause", "first_name"=>"Mr_Santa", "occupation"=>"gift giver"')]