## Lesson 1 Exercise 1: Creating a Table with PostgreSQL

## Install the Dependencies

In [None]:
!pip3 install pandas psycopg2

## Reset the Database to a Clean Slate

In [None]:
!brew services stop postgresql
!brew services start postgresql
!psql postgres -c "DROP DATABASE langcodes;"
!psql postgres -c "\l"

## Import the library

In [None]:
import psycopg2
import pandas as pd

## Define a function that creates the database and returns a cursor and connection to work with. It is called in a later step.

In [None]:
def create_database(dbname, user, password):
    """Recreate the database *dbname* and return a cursor and connection to it.

    Connects to the ``postgres`` maintenance database on 127.0.0.1, drops
    *dbname* if it already exists, creates it again, and then opens a new
    connection to the freshly created database.

    Args:
        dbname: Name of the database to drop and recreate.
        user: PostgreSQL role used for both connections.
        password: Password for *user*.

    Returns:
        A ``(cursor, connection)`` tuple bound to the new database. Autocommit
        is not enabled on the returned connection, so callers must commit
        their own work.

    Raises:
        psycopg2.Error: If connecting, dropping, or creating the database fails.
    """
    # Local import keeps this cell self-contained; psycopg2 itself is already
    # imported by the notebook.
    from psycopg2 import sql

    # Keyword arguments avoid hand-building a DSN string, which breaks when a
    # credential contains spaces or quotes.
    admin_conn = psycopg2.connect(
        host="127.0.0.1", dbname="postgres", user=user, password=password
    )
    try:
        # DROP/CREATE DATABASE cannot run inside a transaction block.
        admin_conn.set_session(autocommit=True)
        with admin_conn.cursor() as cur:
            # Quote the name as an identifier instead of pasting it into the SQL text.
            db_identifier = sql.Identifier(dbname)
            cur.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(db_identifier))
            cur.execute(sql.SQL("CREATE DATABASE {}").format(db_identifier))
    finally:
        # Always release the maintenance connection, even if the DDL failed.
        admin_conn.close()

    conn = psycopg2.connect(
        host="127.0.0.1", dbname=dbname, user=user, password=password
    )
    curr = conn.cursor()

    return curr, conn

## Read from language-codes-3b2_csv.csv

In [None]:
# Load the 3b2 language-codes CSV from the dataset folder into a DataFrame.
language_codes = pd.read_csv(filepath_or_buffer="dataset/language-codes-3b2_csv.csv")

## Display the first five rows of the language_codes DataFrame

In [None]:
# Show the first five rows as a quick sanity check of the load.
print(language_codes.head(n=5))

## Rename Columns for language_codes_clean

In [None]:
# Give the alpha2 and English columns descriptive names; rename() returns a
# new DataFrame and leaves language_codes untouched.
language_codes_clean = language_codes.rename(
    columns={
        "alpha2": "character-code-2",
        "English": "english-name",
    }
)

## Display first five rows of the language_codes_clean Dataframe updated

In [None]:
# Show the first five rows to confirm the new column names.
print(language_codes_clean.head(n=5))

## Read from language_codes_full csv

In [None]:
# Load the full language-codes CSV from the dataset folder into a DataFrame.
language_codes_full = pd.read_csv(filepath_or_buffer="dataset/language-codes-full_csv.csv")

## Peek language_codes_full dataframe

In [None]:
# Show the first five rows of the full code list.
print(language_codes_full.head(n=5))

## Rename columns of language_codes_full Dataframe

In [None]:
# Replace the source column headers with descriptive names; columns not
# listed in the mapping keep their original names.
language_codes_full = language_codes_full.rename(
    columns={
        "alpha3-b": "character-code-3",
        "alpha2": "character-code-2",
        "English": "english-name",
        "French": "french-name",
    }
)

## Drop the alpha3-t column by keeping only the needed columns

In [None]:
# Keep only the four renamed columns, in this order; every other column is
# dropped from the result.
language_codes_full_clean = language_codes_full[
    [
        "character-code-3",
        "character-code-2",
        "english-name",
        "french-name",
    ]
]

## Clean up blank values

In [None]:
# Replace missing values with empty strings.
language_codes_full_clean = language_codes_full_clean.fillna(value="")

## Print language_codes_full_clean

In [None]:
# Show the first ten rows of the cleaned frame.
print(language_codes_full_clean.head(n=10))

## Read from ietf-language-tags_csv.csv

In [None]:
# Load the IETF language tags CSV, treating only empty fields as missing.
# pandas' default NA markers also include literal strings such as "NA",
# "NULL" and "nan"; in a table of language and territory codes those can be
# real values (for example "NA" is the ISO 3166 code for Namibia) and would
# otherwise be silently converted to NaN and later blanked out.
ietf_language_tags = pd.read_csv(
    "dataset/ietf-language-tags_csv.csv",
    keep_default_na=False,
    na_values=[""],
)

## Rename columns of ietf_language_tags

In [None]:
# Rename langType to lang-type; all other columns keep their names.
ietf_language_tags = ietf_language_tags.rename(
    columns={
        "langType": "lang-type",
    }
)

## Clean up unneeded columns

In [None]:
# Keep only the five listed columns, in this order; every other column is
# dropped from the result.
ietf_language_tags_clean = ietf_language_tags[
    [
        "lang",
        "lang-type",
        "territory",
        "defs",
        "file",
    ]
]

## Clean up blank values

In [None]:
# Replace missing values with empty strings.
ietf_language_tags_clean = ietf_language_tags_clean.fillna(value="")

## Peek ietf_language_tags_clean

In [None]:
# Show the first ten rows of the cleaned frame.
print(ietf_language_tags_clean.head(n=10))

## Create the database and get a cursor

In [None]:
import os

# Recreate the langcodes database and keep a cursor/connection for later cells.
# The credentials default to the original local values but can be overridden
# with the standard libpq environment variables, so they do not have to be
# edited into the notebook.
curr, conn = create_database(
    "langcodes",
    os.environ.get("PGUSER", "glall"),
    os.environ.get("PGPASSWORD", "root"),
)

## Create Function to check custom constraint

In [None]:
# Create the PL/pgSQL helper functions called by the tables' CHECK constraints.
# The connection returned by create_database is not in autocommit mode, so the
# DDL is committed on success and rolled back on failure; without the rollback
# a failed statement leaves the transaction aborted and every later statement
# on this connection is rejected.
try:
    # True when lang_type matches a character_code_2 in language_codes or a
    # character_code_3 in language_codes_extended.
    curr.execute("""
        CREATE OR REPLACE FUNCTION check_lang_type(lang_type varchar)
        RETURNS boolean AS $$
        BEGIN
            RETURN EXISTS (SELECT 1 FROM language_codes WHERE character_code_2 = lang_type)
                OR EXISTS (SELECT 1 FROM language_codes_extended WHERE character_code_3 = lang_type);
        END;
        $$ LANGUAGE plpgsql;
    """)

    # True when the code is NULL, empty, or present in language_codes.
    curr.execute("""
        CREATE OR REPLACE FUNCTION check_character_code_2(p_character_code_2 varchar)
        RETURNS boolean AS $$
        BEGIN
            RETURN p_character_code_2 IS NULL
                OR p_character_code_2 = ''
                OR EXISTS (SELECT 1 FROM language_codes WHERE character_code_2 = p_character_code_2);
        END;
        $$ LANGUAGE plpgsql;
    """)

    conn.commit()

except psycopg2.Error as e:
    print("Error: Issue creating custom check constraint")
    print(e)
    # Leave the connection usable for the following cells.
    conn.rollback()

## Create Tables for Langcodes database

In [None]:
# Create the three langcodes tables. Two of them carry CHECK constraints that
# call the check_character_code_2 / check_lang_type functions.
# The DDL is committed on success and rolled back on failure so that a failed
# statement does not leave the connection stuck in an aborted transaction.
try:
    curr.execute("""
        CREATE TABLE IF NOT EXISTS language_codes (
            alpha_3b varchar,
            character_code_2 varchar PRIMARY KEY,
            english_name varchar
        );
    """)

    curr.execute("""
        CREATE TABLE IF NOT EXISTS language_codes_extended (
            character_code_3 varchar PRIMARY KEY,
            character_code_2 varchar,
            english_name varchar,
            french_name varchar,
            CONSTRAINT character_code_2_check CHECK (check_character_code_2(character_code_2))
        );
    """)

    curr.execute("""
        CREATE TABLE IF NOT EXISTS language_codes_ietf (
            lang varchar PRIMARY KEY,
            lang_type varchar,
            territory varchar,
            defs varchar,
            file varchar,
            CONSTRAINT lang_type_check CHECK (check_lang_type(lang_type))
        );
    """)

    conn.commit()

except psycopg2.Error as e:
    print("Error: Issue creating table")
    print(e)
    # Leave the connection usable for the following cells.
    conn.rollback()

## Insert Dataframes into DB tables

This cell will run into an error while trying to insert records into language_codes_ietf table. This is a deliberate error to help me demonstrate how to relax a misjudged constraint in the following cell.

In [None]:
# Load each DataFrame into its table row by row. Each table is committed as
# soon as its rows are in, so an error in a later table only rolls back the
# rows of that table.
try:
    language_codes_insert = (
        "INSERT INTO language_codes(alpha_3b, character_code_2, english_name) "
        "VALUES (%s, %s, %s)"
    )
    for _, record in language_codes_clean.iterrows():
        curr.execute(language_codes_insert, list(record))
    conn.commit()

    language_codes_full_insert = (
        "INSERT INTO language_codes_extended(character_code_3, character_code_2, english_name, french_name) "
        "VALUES (%s, %s, %s, %s)"
    )
    for _, record in language_codes_full_clean.iterrows():
        curr.execute(language_codes_full_insert, list(record))
    conn.commit()

    ietf_language_tags_insert = (
        "INSERT INTO language_codes_ietf(lang, lang_type, territory, defs, file) "
        "VALUES (%s, %s, %s, %s, %s)"
    )
    for _, record in ietf_language_tags_clean.iterrows():
        curr.execute(ietf_language_tags_insert, list(record))
    conn.commit()

except psycopg2.Error as err:
    print("Error while inserting records into tables from dataframes")
    print(err)
    # Discard the uncommitted rows and end the failed transaction.
    curr.execute("ROLLBACK;")

## Relax Constraint on table

In [None]:
# Remove the lang_type_check constraint from language_codes_ietf.
# Commit so the change persists regardless of what later cells do, and roll
# back on failure so the connection is not left in an aborted transaction.
try:
    curr.execute("ALTER TABLE language_codes_ietf DROP CONSTRAINT lang_type_check;")
    conn.commit()

except psycopg2.Error as e:
    print("Error: Issue deleting constraint from table")
    print(e)
    conn.rollback()

## Retry addition of records into language_codes_ietf table

In [None]:
# Second attempt at loading the IETF tags into language_codes_ietf, one row
# per INSERT, committed once all rows are in.
try:
    ietf_language_tags_insert = (
        "INSERT INTO language_codes_ietf(lang, lang_type, territory, defs, file) "
        "VALUES (%s, %s, %s, %s, %s)"
    )
    for _, record in ietf_language_tags_clean.iterrows():
        curr.execute(ietf_language_tags_insert, list(record))
    conn.commit()

except psycopg2.Error as err:
    print("Error while inserting records into tables from dataframes")
    print(err)
    # Discard the uncommitted rows and end the failed transaction.
    curr.execute("ROLLBACK;")

##  And finally close your cursor and connection.

In [None]:
# Release the cursor first, then the connection.
for resource in (curr, conn):
    resource.close()