## SQL 1/I - SELECT

In [None]:
from sqlite3 import connect
# Open the SQLite database file and create the cursor used to run every statement below.
conn = connect('small_pfam.sqlite')
curs = conn.cursor()

Major SQL commands: SELECT, INSERT, DELETE, UPDATE

SELECT - which columns to include in the result <br>
FROM - which tables to use <br>
WHERE (optional) - predicate clause, which rows to include <br>
ORDER BY (optional) - indicates a sort order for the output data

There is a special sqlite_master table that describes the contents of the database

In [None]:
# Query the special sqlite_master table that describes the database contents.
sql = '''SELECT * FROM sqlite_master;'''
curs.execute(sql)

See result header

In [None]:
curs.description

See result

In [None]:
# Iterating over the cursor yields the rows of the most recent query, one at a time.
for record in curs:
    print(record)

In [None]:
sql = '''
SELECT * FROM pfamA;
'''
# Execute the query first: curs.description describes the statement that was
# executed last, so without this call it would still show the previous query's columns.
curs.execute(sql)
curs.description

In [None]:
# Run the query held in `sql` and return all of its rows as a list.
curs.execute(sql).fetchall()

Aggregate functions

In [None]:
# count(*) collapses the whole table into a single row holding the number of rows.
sql = '''
SELECT count(*) FROM pfamA;
'''
curs.execute(sql).fetchall()

In [None]:
# max() returns the largest model_length found in pfamA.
sql = '''
SELECT max(model_length) FROM pfamA;
'''
curs.execute(sql)
curs.fetchall()

Aliasing column names

In [None]:
# AS renames the result column. An alias containing spaces is an identifier,
# so it is quoted with double quotes (single quotes denote string literals).
sql = '''
SELECT count(pfamA_id) AS "Number of Protein Family Domains"
FROM pfamA;
'''
curs.execute(sql)
curs.fetchall()

WHERE clause operators <br>
https://www.sqlite.org/lang_expr.html

<> ,  != 	inequality <br>
<			less than <br>
<= 			less than or equal <br>
=			equal <br>
\>			greater than <br>
\>= 		greater than or equal <br>
BETWEEN v1 AND v2	tests that a value lies in a given range <br>
EXISTS		test for existence of rows matching query <br>
IN			tests if a value falls within a given set or query <br>
IS [ NOT ] NULL	is or is not null <br>
[ NOT ] LIKE		tests whether a value matches (or does not match) a pattern <br>

% is the wildcard in SQL, used in conjunction with LIKE


In [None]:
# Filter rows with WHERE and sort the result in descending pfamA_id order.
# String literals use single quotes; double quotes are reserved for identifiers.
sql = '''
SELECT pfamA_acc, pfamA_id, description
FROM pfamA
WHERE type = 'Coiled-coil'
ORDER BY pfamA_id DESC;
'''
curs.execute(sql)
curs.fetchall()

GROUP BY groups by a column and creates summary data for a different column

In [None]:
# One output row per distinct type, with the number of rows in each group.
sql = '''
SELECT type, count(*) FROM pfamA GROUP BY type;
'''
curs.execute(sql).fetchall()

In [None]:
# Same grouping as before, but counting the type column instead of whole rows.
sql = '''
SELECT type, count(type) FROM pfamA GROUP BY type;
'''
curs.execute(sql).fetchall()

HAVING filters the groups produced by GROUP BY, keeping only those whose aggregate values satisfy the condition

In [None]:
# Keep only the groups whose count exceeds 100.
sql = '''
SELECT type, count(type) FROM pfamA GROUP BY type
HAVING count(type)>100;
'''
curs.execute(sql).fetchall()

In [None]:
# Release the cursor and the connection now that this section is finished.
curs.close()
conn.close()

## SQL 1/II - INSERT, DELETE, UPDATE

In [None]:
from sqlite3 import connect
conn = connect('small_pfam.sqlite')
curs = conn.cursor()

INSERT INTO table

In [None]:
# Insert one row; no commit is issued here, so the change is still pending.
sql = '''
INSERT INTO gene_ontology 
VALUES ('myPfamId1', 'goid', 'myterm', 'process');
'''
curs.execute(sql)

In [None]:
# String literals in SQL take single quotes; double quotes denote identifiers.
curs.execute("SELECT * FROM gene_ontology WHERE pfamA_acc = 'myPfamId1'")
curs.fetchall()

COMMIT - save changes <br>
ROLLBACK - discard all uncommitted changes and return to the state of the last commit

In [None]:
conn.rollback()

In [None]:
# String literals in SQL take single quotes; double quotes denote identifiers.
curs.execute("SELECT * FROM gene_ontology WHERE pfamA_acc = 'myPfamId1'")
curs.fetchall()

INSERT  a row by giving the value for each column

In [None]:
# Insert the row again and commit immediately so the change is saved.
sql = '''
INSERT INTO gene_ontology 
VALUES ('myPfamId1', 'goid', 'myterm', 'process');
'''
curs.execute(sql)
conn.commit()

In [None]:
# String literals in SQL take single quotes; double quotes denote identifiers.
curs.execute("SELECT * FROM gene_ontology WHERE pfamA_acc = 'myPfamId1'")
curs.fetchall()

In [None]:
conn.commit()

In [None]:
# String literals in SQL take single quotes; double quotes denote identifiers.
curs.execute("SELECT * FROM gene_ontology WHERE pfamA_acc = 'myPfamId1'")
curs.fetchall()

INSERT a row using ? placeholders - one value must be supplied for each placeholder

In [None]:
# Each ? is a placeholder; the values are passed separately as a sequence
# with one item per placeholder.
sql='''INSERT INTO gene_ontology VALUES(?,?,?,?);'''
vals = ['myPfamId2', 'goid2', 'myterm2', 'process']
curs.execute(sql, vals)
conn.commit()

In [None]:
# Show the rows stored for the accession 'myPfamId2'.
sql = '''SELECT * FROM gene_ontology 
WHERE pfamA_acc = 'myPfamId2';'''
curs.execute(sql)
for record in curs:
    print(record)

INSERT 'BULK' (multiple rows) with executemany 

In [None]:
# executemany runs the same INSERT once for every inner list in tbl.
sql='''INSERT INTO gene_ontology VALUES(?,?,?,?);'''
tbl=[['myPfamId2', 'goid3', 'myterm3', 'process'],
['myPfamId2', 'goid4', 'myterm4', 'process']]
curs.executemany(sql, tbl)
conn.commit()

In [None]:
sql = '''SELECT * FROM gene_ontology 
WHERE pfamA_acc = 'myPfamId2';'''
curs.execute(sql)
for row in curs: print(row)

In [None]:
# Close the cursor and the connection before using the command line client.
curs.close()
conn.close()

Try the bulk import in the command line client: <br>
sqlite3 small_pfam.sqlite <br>
.import GO_ontology_INSERT.txt gene_ontology

If database is locked!!! <br>
curs.close() <br>
conn.close()

sqlite3 small_pfam.sqlite <br>
.mode tabs <br>
.import GO_ontology_INSERT.txt gene_ontology <br>
SELECT * FROM gene_ontology WHERE pfamA_acc = 'myPfamId0'; <br>
.q

from sqlite3 import connect <br>
conn = connect('small_pfam.sqlite') <br>
curs = conn.cursor()


In [None]:
from sqlite3 import connect
conn = connect('small_pfam.sqlite')
curs = conn.cursor()

DELETE ... WHERE - specific condition

In [None]:
sql = '''SELECT count(*) FROM gene_ontology;'''
curs.execute(sql)
for row in curs: print(row)

In [None]:
# Delete only the rows whose category is 'process'; no commit is issued here.
sql = '''DELETE FROM gene_ontology WHERE category = 'process';'''
curs.execute(sql)

In [None]:
sql = '''SELECT count(*) FROM gene_ontology;'''
curs.execute(sql)
for row in curs: print(row)

In [None]:
conn.rollback()

In [None]:
sql = '''SELECT count(*) FROM gene_ontology;'''
curs.execute(sql)
for row in curs: print(row)

DELETE - all - no condition

In [None]:
# Without a WHERE clause, DELETE removes every row of the table; no commit is issued here.
sql = '''DELETE FROM gene_ontology;'''
curs.execute(sql)

In [None]:
sql = '''SELECT count(*) FROM gene_ontology;'''
curs.execute(sql)
for row in curs: print(row)

In [None]:
conn.rollback()

In [None]:
sql = '''SELECT count(*) FROM gene_ontology;'''
curs.execute(sql)
for row in curs: print(row)

In [None]:
sql = '''SELECT * FROM gene_ontology 
WHERE pfamA_acc = 'myPfamId2';'''
curs.execute(sql)
for row in curs: print(row)

In [None]:
# Delete the 'myPfamId2' rows and commit right away.
sql = '''DELETE FROM gene_ontology 
WHERE pfamA_acc = 'myPfamId2';'''
curs.execute(sql)
conn.commit()

In [None]:
sql = '''SELECT * FROM gene_ontology 
WHERE pfamA_acc = 'myPfamId2';'''
curs.execute(sql)
for row in curs: print(row)

In [None]:
conn.rollback()

In [None]:
sql = '''SELECT * FROM gene_ontology 
WHERE pfamA_acc = 'myPfamId2';'''
curs.execute(sql)
for row in curs: print(row)

`DROP TABLE <table_name>` removes the table PERMANENTLY!

In [None]:
sql = '''
SELECT name FROM sqlite_master WHERE type='table' AND name='gene_ontology';
'''
curs.execute(sql)
curs.fetchall()

In [None]:
# Remove the gene_ontology table itself, not just its rows.
sql = '''DROP TABLE gene_ontology;'''
curs.execute(sql)

In [None]:
from sqlite3 import OperationalError

# The table has just been dropped, so this query is expected to fail.
# Catch the error and display it so the rest of the notebook can still run.
try:
    curs.execute('SELECT * FROM gene_ontology')
except OperationalError as err:
    print('OperationalError:', err)

In [None]:
conn.rollback()

In [None]:
from sqlite3 import OperationalError

# Check whether the rollback brought the dropped table back.
# A failure is displayed instead of raised so the rest of the notebook can still run.
try:
    curs.execute('SELECT * FROM gene_ontology')
except OperationalError as err:
    print('OperationalError:', err)

In [None]:
sql = '''
SELECT name FROM sqlite_master WHERE type='table' AND name='gene_ontology';
'''
curs.execute(sql)
curs.fetchall()

RESTORE table using CREATE TABLE and values from file

In [None]:
sourceFile = 'gene_ontology.txt'

# Step 1: recreate the gene_ontology table.
print('CREATE gene_ontology')
sql = '''CREATE TABLE gene_ontology (
pfamA_acc TEXT NOT NULL,
go_id TEXT NOT NULL,
term TEXT NOT NULL,
category TEXT NOT NULL,
FOREIGN KEY(pfamA_acc) REFERENCES pfamA(pfamA_acc));'''
curs.execute(sql)
print('DONE')

# Step 2: load the rows from the tab-separated source file.
print('Loading gene_ontology')
sql = '''INSERT INTO gene_ontology VALUES (?,?,?,?);'''
# "with" closes the file even when an INSERT fails part-way through.
with open(sourceFile, encoding='utf8') as infile:
    # One list of four fields per non-blank line; blank lines (such as a
    # trailing empty line) are skipped because they cannot fill the placeholders.
    rows = (line.rstrip('\n').split('\t') for line in infile if line.strip())
    curs.executemany(sql, rows)
conn.commit()
print('DONE')

# Step 3: index the two columns named in the index definitions below.
print('Indexing gene_ontology')
sql = '''CREATE INDEX gene_ontology_pfamA_acc_idx
ON gene_ontology(pfamA_acc);
'''
curs.execute(sql)
conn.commit()

sql = '''CREATE INDEX gene_ontology_go_id_idx
ON gene_ontology(go_id);'''
curs.execute(sql)
conn.commit()
print('DONE')


In [None]:
sql = '''
SELECT name FROM sqlite_master WHERE type='table' AND name='gene_ontology';
'''
curs.execute(sql)
curs.fetchall()

RESTORE table from database copy

In [None]:
sql = '''DROP TABLE gene_ontology;'''
curs.execute(sql)

In [None]:
sql = '''
SELECT name FROM sqlite_master WHERE type='table' AND name='gene_ontology';
'''
curs.execute(sql)
curs.fetchall()

In [None]:
# Attach the untouched database copy under the schema name "master".
sql = '''ATTACH 'small_pfam_nochange.sqlite' as master;'''
curs.execute(sql)
# Rebuild gene_ontology from the rows of the attached copy.
# NOTE(review): per the SQLite docs, CREATE TABLE ... AS SELECT copies the data only;
# constraints and indexes of the original table are not carried over - confirm this is acceptable.
sql = '''CREATE TABLE gene_ontology as select * FROM master.gene_ontology;'''
curs.execute(sql)
conn.commit()
# Detach the copy once the data has been committed.
sql = '''DETACH master;'''
curs.execute(sql)


In [None]:
sql = '''SELECT count(*) FROM gene_ontology;'''
curs.execute(sql)
curs.fetchall()

UPDATE modifies EXISTING data in all rows matching the WHERE clause 

In [None]:
sql = '''SELECT * FROM gene_ontology WHERE pfamA_acc = 'myPfamId1';'''
curs.execute(sql)
curs.fetchall()

In [None]:
sql = '''INSERT INTO gene_ontology 
VALUES ('myPfamId1', 'goid', 'myterm', 'process');'''
curs.execute(sql)

In [None]:
sql = '''SELECT * FROM gene_ontology WHERE pfamA_acc = 'myPfamId1';'''
curs.execute(sql)
curs.fetchall()

In [None]:
# Change the category of every row that matches all three conditions.
sql = '''UPDATE gene_ontology SET category = 'function'
WHERE pfamA_acc='myPfamId1' AND go_id='goid' AND term='myterm';'''
curs.execute(sql)
# An UPDATE produces no result rows; rowcount reports how many rows were modified.
curs.rowcount

In [None]:
sql = '''SELECT * FROM gene_ontology WHERE pfamA_acc = 'myPfamId1';'''
curs.execute(sql)
curs.fetchall()

ALTER TABLE changes the structure of an existing table; in SQLite it can ADD, RENAME and DROP columns (it cannot modify a column's type or add keys)

In [None]:
# Add a new, initially empty column to gene_ontology.
# ALTER TABLE produces no result rows, so there is nothing to fetch afterwards.
sql = '''
ALTER TABLE gene_ontology ADD COLUMN alt_description varchar(255);
'''
curs.execute(sql)

In [None]:
sql = '''SELECT * FROM gene_ontology WHERE pfamA_acc = 'myPfamId1';'''
curs.execute(sql)
curs.description

ALTER TABLE can also rename a table (shown below) or a column; changing datatypes or adding constraints is possible in other database systems but not in SQLite

In [None]:
# Rename the table; the list of table names is checked in the next cell.
# ALTER TABLE produces no result rows, so there is nothing to fetch afterwards.
sql = '''
ALTER TABLE gene_ontology RENAME TO alt_gene_ontology;
'''
curs.execute(sql)

In [None]:
# List all table names. The string literal 'table' takes single quotes;
# double quotes denote identifiers in SQL.
sql = "SELECT name FROM sqlite_master WHERE type = 'table';"
curs.execute(sql)
for row in curs: print(row)

In [None]:
# Give the table its original name back before saving: the following sections
# (view, index, REGEXP and JOIN examples) all query gene_ontology.
curs.execute('ALTER TABLE alt_gene_ontology RENAME TO gene_ontology;')
conn.commit()

In [None]:
curs.close()
conn.close()

## SQL III - database design - create table, keys, views

In [None]:
from sqlite3 import connect
conn = connect('small_pfam.sqlite')
curs = conn.cursor()

See the SQL statement to create the table

In [None]:
# The sql column of sqlite_master is selected here for the pfamA_interactions entry.
# String literals take single quotes; double quotes denote identifiers in SQL.
sql = "SELECT sql FROM sqlite_master WHERE name = 'pfamA_interactions';"
curs.execute(sql)
for row in curs: print(row)

See the indices in a database

In [None]:
# List the names of all indices. String literals take single quotes;
# double quotes denote identifiers in SQL.
sql = "SELECT name FROM sqlite_master WHERE type = 'index';"
curs.execute(sql)
for row in curs: print(row)

GROUP BY the foreign key

In [None]:
# Count the interaction rows per pfamA_acc_A and show the ten largest counts.
sql = '''
SELECT pfamA_acc_A, count(*)
FROM pfamA_interactions
GROUP BY pfamA_acc_A
ORDER BY count(*) DESC
LIMIT 10;
'''
curs.execute(sql)
for record in curs:
    print(record)


JOIN ON the foreign key

In [None]:
# For every interaction of PF00271, look up the id and description of the
# partner (pfamA_acc_B) in the pfamA table.
sql = '''
SELECT pfamA_acc_A, pfamA_acc_B, pfamA.pfamA_id, pfamA.description
FROM pfamA_interactions
JOIN pfamA ON pfamA_acc_B = pfamA.pfamA_acc
WHERE pfamA_interactions.pfamA_acc_A  =  'PF00271';
'''
curs.execute(sql)
for record in curs:
    print(record)


CREATE/DROP VIEW 

In [None]:
# A view is a stored query: pfamA_GO joins each protein family with its
# gene ontology rows and exposes the columns under descriptive names.
# The aliases are identifiers, so they are written with AS and without
# single quotes (single quotes denote string literals).
sql = '''
CREATE VIEW pfamA_GO AS
SELECT pf.pfamA_acc AS Protein_Family_Accession,
pf.description AS Protein_Family_Description,
pf.type AS Protein_Family_Type,
go.term AS Gene_Ontology_Term,
go.category AS Gene_Ontology_Category
FROM pfamA pf JOIN gene_ontology go
ON (pf.pfamA_acc=go.pfamA_acc);
'''
# CREATE VIEW produces no result rows, so there is nothing to print here.
curs.execute(sql)
conn.commit()

In [None]:
# Query the view like a table and print its first ten rows.
sql = '''
SELECT * FROM pfamA_GO 
LIMIT 10
'''
curs.execute(sql)
for record in curs:
    print(record)

In [None]:
# Remove the pfamA_GO view again and save the change.
sql = '''
DROP VIEW pfamA_GO 
'''
curs.execute(sql)
conn.commit()

In [None]:
curs.close()
conn.close()

## SQL IV - index, join, regular expressions

In [None]:
from sqlite3 import connect
conn = connect('small_pfam.sqlite')
curs = conn.cursor()

See the indices in the database

In [None]:
# List the names of all indices. String literals take single quotes;
# double quotes denote identifiers in SQL.
sql = "SELECT name FROM sqlite_master WHERE type = 'index';"
curs.execute(sql)
for row in curs: print(row)

CREATE INDEX ON statement - creates an index on table column(s)

In [None]:
# Create an index named go_category_idx on the category column of gene_ontology.
sql = '''
CREATE INDEX go_category_idx ON gene_ontology(category);
'''
curs.execute(sql)
conn.commit()

In [None]:
# List the index names again to see the effect of the CREATE INDEX above.
# String literals take single quotes; double quotes denote identifiers in SQL.
sql = "SELECT name FROM sqlite_master WHERE type = 'index';"
curs.execute(sql)
for row in curs: print(row)

DROP INDEX statement - deletes the index by index name

In [None]:
# Remove the index by its name and save the change.
sql = '''
DROP INDEX go_category_idx;
'''
curs.execute(sql)
conn.commit()

In [None]:
# List the index names again to see the effect of the DROP INDEX above.
# String literals take single quotes; double quotes denote identifiers in SQL.
sql = "SELECT name FROM sqlite_master WHERE type = 'index';"
curs.execute(sql)
for row in curs: print(row)
    

Regular expression sqlite3

In [None]:
from re import search, IGNORECASE, MULTILINE, DOTALL
def regexp(pattern, strToSearch):
    """Return True when `pattern` matches anywhere in `strToSearch`.

    The search is case-insensitive, `^`/`$` match at line breaks and `.`
    also matches newlines (IGNORECASE | MULTILINE | DOTALL).

    Parameters:
        pattern: regular expression text.
        strToSearch: text to search; None (an SQL NULL) never matches.

    Returns:
        bool: True on a match, False otherwise.
    """
    # SQL NULL arrives as None; re.search would raise TypeError on it and
    # abort the whole query, so treat NULL as "no match".
    if pattern is None or strToSearch is None:
        return False
    return bool(search(pattern, strToSearch, IGNORECASE|MULTILINE|DOTALL))
# Register the Python function regexp under the SQL name REGEXP (two arguments)
# on this connection, so the query below can use the REGEXP operator.
conn.create_function("REGEXP", 2, regexp)
# NOTE(review): in Python regular expressions \B matches only where there is NO word
# boundary, so "\Bfactor" will not match the separate word "factor" - confirm \b was not intended.
sql = r'''
SELECT * FROM gene_ontology
WHERE term REGEXP 'transcription.*?(\Bfactor|initiation)';
'''
curs.execute(sql)
for row in curs: print(row)


In [None]:
# Bind the pattern as a parameter instead of pasting it into the SQL text:
# the pattern can then contain quotes or braces without breaking the statement.
sql = '''
SELECT * FROM gene_ontology
WHERE term REGEXP ?;
'''
pat = r'phospho[^egf]'
curs.execute(sql, (pat,))
for row in curs: print(row)


JOIN ON tables <br>
The <font color='red'>JOIN ON</font> operation is a part of the <font color='red'>FROM</font> clause. It typically takes the form:<br> 
 <font color='red'>FROM</font> table1 <font color='red'>JOIN</font>  table2 <font color='red'>ON</font> table1.primarykey = table2.foreignkey <br><br>
Immediately following <font color='red'>JOIN</font> table2 <font color='red'>ON</font> you specify the condition for the join. <br>
Typically, this is the primary key for table1 and the related foreign key in table2.<br><br>
This can be extended for multiple tables:<br>
 <font color='red'>FROM</font> table1 <font color='red'>JOIN</font>  table2 <font color='red'>ON</font> table1.primarykey = table2.foreignkey <font color='red'>JOIN</font> table3 <font color='red'>ON</font> table2.primarykey = table3.foreignkey <br><br>
A complete join query looks like:<br>
 <font color='red'>SELECT</font> column1, column2, ...
 <font color='red'>FROM</font> table1 <font color='red'>JOIN</font>  table2 <font color='red'>ON</font> table1.primarykey = table2.foreignkey
 <font color='red'>WHERE</font> some_condition

In [None]:
# pfamA_literature_reference links pfamA and literature_reference:
# join both to it and keep only the 'Coiled-coil' families.
sql = '''
SELECT pfamA.pfamA_acc, pfamA.type, literature_reference.pmid, literature_reference.author
FROM pfamA_literature_reference
JOIN pfamA ON pfamA_literature_reference.pfamA_acc = pfamA.pfamA_acc
JOIN literature_reference ON pfamA_literature_reference.auto_lit = literature_reference.auto_lit
WHERE pfamA.type = 'Coiled-coil';
'''
curs.execute(sql)
for record in curs:
    print(record)

Joins can be expressed without using JOIN ON and relying on the WHERE clause to perform all of the key comparisons <br>
The following query is equivalent to the one above <br>
https://pfam.xfam.org/help#tabview=tab12

In [None]:
# Same join written with a comma-separated FROM list; the key comparisons
# move into the WHERE clause. Column names are identifiers, so pfamA.type is
# written without single quotes (single quotes denote string literals).
sql = '''
SELECT pfamA.pfamA_acc, pfamA.type, literature_reference.pmid, literature_reference.author
FROM pfamA, pfamA_literature_reference, literature_reference
WHERE pfamA.pfamA_acc = pfamA_literature_reference.pfamA_acc
AND
pfamA_literature_reference.auto_lit = literature_reference.auto_lit
AND
pfamA.type = 'Coiled-coil';
'''
curs.execute(sql)
for row in curs: print(row)

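LEFT JOIN - keeps every row of the left table (pfamA), even when it has no matching row in the right table (gene_ontology)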

In [None]:
# Number of rows produced by a plain JOIN of pfamA with gene_ontology.
sql = '''
SELECT count(*)
FROM pfamA
JOIN gene_ontology
ON pfamA.pfamA_acc = gene_ontology.pfamA_acc
'''
curs.execute(sql)
for record in curs:
    print(record)

In [None]:
# Number of rows produced by a LEFT JOIN of pfamA with gene_ontology,
# for comparison with the plain JOIN count.
sql = '''
SELECT count(*)
FROM pfamA
LEFT JOIN gene_ontology
ON pfamA.pfamA_acc = gene_ontology.pfamA_acc
'''
curs.execute(sql)
for record in curs:
    print(record)

TRIGGERS - for data integrity

In [None]:
# CREATE TABLE
# Start from a clean slate so this cell can be re-run: dropping the table
# also removes any trigger attached to it.
curs.execute('DROP TABLE IF EXISTS pfamA_extra;')

# Extra_id is declared INTEGER so that the primary key has a defined type.
sql = '''
CREATE TABLE pfamA_extra(
Extra_id INTEGER PRIMARY KEY,
pfamA_id text NOT NULL,
Extra_info text NOT NULL
);
'''
curs.execute(sql)
conn.commit()

# CREATE TRIGGER
# Before every INSERT the trigger checks the new pfamA_id; values that do
# not start with 'PFAM' abort the statement with an error message.
sql = '''
CREATE TRIGGER validate_pfam_accession 
BEFORE INSERT ON pfamA_extra
BEGIN 
SELECT CASE WHEN NEW.pfamA_id NOT LIKE 'PFAM%' THEN RAISE ( ABORT, 'Invalid PFAM accession' ) 
END;
END;
'''
curs.execute(sql)
conn.commit()

In [None]:
from sqlite3 import DatabaseError

sql = '''
INSERT INTO pfamA_extra(Extra_id , pfamA_id , Extra_info)
VALUES ( 1, 'test', 'Test trigger' );
'''
# 'test' does not start with 'PFAM', so the trigger is expected to abort this
# INSERT. The error is caught and displayed so the rest of the notebook still runs.
try:
    curs.execute(sql)
except DatabaseError as err:
    print('Insert rejected:', err)

In [None]:
# 'PFAMtest' starts with 'PFAM', so the trigger lets this INSERT through.
sql = '''
INSERT INTO pfamA_extra(Extra_id , pfamA_id , Extra_info)
VALUES ( 1, 'PFAMtest', 'Test trigger' );
'''
curs.execute(sql)
conn.commit()
# An INSERT returns no rows, so query the table to show what was stored.
curs.execute('SELECT * FROM pfamA_extra;')
for row in curs: print(row)

In [None]:
# Remove the validation trigger again and save the change.
sql = '''
DROP TRIGGER validate_pfam_accession;
'''
curs.execute(sql)
conn.commit()

In [None]:
# Save any pending changes, then release the cursor and the connection
# (the other sections end the same way, with conn.close()).
curs.close()
conn.commit()
conn.close()