# **IC AND FD QUERIES**
---
This notebook checks the integrity constraints and functional dependencies in the cleaned database.

In [1]:
import sqlite3
import pandas as pd
# Location of the cleaned SQLite database, relative to this notebook.
DB_PATH = '../data/clean_data/food_inspections.db'

# Open the database read-only through a SQLite URI. This notebook only
# validates data, and connecting with a plain path would silently create a new
# empty database file if the path were wrong; with mode=ro a missing file
# raises sqlite3.OperationalError instead.
db = sqlite3.connect('file:' + DB_PATH + '?mode=ro', uri=True)

# Cursor on the same connection, kept for cells that execute statements directly.
cur = db.cursor()

In [2]:
# Check null values in primary keys for table Establishments.
# Every row returned is a violation; an empty result means the check passes.

N_E = '''
SELECT * 
FROM Establishments 
WHERE estID IS NULL

'''
# pd.read_sql_query executes the statement itself, so a separate cur.execute()
# would only run the same query a second time and discard the result.
df = pd.read_sql_query(N_E, db)
df.head(10)


Unnamed: 0,estID,estName


In [3]:
# Check null values in primary keys for table EstablishmentInspections.
# Both estID and inspecID are tested; every row returned is a violation.

N_EI = '''
SELECT * 
FROM EstablishmentInspections
WHERE estID IS NULL OR inspecID IS NULL
 

'''
# pd.read_sql_query executes the statement itself, so a separate cur.execute()
# would only run the same query a second time and discard the result.
df = pd.read_sql_query(N_EI, db)
df.head(10)

Unnamed: 0,estID,inspecID


In [4]:
# Check null values in primary keys for table Inspections.
# Every row returned is a violation; an empty result means the check passes.

# Stored under its own name (N_I) so it does not overwrite the
# EstablishmentInspections query held in N_EI.
N_I = '''
SELECT * 
FROM Inspections
WHERE inspecID IS NULL
 

'''
# pd.read_sql_query executes the statement itself, so a separate cur.execute()
# would only run the same query a second time and discard the result.
df = pd.read_sql_query(N_I, db)
df.head(10)

Unnamed: 0,inspecID,result,number,comments,description


In [5]:
# Check IC between tables EstablishmentInspections, Establishments:
# list every EstablishmentInspections.estID that has no matching row in
# Establishments. An empty result means the check passes.
#
# NOT EXISTS is used instead of NOT IN because `x NOT IN (subquery)` evaluates
# to NULL for every non-matching row as soon as the subquery yields a single
# NULL, which would make this check return no rows and hide all violations.
# Note that a NULL ei.estID has no match either, so it is reported here too.
ICV_E_EI = '''
SELECT ei.estID AS estID
FROM EstablishmentInspections AS ei
WHERE NOT EXISTS (
    SELECT 1
    FROM Establishments AS e
    WHERE e.estID = ei.estID
)

'''
# pd.read_sql_query executes the statement itself; no separate cur.execute() needed.
df = pd.read_sql_query(ICV_E_EI, db)
df.head(10)

Unnamed: 0,estID


In [6]:
# Check IC between tables Inspections, EstablishmentInspections:
# list every Inspections.inspecID that is not linked to any row in
# EstablishmentInspections. An empty result means the check passes.
#
# NOT EXISTS is used instead of NOT IN because `x NOT IN (subquery)` evaluates
# to NULL for every non-matching row as soon as the subquery yields a single
# NULL, which would make this check return no rows and hide all violations.
#
# NOTE(review): this only checks Inspections -> EstablishmentInspections. The
# reverse direction (an EstablishmentInspections.inspecID with no row in
# Inspections) is not checked in this cell -- confirm whether it should be.

ICV_EI_I = '''
SELECT i.inspecID AS inspecID
FROM Inspections AS i
WHERE NOT EXISTS (
    SELECT 1
    FROM EstablishmentInspections AS ei
    WHERE ei.inspecID = i.inspecID
)

'''
# pd.read_sql_query executes the statement itself; no separate cur.execute() needed.
df = pd.read_sql_query(ICV_EI_I, db)
df.head(10)

Unnamed: 0,inspecID


In [7]:
# Check duplicate keys on table Establishments:
# group by estID and report every key that occurs more than once.
# An empty result means estID is unique.

# COUNT(*) is used for both the reported count and the HAVING filter.
# COUNT(estID) skips NULLs, so for a NULL key group it would report 0 while
# the filter counted all of its rows.
D1 = '''

SELECT estID, COUNT(*) AS count_of_estID
FROM Establishments
GROUP BY estID
HAVING COUNT(*) > 1

'''
# pd.read_sql_query executes the statement itself; no separate cur.execute() needed.
df = pd.read_sql_query(D1, db)
df.head(10)


Unnamed: 0,estID,count_of_estID


In [8]:
# Check duplicate keys on table EstablishmentInspections:
# group by the (estID, inspecID) pair and report every pair that occurs more
# than once. An empty result means the composite key is unique.
D2 = '''

SELECT estID, inspecID, COUNT(*) as count
FROM EstablishmentInspections
GROUP BY estID,inspecID
HAVING COUNT(*) > 1

'''
# pd.read_sql_query executes the statement itself, so a separate cur.execute()
# would only run the same query a second time and discard the result.
df = pd.read_sql_query(D2, db)
df.head(10)

Unnamed: 0,estID,inspecID,count


In [9]:
# Check Integrity on table Inspections:
# report every `number` value outside the inclusive range 1..70.
# An empty result means all non-NULL values are in range.
#
# NOTE(review): a NULL `number` is not reported, because NULL NOT BETWEEN ...
# evaluates to NULL. Confirm whether NULL is a legal value for this column.
# NOTE(review): the bounds 1 and 70 presumably come from the data
# specification -- verify against the source documentation.

D3 = '''

SELECT number
FROM Inspections
WHERE number NOT BETWEEN 1 and 70

'''
# pd.read_sql_query executes the statement itself, so a separate cur.execute()
# would only run the same query a second time and discard the result.
df = pd.read_sql_query(D3, db)
df.head(10)

Unnamed: 0,number


In [10]:
# Check functional dependencies on table Establishments (estID -> estName):
# self-join the table on estID and report pairs of rows that share an estID
# but carry different names. An empty result means the dependency holds.
#
# Each violating pair appears twice (once per ordering), since the condition
# is symmetric. Rows whose estName is NULL are never reported, because
# `!=` against NULL evaluates to NULL.

FD_E = '''

SELECT E1.estName AS ename1, E2.estName AS ename2
FROM Establishments AS E1
JOIN Establishments AS E2
  ON E1.estID = E2.estID
WHERE E1.estName != E2.estName

'''
# pd.read_sql_query executes the statement itself; no separate cur.execute() needed.
df = pd.read_sql_query(FD_E, db)
df.head(10)

Unnamed: 0,ename1,ename2


In [11]:
# Close the SQLite connection; this is the last cell of the notebook.
db.close()