In [1]:
#Get CWD
import os
import csv
import pandas as pd

In [2]:
# Resolve the current working directory; the CSV paths in later cells are built from it.
path = os.getcwd()

In [3]:
# Echo the working directory that was just resolved.
path

'C:\\Users\\gibra\\Desktop\\Data Science 2023\\Portfolio\\SQL\\SQL with Python'

# **Library for SQL in Python**

In [4]:
#Import SQL
import sqlite3

In [5]:
# Open a connection to the SQLite file 'database.db'; sqlite3 creates the file if it does not exist yet.
# The file name is relative, so it is resolved against the current working directory.
conn = sqlite3.connect('database.db')

In [6]:
# Create a cursor on the connection. The cells below and both helper functions
# (fill_sql_table, read_sql_table) execute their SQL statements through this object.
cursor = conn.cursor()

In [7]:
#cursor.execute('''drop table EMPLOYEES''')

# **CREATE** a table

In [8]:
# Let's assume that HR keeps an Excel database of all the employees in the company,
# with information on each employee.
# First we create a table with the same attributes that appear in the Excel sheet.

In [9]:
# The Excel sheet with employee information has 11 attributes, so the table is created
# with the same number of attributes and matching attribute types.

# database.db persists between runs, so drop any EMPLOYEES table left by a previous run.
# Without this, re-running the notebook stops here with "table EMPLOYEES already exists".
cursor.execute('DROP TABLE IF EXISTS EMPLOYEES')

cursor.execute('''CREATE TABLE EMPLOYEES (
    EMP_ID CHAR(9) NOT NULL,
    F_NAME VARCHAR(15) NOT NULL,
    L_NAME VARCHAR(15) NOT NULL,
    SSN CHAR(9),
    B_DATE DATE,
    SEX CHAR,
    ADDRESS VARCHAR(30),
    JOB_ID CHAR(9),
    SALARY DECIMAL(10,2),
    MANAGER_ID CHAR(9),
    DEP_ID CHAR(9) NOT NULL,
    PRIMARY KEY (EMP_ID))''')

<sqlite3.Cursor at 0x1c9013a2420>

# **INSERT** data from a csv/excel/etc file

In [10]:
# Build the CSV location with os.path.join so the separator is correct on every platform
# (instead of hand-written '//' separators).
csv_file = os.path.join(path, 'Data', 'Employees.csv')

In [11]:
# Ask SQLite for the column metadata of the EMPLOYEES table.
table_name = 'EMPLOYEES'
query = f"PRAGMA table_info({table_name})"

# execute() returns the cursor itself, so the result rows can be fetched in the same expression.
columns = cursor.execute(query).fetchall()

# Each metadata row carries the column name at index 1.
attribute_names = [info[1] for info in columns]

In [12]:
# Display the column names read from the EMPLOYEES table.
attribute_names

['EMP_ID',
 'F_NAME',
 'L_NAME',
 'SSN',
 'B_DATE',
 'SEX',
 'ADDRESS',
 'JOB_ID',
 'SALARY',
 'MANAGER_ID',
 'DEP_ID']

In [13]:
# Comma-separated column list for the INSERT statement.
columns = ', '.join(attribute_names)
# One "?" bind marker per column, e.g. "?, ?, ?".
placeholders = ', '.join('?' for _ in attribute_names)

In [14]:
# Insert every row of the CSV file into EMPLOYEES.
insert_query = f'''insert into EMPLOYEES ({columns}) values ({placeholders})'''

# newline='' is what the csv module requires of the file object, so that quoted fields
# containing line breaks are parsed correctly.
with open(csv_file, 'r', newline='') as file:
    csv_data = csv.reader(file)
    # The reader is consumed lazily, so the insert must run while the file is still open.
    cursor.executemany(insert_query, csv_data)


In [15]:
# Fetch the complete EMPLOYEES table and print one row tuple per line.
all_query = "select * from EMPLOYEES"
rows = cursor.execute(all_query).fetchall()
for row in rows:
    print(row)

('E1001', 'John', 'Thomas', '123456', '01/09/1976', 'M', '5631 Rice, OakPark,IL', '100', 100000, '30001', '2')
('E1002', 'Alice', 'James', '123457', '07/31/1972', 'F', '980 Berry ln, Elgin,IL', '200', 80000, '30002', '5')
('E1003', 'Steve', 'Wells', '123458', '08/10/1980', 'M', '291 Springs, Gary,IL', '300', 50000, '30002', '5')
('E1004', 'Santosh', 'Kumar', '123459', '07/20/1985', 'M', '511 Aurora Av, Aurora,IL', '400', 60000, '30004', '5')
('E1005', 'Ahmed', 'Hussain', '123410', '01/04/1981', 'M', '216 Oak Tree, Geneva,IL', '500', 70000, '30001', '2')
('E1006', 'Nancy', 'Allen', '123411', '02/06/1978', 'F', '111 Green Pl, Elgin,IL', '600', 90000, '30001', '2')
('E1007', 'Mary', 'Thomas', '123412', '05/05/1975', 'F', '100 Rose Pl, Gary,IL', '650', 65000, '30003', '7')
('E1008', 'Bharath', 'Gupta', '123413', '05/06/1985', 'M', '145 Berry Ln, Naperville,IL', '660', 65000, '30003', '7')
('E1009', 'Andrea', 'Jones', '123414', '07/09/1990', 'F', '120 Fall Creek, Gary,IL', '234', 70000, '30

# **FUNCTION** to load a CSV file into a SQL table

In [36]:
def fill_sql_table(table_name, csv_file, db_cursor=None):
    '''Load every row of a CSV file into an existing SQL table.

    The column list is read from the table itself (``PRAGMA table_info``), so each CSV
    row must supply one value per table column, in the table's column order. No header
    line is skipped; blank lines in the CSV are ignored.

    Parameters
    ----------
    table_name : str
        Name of the target table. It is quoted as an SQL identifier before use.
    csv_file : str
        Path to the CSV file to load.
    db_cursor : sqlite3.Cursor, optional
        Cursor to run the statements on. When omitted, the notebook-level ``cursor``
        is used, so existing two-argument calls behave as before.

    Returns
    -------
    None

    Raises
    ------
    sqlite3.OperationalError
        If ``table_name`` does not name an existing table.

    Notes
    -----
    An ``sqlite3.IntegrityError`` raised while inserting (for example a duplicate
    primary key) is reported with ``print`` instead of being re-raised. Rows inserted
    before the offending row are not rolled back by this function.
    '''

    # Fall back to the notebook-level cursor so existing two-argument calls keep working.
    active_cursor = cursor if db_cursor is None else db_cursor

    def quote_identifier(name):
        # Identifiers cannot be bound with "?" placeholders, so quote them instead:
        # wrap in double quotes and double any embedded double quote.
        return '"' + str(name).replace('"', '""') + '"'

    quoted_table = quote_identifier(table_name)

    # Get attribute names of the table; each metadata row carries the name at index 1.
    active_cursor.execute(f"PRAGMA table_info({quoted_table})")
    attribute_names = [column[1] for column in active_cursor.fetchall()]

    # PRAGMA table_info returns no rows for an unknown table; fail with a clear message
    # instead of the syntax error an empty column list would produce.
    if not attribute_names:
        raise sqlite3.OperationalError(f"no such table: {table_name}")

    # Column list and matching bind markers for the insert query, i.e. (?, ?, ?, ?)
    columns = ', '.join(quote_identifier(name) for name in attribute_names)
    placeholders = ', '.join(['?'] * len(attribute_names))
    insert_query = f'''insert into {quoted_table} ({columns}) values ({placeholders})'''

    # newline='' is what the csv module requires of the file object, so that quoted
    # fields containing line breaks are parsed correctly.
    with open(csv_file, 'r', newline='') as file:
        # csv.reader yields [] for blank lines; skip them so they do not abort the load.
        csv_rows = (row for row in csv.reader(file) if row)

        try:
            active_cursor.executemany(insert_query, csv_rows)

        except sqlite3.IntegrityError as error:
            # Report the constraint SQLite actually rejected (UNIQUE, NOT NULL, ...).
            print(f"IntegrityError: {error}. The filling process was interrupted.")

# **FUNCTION** to read every row of a SQL table

In [17]:
def read_sql_table(table_name, db_cursor=None):
    '''Print every row of a SQL table, one row tuple per line.

    Parameters
    ----------
    table_name : str
        Name of the table to read. It is quoted as an SQL identifier before use, so
        names that are reserved words or contain spaces also work.
    db_cursor : sqlite3.Cursor, optional
        Cursor to run the query on. When omitted, the notebook-level ``cursor`` is
        used, so existing one-argument calls behave as before.

    Returns
    -------
    None
    '''
    # Fall back to the notebook-level cursor so existing one-argument calls keep working.
    active_cursor = cursor if db_cursor is None else db_cursor

    # Identifiers cannot be bound with "?" placeholders, so quote the name instead:
    # wrap in double quotes and double any embedded double quote.
    quoted_table = '"' + str(table_name).replace('"', '""') + '"'

    all_query = f"select * from {quoted_table}"
    active_cursor.execute(all_query)
    for row in active_cursor.fetchall():
        print(row)

## *Create & Fill Job History table*

In [18]:
# database.db persists between runs, so drop any JOB_HISTORY table left by a previous run.
# Without this, re-running the notebook stops here with "table JOB_HISTORY already exists".
cursor.execute('DROP TABLE IF EXISTS JOB_HISTORY')

# One row per (employee, job) pair: the primary key is the combination EMPL_ID + JOBS_ID.
cursor.execute('''CREATE TABLE JOB_HISTORY (
                  EMPL_ID CHAR(9) NOT NULL,
                  START_DATE DATE,
                  JOBS_ID CHAR(9) NOT NULL,
                  DEPT_ID CHAR(9),
                  PRIMARY KEY (
                  EMPL_ID,JOBS_ID))''')

<sqlite3.Cursor at 0x1c9013a2420>

In [19]:
# Build the CSV location with os.path.join; hard-coded backslash separators only work on Windows.
csv_path = os.path.join(path, 'Data', 'JobsHistory.csv')

In [20]:
#cursor.execute('''drop table JOB_HISTORY''')

In [21]:
# Load the rows of the CSV file referenced by csv_path into the JOB_HISTORY table.
fill_sql_table('JOB_HISTORY', csv_path)

In [22]:
# Print the JOB_HISTORY rows to check what was loaded.
read_sql_table('JOB_HISTORY')

('E1001', '08/01/2000', '100', '2')
('E1002', '08/01/2001', '200', '5')
('E1003', '08/16/2001', '300', '5')
('E1004', '08/16/2000', '400', '5')
('E1005', '05/30/2000', '500', '2')
('E1006', '08/16/2001', '600', '2')
('E1007', '05/30/2002', '650', '7')
('E1008', '05/06/2010', '660', '7')
('E1009', '08/16/2016', '234', '7')
('E1010', '08/16/2016', '220', '5')


## *Create & fill Jobs table*

In [23]:
# database.db persists between runs, so drop any JOBS table left by a previous run.
# Without this, re-running the notebook stops here with "table JOBS already exists".
cursor.execute('DROP TABLE IF EXISTS JOBS')

# One row per job, keyed by JOB_IDENT, with its title and salary range.
cursor.execute('''CREATE TABLE JOBS (
                  JOB_IDENT CHAR(9) NOT NULL,
                  JOB_TITLE VARCHAR(15) ,
                  MIN_SALARY DECIMAL(10,2),
                  MAX_SALARY DECIMAL(10,2),
                  PRIMARY KEY (JOB_IDENT))''')

<sqlite3.Cursor at 0x1c9013a2420>

In [24]:
# Build the CSV location with os.path.join; hard-coded backslash separators only work on Windows.
csv_path = os.path.join(path, 'Data', 'Jobs.csv')

In [25]:
# Load the rows of the CSV file referenced by csv_path into the JOBS table.
fill_sql_table('JOBS', csv_path)

In [26]:
# Print the JOBS rows to check what was loaded.
read_sql_table('JOBS')

('100', 'Sr. Architect', 60000, 100000)
('200', 'Sr. Software Developer', 60000, 80000)
('300', 'Jr.Software Developer', 40000, 60000)
('400', 'Jr.Software Developer', 40000, 60000)
('500', 'Jr. Architect', 50000, 70000)
('600', 'Lead Architect', 70000, 100000)
('650', 'Jr. Designer', 60000, 70000)
('660', 'Jr. Designer', 60000, 70000)
('234', 'Sr. Designer', 70000, 90000)
('220', 'Sr. Designer', 70000, 90000)


## *Create & fill Departments & Locations tables*

In [27]:
# database.db persists between runs, so drop any DEPARTMENTS table left by a previous run.
# Without this, re-running the notebook stops here with "table DEPARTMENTS already exists".
cursor.execute('DROP TABLE IF EXISTS DEPARTMENTS')

# One row per department: DEPT_ID_DEP is the primary key, so department ids must be unique.
cursor.execute('''CREATE TABLE DEPARTMENTS (
                  DEPT_ID_DEP CHAR(9) NOT NULL,
                  DEP_NAME VARCHAR(15) ,
                  MANAGER_ID CHAR(9),
                  LOC_ID CHAR(9),
                  PRIMARY KEY (DEPT_ID_DEP))''')

<sqlite3.Cursor at 0x1c9013a2420>

In [28]:
# database.db persists between runs, so drop any LOCATIONS table left by a previous run.
# Without this, re-running the notebook stops here with "table LOCATIONS already exists".
cursor.execute('DROP TABLE IF EXISTS LOCATIONS')

# One row per (location, department) pair: the primary key is LOCT_ID + DEP_ID_LOC.
cursor.execute('''CREATE TABLE LOCATIONS (
                  LOCT_ID CHAR(9) NOT NULL,
                  DEP_ID_LOC CHAR(9) NOT NULL,
                  PRIMARY KEY (
                  LOCT_ID,DEP_ID_LOC))''')

<sqlite3.Cursor at 0x1c9013a2420>

In [29]:
# Load Locations.csv into LOCATIONS. The path is built with os.path.join because
# hard-coded backslash separators only work on Windows.
fill_sql_table('LOCATIONS', os.path.join(path, 'Data', 'Locations.csv'))

In [30]:
# Print the LOCATIONS rows to check what was loaded.
read_sql_table('LOCATIONS')

('L0001', '2')
('L0002', '5')
('L0003', '7')


In [37]:
# Load Departments.csv into DEPARTMENTS. The path is built with os.path.join because
# hard-coded backslash separators only work on Windows.
fill_sql_table('DEPARTMENTS', os.path.join(path, 'Data', 'Departments.csv'))

IntegrityError: UNIQUE constraint failed. The ID Column does not have UNIQUE values, the filling process was interrupted.


In [38]:
# Print what DEPARTMENTS holds after the load above reported an IntegrityError.
read_sql_table('DEPARTMENTS')

('2', 'Architect Group', '30001', 'L0001')
('5', 'Software Group', '30002', 'L0002')
('7', 'Design Team', '30003', 'L0003')


In [42]:
# Read the raw CSV with pandas to inspect the rows that were rejected.
# header=None makes pandas treat the first line as data and number the columns 0..n-1.
# The path is built with os.path.join because hard-coded backslash separators only work on Windows.
dept = pd.read_csv(os.path.join(path, 'Data', 'Departments.csv'), header=None)

In [43]:
# Display the raw Departments.csv rows as read by pandas.
dept

Unnamed: 0,0,1,2,3
0,2,Architect Group,30001,L0001
1,5,Software Group,30002,L0002
2,7,Design Team,30003,L0003
3,5,Software Group,30004,L0004


# **Commit** changes to the Database

In [44]:
# Persist the rows inserted during this session to database.db, then release the connection.
# After close(), neither conn nor the cursor created from it can be used again.
conn.commit()
conn.close()