# Week 11. Extra.

In [10]:
import pandas
from pprint import pprint
import sqlite3
# open db file
conn = sqlite3.connect("example1normalize.db")
# get cursor
cur = conn.cursor()

cur.executescript("""
    DROP TABLE IF EXISTS EMPLOYEES_PROJECTS_TIME;
    CREATE TABLE [EMPLOYEES_PROJECTS_TIME] (
            [EmployeeID] TEXT  NOT NULL PRIMARY KEY,
            [Name] TEXT NOT NULL,
            [Project] TEXT NOT NULL,
            [Time] TEXT NOT NULL
        );


    INSERT INTO EMPLOYEES_PROJECTS_TIME VALUES('EN1-26', "Sean O'Brien", "30-452-T3, 30-457-T3, 32-244-T3", "0.25, 0.40, 0.30");
    INSERT INTO EMPLOYEES_PROJECTS_TIME VALUES('EN1-33', "Amy Guya", "30-452-T3, 30-382-TC, 32-244-T3", "0.05, 0.35, 0.60");
    INSERT INTO EMPLOYEES_PROJECTS_TIME VALUES('EN1-35', "Steven Baranco", "30-452-T3, 31-238-TC", "0.15, 0.80");
    INSERT INTO EMPLOYEES_PROJECTS_TIME VALUES('EN1-36', "Elizabeth Roslyn", "35-152-TC", "0.90");
    INSERT INTO EMPLOYEES_PROJECTS_TIME VALUES('EN1-38', "Carol Schaaf", "36-272-TC", "0.75");
    INSERT INTO EMPLOYEES_PROJECTS_TIME VALUES('EN1-40', "Alexandra Wing", "31-238-TC, 31-241-TC", "0.20, 0.70");
""")

In [3]:
cur.execute("select * from EMPLOYEES_PROJECTS_TIME")
original_table = cur.fetchall()
pprint(original_table)

[('EN1-26',
  "Sean O'Brien",
  '30-452-T3, 30-457-T3, 32-244-T3',
  '0.25, 0.40, 0.30'),
 ('EN1-33', 'Amy Guya', '30-452-T3, 30-382-TC, 32-244-T3', '0.05, 0.35, 0.60'),
 ('EN1-35', 'Steven Baranco', '30-452-T3, 31-238-TC', '0.15, 0.80'),
 ('EN1-36', 'Elizabeth Roslyn', '35-152-TC', '0.90'),
 ('EN1-38', 'Carol Schaaf', '36-272-TC', '0.75'),
 ('EN1-40', 'Alexandra Wing', '31-238-TC, 31-241-TC', '0.20, 0.70')]


## lets normalize it

In [47]:
# drop old table and create new tables
# we have content of old one in original_table list of tuples
cur.executescript("""
    DROP TABLE IF EXISTS employees_projects_time;
    DROP TABLE IF EXISTS employees;
    DROP TABLE IF EXISTS projects;


    PRAGMA foreign_keys = ON;

    CREATE TABLE employees (
            employee_db_id INTEGER  NOT NULL PRIMARY KEY,
            first_name TEXT  NOT NULL,
            last_name TEXT NOT NULL,
            employee_id TEXT NOT NULL,
            UNIQUE(employee_id)
        );
    CREATE TABLE projects (
            project_db_id INTEGER  NOT NULL PRIMARY KEY,
            project_id TEXT NOT NULL,
            UNIQUE(project_id)
        );
    CREATE TABLE employees_projects_time (
            employees_projects_time_id INTEGER  NOT NULL PRIMARY KEY,
            project_db_id INTEGER NOT NULL,
            employee_db_id INTEGER NOT NULL,
            time REAL NOT NULL,
            FOREIGN KEY(employee_db_id) REFERENCES employees(employee_db_id),
            FOREIGN KEY(project_db_id) REFERENCES projects(project_db_id),
            UNIQUE(project_db_id, employee_db_id)
        );

""")

<sqlite3.Cursor at 0x20a4baa5500>

In [48]:
# populate employees
for employee_id, name, *_ in original_table:
    first_name, last_name = name.split()
    print(employee_id,first_name, last_name)

    cur.execute("INSERT INTO employees (first_name, last_name, employee_id) VALUES(?,?,?)",
                (first_name, last_name,employee_id))

EN1-26 Sean O'Brien
EN1-33 Amy Guya
EN1-35 Steven Baranco
EN1-36 Elizabeth Roslyn
EN1-38 Carol Schaaf
EN1-40 Alexandra Wing


In [59]:
cur.execute("select * from employees")
employees = cur.fetchall()
pprint(employees)

employees_dict = {r[3]:r[0] for r in employees}
pprint(employees_dict)

[(1, 'Sean', "O'Brien", 'EN1-26'),
 (2, 'Amy', 'Guya', 'EN1-33'),
 (3, 'Steven', 'Baranco', 'EN1-35'),
 (4, 'Elizabeth', 'Roslyn', 'EN1-36'),
 (5, 'Carol', 'Schaaf', 'EN1-38'),
 (6, 'Alexandra', 'Wing', 'EN1-40')]
{'EN1-26': 1, 'EN1-33': 2, 'EN1-35': 3, 'EN1-36': 4, 'EN1-38': 5, 'EN1-40': 6}


In [50]:
# populate project
projects = []

for _, _, m_projects,_ in original_table:
    for project in m_projects.split(","):
        projects.append(project.strip())
projects = sorted(list(set(projects)))

for project in projects:
    cur.execute("INSERT INTO projects (project_id) VALUES (?)",
                (project,))

In [51]:
cur.execute("select * from projects")
projects = cur.fetchall()
pprint(projects)

projects_dict = {r[1]:r[0] for r in projects}
pprint(projects_dict)

[(1, '30-382-TC'),
 (2, '30-452-T3'),
 (3, '30-457-T3'),
 (4, '31-238-TC'),
 (5, '31-241-TC'),
 (6, '32-244-T3'),
 (7, '35-152-TC'),
 (8, '36-272-TC')]
{'30-382-TC': 1,
 '30-452-T3': 2,
 '30-457-T3': 3,
 '31-238-TC': 4,
 '31-241-TC': 5,
 '32-244-T3': 6,
 '35-152-TC': 7,
 '36-272-TC': 8}


In [61]:
# populate employees_projects_time
for employee_id, name, m_projects,m_times in original_table:
    for projec_id,m_time in zip(m_projects.split(","),m_times.split(",")):
        project_id=projec_id.strip()
        m_time=m_time.strip()

        #print(employee_id,project_id,m_time)

        cur.execute("INSERT INTO employees_projects_time (project_db_id,employee_db_id,[time]) VALUES (?,?,?)",
                    (projects_dict[project_id],employees_dict[employee_id],m_time))

IntegrityError: UNIQUE constraint failed: employees_projects_time.project_db_id, employees_projects_time.employee_db_id

In [62]:
def show_table(conn, table):
    """
    Helping function to displays whole table
    """
    print(f"Table: {table}")
    df = pandas.read_sql_query(f"select * FROM {table};", conn)
    display(df)

show_table(conn, "employees")
show_table(conn, "projects")
show_table(conn, "employees_projects_time")

Table: employees


Unnamed: 0,employee_db_id,first_name,last_name,employee_id
0,1,Sean,O'Brien,EN1-26
1,2,Amy,Guya,EN1-33
2,3,Steven,Baranco,EN1-35
3,4,Elizabeth,Roslyn,EN1-36
4,5,Carol,Schaaf,EN1-38
5,6,Alexandra,Wing,EN1-40


Table: projects


Unnamed: 0,project_db_id,project_id
0,1,30-382-TC
1,2,30-452-T3
2,3,30-457-T3
3,4,31-238-TC
4,5,31-241-TC
5,6,32-244-T3
6,7,35-152-TC
7,8,36-272-TC


Table: employees_projects_time


Unnamed: 0,employees_projects_time_id,project_db_id,employee_db_id,time
0,1,2,1,0.25
1,2,3,1,0.4
2,3,6,1,0.3
3,4,2,2,0.05
4,5,1,2,0.35
5,6,6,2,0.6
6,7,2,3,0.15
7,8,4,3,0.8
8,9,7,4,0.9
9,10,8,5,0.75


In [63]:
df = pandas.read_sql_query("""
    SELECT * FROM employees_projects_time
    INNER JOIN employees on employees.employee_db_id = employees_projects_time.employee_db_id
    INNER JOIN projects on projects.project_db_id = projects.project_db_id
""", conn)
display(df)

Unnamed: 0,employees_projects_time_id,project_db_id,employee_db_id,time,employee_db_id.1,first_name,last_name,employee_id,project_db_id.1,project_id
0,1,2,1,0.25,1,Sean,O'Brien,EN1-26,1,30-382-TC
1,1,2,1,0.25,1,Sean,O'Brien,EN1-26,2,30-452-T3
2,1,2,1,0.25,1,Sean,O'Brien,EN1-26,3,30-457-T3
3,1,2,1,0.25,1,Sean,O'Brien,EN1-26,4,31-238-TC
4,1,2,1,0.25,1,Sean,O'Brien,EN1-26,5,31-241-TC
...,...,...,...,...,...,...,...,...,...,...
91,12,5,6,0.70,6,Alexandra,Wing,EN1-40,4,31-238-TC
92,12,5,6,0.70,6,Alexandra,Wing,EN1-40,5,31-241-TC
93,12,5,6,0.70,6,Alexandra,Wing,EN1-40,6,32-244-T3
94,12,5,6,0.70,6,Alexandra,Wing,EN1-40,7,35-152-TC


In [64]:
df = pandas.read_sql_query("""
    SELECT first_name,last_name,employee_id,project_id FROM employees_projects_time
    INNER JOIN employees on employees.employee_db_id = employees_projects_time.employee_db_id
    INNER JOIN projects on projects.project_db_id = projects.project_db_id
""", conn)
display(df)

Unnamed: 0,first_name,last_name,employee_id,project_id
0,Amy,Guya,EN1-33,30-382-TC
1,Amy,Guya,EN1-33,30-452-T3
2,Amy,Guya,EN1-33,30-457-T3
3,Amy,Guya,EN1-33,31-238-TC
4,Amy,Guya,EN1-33,31-241-TC
...,...,...,...,...
91,Carol,Schaaf,EN1-38,31-238-TC
92,Carol,Schaaf,EN1-38,31-241-TC
93,Carol,Schaaf,EN1-38,32-244-T3
94,Carol,Schaaf,EN1-38,35-152-TC
