## Import and Install Libraries

In [1]:
# !pip install pymongo



In [2]:
# Install psycopg3
# !pip install psycopg[binary]



In [116]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import pymongo
import psycopg
import time
from psycopg import sql
from io import StringIO

# PART 5(1): NoSQL

## Pull data from database

In [119]:
# Pull data from the database

# Define reading function
def read_table_short(
    conn,
    table_name: str,
    column_names: list,
    n=5000,
    verbosity=100
) -> None:
    """Copy a single table from the database using the COPY command."""
    
    t0 = time.time()
    print(f'COPYING FROM {table_name}')
    
    with conn.cursor() as cur:
        query = f'COPY (SELECT * FROM {table_name} LIMIT %s) TO STDOUT'
        copy_sql = sql.SQL(query)
        
        # Load data from the table using copy method
        with cur.copy(copy_sql, (n,)) as copy: # Expect only n records
            r = 0
            list_out = []
            for row in copy.rows():
                r += 1
                list_out.append(row)

                if r % verbosity == 0:
                    print(f'...{r} ROWS COPIED: {time.time() - t0} SEC')
        
    print(f'TABLE {table_name} COPIED: {time.time() - t0} SEC\n')
    return pd.DataFrame(list_out, columns=column_names)

# Establish database connection
time0 = time.time()
with psycopg.connect(dbname="mesa8413", user="postgres", password="MESA8413") as conn:
    print(f'DATABASE CONNECTED: {time.time() - time0} SEC\n')
    
    # get AGENCY
    agency = read_table_short(
        conn, 
        'nyc311.agency',
        ['agency','agency_name'],
        verbosity=5
    )

    # get CALL_CENTER
    call_center = read_table_short(
        conn,
        'nyc311.call_center',
        ['cc_id','cc_street','cc_borough'],
    )

    # get CALL_OPERATOR
    call_operator = read_table_short(
        conn,
        'nyc311.call_operator',
        ['op_id','op_name','op_ssn','op_salary','op_cc'],
        n=6000, # Should retrieve all 5000 records
        verbosity=1000
    )
    
    # get COMPLAINT_TYPE
    complaint_type = read_table_short(
        conn,
        'nyc311.complaint_type',
        ['complaint_id','agency','complaint_type','complaint_descriptor','location_type'],
        verbosity=250
    )

    # get COMPLAINT_INCIDENT (will be truncated at 5000)
    complaint_incident = read_table_short(
        conn,
        'nyc311.complaint_incident',
        ['unique_key',
         'complaint_id',
         'op_id',
         'created_date',
         'incident_zip',
         'incident_address',
         'cross_street_1',
         'cross_street_2',
         'intersection_street_1',
         'intersection_street_2',
         'address_type',
         'city',
         'landmark',
         'community_board',
         'borough',
         'x_coordinate_state_plane',
         'y_coordinate_state_plane',
         'open_data_channel_type',
         'park_facility_name',
         'latitude',
         'longitude',
         'full_adress'],
        verbosity=500
    )
    
print(f'CONNECTION CLOSED. TOTAL TRANSACTION TIME: {time.time() - time0} SEC')

DATABASE CONNECTED: 0.11371636390686035 SEC

COPYING FROM nyc311.agency
...5 ROWS COPIED: 0.008042335510253906 SEC
...10 ROWS COPIED: 0.008042335510253906 SEC
...15 ROWS COPIED: 0.008042335510253906 SEC
TABLE nyc311.agency COPIED: 0.008042335510253906 SEC

COPYING FROM nyc311.call_center
...100 ROWS COPIED: 0.0075266361236572266 SEC
...200 ROWS COPIED: 0.008558273315429688 SEC
...300 ROWS COPIED: 0.008558273315429688 SEC
...400 ROWS COPIED: 0.009523153305053711 SEC
...500 ROWS COPIED: 0.009523153305053711 SEC
TABLE nyc311.call_center COPIED: 0.009523153305053711 SEC

COPYING FROM nyc311.call_operator
...1000 ROWS COPIED: 0.015401363372802734 SEC
...2000 ROWS COPIED: 0.025006532669067383 SEC
...3000 ROWS COPIED: 0.0320584774017334 SEC
...4000 ROWS COPIED: 0.03948068618774414 SEC
...5000 ROWS COPIED: 0.04714465141296387 SEC
TABLE nyc311.call_operator COPIED: 0.04714465141296387 SEC

COPYING FROM nyc311.complaint_type
...250 ROWS COPIED: 0.008826732635498047 SEC
...500 ROWS COPIED: 0.0108

In [120]:
tables = [
    agency,
    call_center,
    call_operator,
    complaint_type,
    complaint_incident
]

# Check that data looks as expected
for t in tables:
    print(t.info())
    print(t.head(),'\n')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   agency       15 non-null     object
 1   agency_name  15 non-null     object
dtypes: object(2)
memory usage: 372.0+ bytes
None
  agency                                   agency_name
0   NYPD               New York City Police Department
1    TLC                 Taxi and Limousine Commission
2   DSNY                      Department of Sanitation
3   DCWP  Department of Consumer and Worker Protection
4  DOHMH       Department of Health and Mental Hygiene 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   cc_id       500 non-null    object
 1   cc_street   500 non-null    object
 2   cc_borough  500 non-null    object
dtypes: object(3)
memory usage: 11.8+ KB
None
  

## Transform data to JSON

In [121]:
# Convert types as necessary
call_operator['op_salary'] = call_operator['op_salary'].astype(float)
complaint_incident['x_coordinate_state_plane'] = complaint_incident['x_coordinate_state_plane'].astype(int)
complaint_incident['y_coordinate_state_plane'] = complaint_incident['y_coordinate_state_plane'].astype(int)
complaint_incident['latitude'] = complaint_incident['latitude'].astype(float)
complaint_incident['longitude'] = complaint_incident['longitude'].astype(float)

In [122]:
# We're going to build nested documents, of the form:

# { unique_key,
#   [other_incident_details],
#   status: { [status_details] },
#   complaint: { [complaint_details], agency: { agnecy, agency_name } },
#   operator: { [operator_details], call_center: { [call_center_details] } }
# }


# Start from the center (agency) and work out to complaint_type, then complaint_incident:
agency_js = agency.copy()

# Create list of dictionaries out of the table content
agency_dict = agency_js.to_dict(orient='records')

# Reattach the json to the dataframe, drop redundant columns
agency_js['agency_details'] = agency_dict
agency_js = agency_js.drop(columns='agency_name')
# print(agency_js)

# Set index to 'agency'
agency_js = agency_js.set_index('agency')

# Now do a left join between complaint_type and agency, on 'agency'
complaint_type_js = complaint_type.copy()
complaint_type_js = pd.merge(complaint_type_js, agency_js, how='left', left_on='agency', right_index=True)
complaint_type_js = complaint_type_js.drop(columns='agency')
# print(complaint_type_js.head())

# Create list of dictionaries of columns, reattach
complaint_type_js['complaint_type_details'] = complaint_type_js.to_dict(orient='records')

# Set complaint index to complaint ID and clean up extra columns
complaint_type_js = complaint_type_js.set_index('complaint_id')
complaint_type_js = complaint_type_js.drop(columns=['complaint_type', 'complaint_descriptor', 'location_type'])
# print(complaint_type_js.head())

# Now attach complaint_type to complaint_incident, remove extra columns
complaint_incident_js = complaint_incident.copy()
complaint_incident_js = pd.merge(complaint_incident_js, complaint_type_js, how='left', left_on='complaint_id', right_index=True)
complaint_incident_js = complaint_incident_js.drop(columns='complaint_id')
# ---

# Now take care of call center, then call operator, then incident
call_center_js = call_center.copy()

# Create list of dictionaries, reattach
call_center_js['call_center_details'] = call_center_js.to_dict(orient='records')

# Set center index to ID and remove extra columns
call_center_js = call_center_js.set_index('cc_id')
call_center_js = call_center_js.drop(columns=['cc_street', 'cc_borough'])

# Now left-join to operators
call_operator_js = call_operator.copy()
call_operator_js = pd.merge(call_operator_js, call_center_js, how='left', left_on='op_cc', right_index=True)
call_operator_js = call_operator_js.drop(columns='op_cc')
# print(call_operator_js.loc[0])

# Create list of dictionaries of columns, reattach
call_operator_js['operator_details'] = call_operator_js.to_dict(orient='records')

# Set index to operator ID, remove extra columns
call_operator_js = call_operator_js.set_index('op_id')
call_operator_js = call_operator_js.drop(columns=['op_name', 'op_ssn', 'op_salary'])

# Now left-join to complaint_incident
complaint_incident_js = pd.merge(complaint_incident_js, call_operator_js, how='left', left_on='op_id', right_index=True)
complaint_incident_js = complaint_incident_js.drop(columns='op_id')

# ---

# Rename unique_key to _id
complaint_incident_js = complaint_incident_js.rename(columns={'unique_key':'_id'})

# Convert complaint_incident_js to dict
complaints_dict = complaint_incident_js.to_dict(orient='records')

# Drop null key/value pairs
i = 0
for cdict in complaints_dict:
    del_keys = []
    for key, value in cdict.items():
        if pd.isna(value):
            del_keys.append(key)
    if len(del_keys) > 0:
        for k in del_keys:
            removed_key = cdict.pop(k, None)
            i += 1
print(f'Removed {i} NaN values')

Removed 4 NaN values


In [166]:
# Finally, convert to json for external storage
complaints = pd.Series(complaints_dict).to_json(orient='records')

# Write to file
with open('..\\Data\\3_JSON\\complaints.json', 'w', encoding='utf-8') as f:
    json.dump(complaints, f, indent=4) # indent=4 for pretty-printing

## Insert data into MongoDB

In [123]:
# Define local connection
CONNECTION_STRING = "mongodb://localhost:27017/"
client = pymongo.MongoClient(CONNECTION_STRING)

# Define db
db = client["nyc_311"]

# Clear old collection if needed to rerun below insert commands
collection = db["complaints"]
collection.drop()

# (Re-)Define collection
collection = db["complaints"]

In [124]:
# Import complaints_dict into database
complaints_import_list = collection.insert_many(complaints_dict)

# Print output
print(len(complaints_import_list.inserted_ids))

5000


In [125]:
# view first item to check
first_complaint = collection.find_one()
print(first_complaint)

{'_id': '66807860', 'created_date': '2025-11-12 13:22:53', 'incident_zip': '11373', 'incident_address': '40-40 79 STREET', 'cross_street_1': 'ROOSEVELT AVENUE', 'cross_street_2': '41 AVENUE', 'intersection_street_1': 'ROOSEVELT AVENUE', 'intersection_street_2': '41 AVENUE', 'address_type': 'ADDRESS', 'city': 'ELMHURST', 'landmark': '79 STREET', 'community_board': '04 QUEENS', 'borough': 'QUEENS', 'x_coordinate_state_plane': 1015673, 'y_coordinate_state_plane': 211313, 'open_data_channel_type': 'PHONE', 'park_facility_name': 'Unspecified', 'latitude': 40.746626, 'longitude': -73.886595, 'full_adress': '40-40 79 STREET, QUEENS, NY 11373', 'agency_details': {'agency': 'NYPD', 'agency_name': 'New York City Police Department'}, 'complaint_type_details': {'complaint_id': 'NYPD0', 'complaint_type': 'Noise - Residential', 'complaint_descriptor': 'Loud Music/Party', 'location_type': 'Residential Building/House', 'agency_details': {'agency': 'NYPD', 'agency_name': 'New York City Police Departmen

## Run Sample Queries in MongoDB 

### Task 1: Get a list of all distinct call center IDs

In [126]:
# Set timer
t1 = time.time()

# define and run query
cc_ids = collection.distinct("operator_details.call_center_details.cc_id")

# Close timer 
t_mongo_1 = time.time() - t1

print(cc_ids,'\n')
print(f'Query Time: {t_mongo_1:.6f} SEC')

['CC_1', 'CC_10', 'CC_100', 'CC_101', 'CC_102', 'CC_103', 'CC_104', 'CC_105', 'CC_106', 'CC_107', 'CC_108', 'CC_109', 'CC_11', 'CC_110', 'CC_111', 'CC_112', 'CC_113', 'CC_114', 'CC_115', 'CC_116', 'CC_117', 'CC_118', 'CC_119', 'CC_12', 'CC_120', 'CC_121', 'CC_122', 'CC_123', 'CC_124', 'CC_125', 'CC_126', 'CC_127', 'CC_128', 'CC_129', 'CC_13', 'CC_130', 'CC_131', 'CC_132', 'CC_133', 'CC_134', 'CC_135', 'CC_136', 'CC_137', 'CC_138', 'CC_139', 'CC_14', 'CC_140', 'CC_141', 'CC_142', 'CC_143', 'CC_144', 'CC_145', 'CC_146', 'CC_147', 'CC_148', 'CC_149', 'CC_15', 'CC_150', 'CC_151', 'CC_152', 'CC_153', 'CC_154', 'CC_155', 'CC_156', 'CC_157', 'CC_158', 'CC_159', 'CC_16', 'CC_160', 'CC_161', 'CC_162', 'CC_163', 'CC_164', 'CC_165', 'CC_166', 'CC_167', 'CC_168', 'CC_169', 'CC_17', 'CC_170', 'CC_171', 'CC_172', 'CC_173', 'CC_174', 'CC_175', 'CC_176', 'CC_177', 'CC_178', 'CC_179', 'CC_18', 'CC_180', 'CC_181', 'CC_182', 'CC_183', 'CC_184', 'CC_185', 'CC_186', 'CC_187', 'CC_188', 'CC_189', 'CC_19', '

### Task 2: Pull number of complaints with agency_name = 'Department of Sanitation'

In [127]:
# Set timer
t2 = time.time()

# Define and Execute query
num_DOT = collection.count_documents({'complaint_type_details.agency_details.agency_name':'Department of Sanitation'})

# Output result:
print(f'No. of complaints with agency_name == Department of Transportation: {num_DOT}\n')

# Close timer
t_mongo_2 = time.time() - t2
print(f'Query Time: {t_mongo_2:.6f} SEC')

No. of complaints with agency_name == Department of Transportation: 1

Query Time: 0.005882 SEC


## Run Equivalent Queries in SQL

### Task 1: Get a list of all distinct call center IDs

In [128]:
# Establish database connection
time_conn = time.time()
with psycopg.connect(dbname="mesa8413", user="postgres", password="MESA8413") as conn:
    print(f'DATABASE CONNECTED: {time.time() - time_conn} SEC\n')

    # Establish cursor
    with conn.cursor() as cur:

        # Set timer
        t3 = time.time()

        # Define query
        query1 = 'SELECT cc_id FROM nyc311.call_center'

        # Execute
        cur.execute(query1)

        # Output restult
        print('Call Center IDs:', cur.fetchall(), '\n')

        
        # Close timer
        t_postgres_1 = time.time() - t3
        print(f'Query Time: {t_postgres_1:.6f} SEC')

DATABASE CONNECTED: 0.09435510635375977 SEC

Call Center IDs: [('CC_1',), ('CC_2',), ('CC_3',), ('CC_4',), ('CC_5',), ('CC_6',), ('CC_7',), ('CC_8',), ('CC_9',), ('CC_10',), ('CC_11',), ('CC_12',), ('CC_13',), ('CC_14',), ('CC_15',), ('CC_16',), ('CC_17',), ('CC_18',), ('CC_19',), ('CC_20',), ('CC_21',), ('CC_22',), ('CC_23',), ('CC_24',), ('CC_25',), ('CC_26',), ('CC_27',), ('CC_28',), ('CC_29',), ('CC_30',), ('CC_31',), ('CC_32',), ('CC_33',), ('CC_34',), ('CC_35',), ('CC_36',), ('CC_37',), ('CC_38',), ('CC_39',), ('CC_40',), ('CC_41',), ('CC_42',), ('CC_43',), ('CC_44',), ('CC_45',), ('CC_46',), ('CC_47',), ('CC_48',), ('CC_49',), ('CC_50',), ('CC_51',), ('CC_52',), ('CC_53',), ('CC_54',), ('CC_55',), ('CC_56',), ('CC_57',), ('CC_58',), ('CC_59',), ('CC_60',), ('CC_61',), ('CC_62',), ('CC_63',), ('CC_64',), ('CC_65',), ('CC_66',), ('CC_67',), ('CC_68',), ('CC_69',), ('CC_70',), ('CC_71',), ('CC_72',), ('CC_73',), ('CC_74',), ('CC_75',), ('CC_76',), ('CC_77',), ('CC_78',), ('CC_79',)

### Task 2: Pull number of complaints with agency_name = 'Department of Sanitation'

In [129]:
# Establish database connection
time_conn = time.time()
with psycopg.connect(dbname="mesa8413", user="postgres", password="MESA8413") as conn:
    print(f'DATABASE CONNECTED: {time.time() - time_conn} SEC\n')

    # Establish cursor
    with conn.cursor() as cur:

        # Set timer
        t4 = time.time()

        # Define query
        query2 = '''
            WITH incident_short AS ( SELECT * 
                FROM nyc311.complaint_incident 
                ORDER BY created_date ASC
                LIMIT 5000
                )
                SELECT COUNT(*) FROM incident_short AS I
                    LEFT JOIN ( SELECT *
                        FROM nyc311.complaint_type AS T 
                        LEFT JOIN nyc311.agency AS A 
                        ON (T.agency = A.agency)
                    ) AS TA
                    ON (I.complaint_id = TA.complaint_id)
                    WHERE agency_name = 'Department of Sanitation'
            '''

        # Execute
        cur.execute(query2)

        # Output restult
        print('Number of matching records:', cur.fetchone()[0])

        
        # Close timer
        t_postgres_2 = time.time() - t4
        print(f'Query Time: {t_postgres_2:.6f} SEC')

DATABASE CONNECTED: 0.10174870491027832 SEC

Number of matching records: 315
Query Time: 0.038740 SEC


## Compare query results

In [130]:
# Create dataframe of timing results
n = 6
data = {
    'MongoDB':[round(t_mongo_1, n), round(t_mongo_2, n)],
    'Postgresql':[round(t_postgres_1, n), round(t_postgres_2, n)],
    'Best Performance':[('MongoDB' if (t_mongo_1 < t_postgres_1) else 'Postgresql'),('MongoDB' if (t_mongo_2 < t_postgres_2) else 'Postgresql')],
    'Time Advantage':[np.abs(t_mongo_1 - t_postgres_1), np.abs(t_mongo_2 - t_postgres_2)]
}

timing_df = pd.DataFrame(data, index=['Task1', 'Task2'])
display(timing_df)

Unnamed: 0,MongoDB,Postgresql,Best Performance,Time Advantage
Task1,0.008678,0.007538,Postgresql,0.00114
Task2,0.005882,0.03874,MongoDB,0.032859


In our first task, we asked both databases to retrieve a list of all (distinct) call center IDs. Our Postgresql database
slightly outperformed our MongoDB database on this task. This makes sense from our knowledge of the structure of these
two databases: on Posstgresql, we simply had to read a single column/attribute from a single table, while in MondoDB every
document had to be sequentially queried, and duplicates removed.

Our second task involved finding the number of records with the given agency name 'Department of Sanitation.' Note, importantly,
that both queries pull from a subsample of 5000 records. This task is relatively straightforward in MongoDB, as it requires sequentially
scanning all documents and retrieving the same value, and counting the number of exact matches. For the Posstgresql database, however,
this query involves joining three tables (complaint_incident, complaint_type, and agency) before a count of matches can be performed.
Accordingly, MongoDB outperforms Postgresql on this task, by close to an order of magnitude (0.006 vs. 0.04 seconds).

# PART 5(2): Access-Control and Security 

## Create analyst and guest roles

In [150]:
# Create roles & assign permissions

# Establish database connection
time_conn = time.time()
with psycopg.connect(dbname="mesa8413", user="postgres", password="MESA8413") as conn:
    print(f'DATABASE CONNECTED: {time.time() - time_conn} SEC\n')

    # Establish cursor
    with conn.cursor() as cur:

        # Create ANALYST: Access to everything BUT operator salary, operator name, operator SSN, and call center street address

        # Create role (skip if role already exists)
        cur.execute('''
        DO 
        $do$
        BEGIN
            IF EXISTS (
                SELECT FROM pg_catalog.pg_roles
                WHERE  rolname = 'analyst') THEN
                RAISE NOTICE 'Role "analyst" already exists. Skipping.';
            ELSE
                CREATE ROLE analyst LOGIN PASSWORD 'MESA8413ANALYST';
            END IF;
        END
        $do$
        ''')

        # Grand Schema access
        cur.execute('GRANT USAGE ON SCHEMA nyc311 TO analyst;')

        # Assign access to selected tables
        cur.execute('GRANT SELECT ON nyc311.complaint_incident TO analyst;')
        cur.execute('GRANT SELECT ON nyc311.complaint_type TO analyst;')
        cur.execute('GRANT SELECT ON nyc311.complaint_status TO analyst;')
        cur.execute('GRANT SELECT ON nyc311.road_details TO analyst;')
        cur.execute('GRANT SELECT ON nyc311.taxi_details TO analyst;')
        cur.execute('GRANT SELECT ON nyc311.agency TO analyst;')

        # Assign column-based access to operator & call center info
        cur.execute('GRANT SELECT (cc_id, cc_borough) ON nyc311.call_center TO analyst;')
        cur.execute('GRANT SELECT (op_id, op_cc) ON nyc311.call_operator TO analyst;')

        print('ROLE analyst CREATED.')

        # Create GUEST: Access to complaint_indicent, complaint_type, complaint_status, and agency

        # Create role (skip if role already exists)
        cur.execute('''
        DO 
        $do$
        BEGIN
            IF EXISTS (
                SELECT FROM pg_catalog.pg_roles
                WHERE  rolname = 'guest') THEN
                RAISE NOTICE 'Role "guest" already exists. Skipping.';
            ELSE
                CREATE ROLE guest LOGIN PASSWORD 'MESA8413GUEST';
            END IF;
        END
        $do$
        ''')
        
        # Grand Schema access
        cur.execute('GRANT USAGE ON SCHEMA nyc311 TO guest;')
        
        # Assign access to selected tables
        cur.execute('GRANT SELECT ON nyc311.complaint_incident TO guest;')
        cur.execute('GRANT SELECT ON nyc311.complaint_type TO guest;')
        cur.execute('GRANT SELECT ON nyc311.complaint_status TO guest;')
        cur.execute('GRANT SELECT ON nyc311.agency TO guest;')

        print('ROLE guest CREATED.')
        
        conn.commit()

print(f'CONNECTION CLOSED. TOTAL TRANSACTION TIME: {time.time() - time_conn} SEC')

DATABASE CONNECTED: 0.11222410202026367 SEC

ROLE analyst CREATED.
ROLE guest CREATED.
CONNECTION CLOSED. TOTAL TRANSACTION TIME: 0.1518385410308838 SEC


## Test Role Access

In [163]:
# First, define our test queries
# All three queries below were proven to work with role 'postgres' in Part 3

# Query 1: Requires access to complaint_incident only
# both roles should succeed
query1 = '''
SELECT COUNT(community_board), community_board
	FROM nyc311.complaint_incident
	GROUP BY community_board
	ORDER BY community_board;
'''

# Query 2: Requires access to complaint_incident, taxi_details, and road_details
# analyst should suceed; guest should not
query2 = '''
SELECT I.unique_key, I.created_date, I.incident_address, I.borough, T.taxi_pick_up_location, R.bridge_highway_name
	FROM nyc311.complaint_incident AS I
	INNER JOIN nyc311.taxi_details AS T 
	ON (I.unique_key = T.unique_key)
	INNER JOIN nyc311.road_details AS R 
	ON (I.unique_key = R.unique_key);
'''

# Query 3: Requires access to op_name and op_salary
# both roles should fail
query3 = '''
SELECT op_name, op_salary, op_cc, AVG(op_salary) OVER (PARTITION BY op_cc) AS cc_avg_salary
	FROM nyc311.call_operator
	ORDER BY op_cc ASC, op_salary DESC;
'''

# Establish database connection: ANALYST
time_conn = time.time()
with psycopg.connect(dbname="mesa8413", user="analyst", password="MESA8413ANALYST") as conn:
    print(f'DATABASE CONNECTED VIA UESR analyst: {time.time() - time_conn} SEC\n')

    # Establish cursor
    with conn.cursor() as cur:
        
        # --- Q1 ---
        try:
            # Try to execute first query
            cur.execute(query1)
            
            # Save restults
            results_analyst_1 = ['query successful', None, cur.fetchall()]

        except Exception as e:
            
            # Save error as result
            results_analyst_1 = ['query failed', e, None]

# Establish database connection: ANALYST
time_conn = time.time()
with psycopg.connect(dbname="mesa8413", user="analyst", password="MESA8413ANALYST") as conn:
    print(f'DATABASE CONNECTED VIA UESR analyst: {time.time() - time_conn} SEC\n')

    # Establish cursor
    with conn.cursor() as cur:
        
        #  --- Q2 ---
        try:
            # Try to execute second query
            cur.execute(query2)
            
            # Save restults
            results_analyst_2 = ['query successful', None, cur.fetchall()]
            
        except Exception as e:
            
            # Save error as result
            results_analyst_2 = ['query failed', e, None]
            
# Establish database connection: ANALYST
time_conn = time.time()
with psycopg.connect(dbname="mesa8413", user="analyst", password="MESA8413ANALYST") as conn:
    print(f'DATABASE CONNECTED VIA UESR analyst: {time.time() - time_conn} SEC\n')

    # Establish cursor
    with conn.cursor() as cur:
        
        #  --- Q3 ---
        try:
            # Try to execute third query
            cur.execute(query3)
            
            # Save restults
            results_analyst_3 = ['query successful', None, cur.fetchall()]
            
        except Exception as e:
            
            # Save error as result
            results_analyst_3 = ['query failed', e, None]

print(f'SESSION analyst CLOSED.\n')

# Establish database connection: GUEST
time_conn = time.time()
with psycopg.connect(dbname="mesa8413", user="guest", password='MESA8413GUEST') as conn:
    print(f'DATABASE CONNECTED VIA UESR guest: {time.time() - time_conn} SEC\n')

    # Establish cursor
    with conn.cursor() as cur:
        
        # --- Q1 ---
        try:
            # Try to execute first query
            cur.execute(query1)
            
            # Save restults
            results_guest_1 = ['query successful', None, cur.fetchall()]

        except Exception as e:
            
            # Save error as result
            results_guest_1 = ['query failed', e, None]

# Establish database connection: GUEST
time_conn = time.time()
with psycopg.connect(dbname="mesa8413", user="guest", password='MESA8413GUEST') as conn:
    print(f'DATABASE CONNECTED VIA UESR guest: {time.time() - time_conn} SEC\n')

    # Establish cursor
    with conn.cursor() as cur:
        
        #  --- Q2 ---
        try:
            # Try to execute second query
            cur.execute(query2)
            
            # Save restults
            results_guest_2 = ['query successful', None, cur.fetchall()]
            
        except Exception as e:
            
            # Save error as result
            results_guest_2 = ['query failed', e, None]
            
# Establish database connection: GUEST
time_conn = time.time()
with psycopg.connect(dbname="mesa8413", user="guest", password='MESA8413GUEST') as conn:
    print(f'DATABASE CONNECTED VIA UESR guest: {time.time() - time_conn} SEC\n')

    # Establish cursor
    with conn.cursor() as cur:
        #  --- Q3 ---
        try:
            # Try to execute third query
            cur.execute(query3)
            
            # Save restults
            results_guest_3 = ['query successful', None, cur.fetchall()]
            
        except Exception as e:
            
            # Save error as result
            results_guest_3 = ['query failed', e, None]

print(f'SESSION guest CLOSED.')

DATABASE CONNECTED VIA UESR analyst: 0.1809556484222412 SEC

DATABASE CONNECTED VIA UESR analyst: 0.15890049934387207 SEC

DATABASE CONNECTED VIA UESR analyst: 0.10323429107666016 SEC

SESSION analyst CLOSED.

DATABASE CONNECTED VIA UESR guest: 0.10705232620239258 SEC

DATABASE CONNECTED VIA UESR guest: 0.0941460132598877 SEC

DATABASE CONNECTED VIA UESR guest: 0.15746212005615234 SEC

SESSION guest CLOSED.


In [165]:
# Evaluate output

# Create DF of results
results_data = {
    'ANALYST': [results_analyst_1[0], results_analyst_2[0], results_analyst_3[0]],
    'ANALYST ERRORS': [results_analyst_1[1], results_analyst_2[1], results_analyst_3[1]],
    'GUEST': [results_guest_1[0], results_guest_2[0], results_guest_3[0]],
    'GUEST ERRORS': [results_guest_1[1], results_guest_2[1], results_guest_3[1]]
}

results_df = pd.DataFrame(results_data, index=['Query1','Query2','Query3'])

display(results_df)

Unnamed: 0,ANALYST,ANALYST ERRORS,GUEST,GUEST ERRORS
Query1,query successful,,query successful,
Query2,query successful,,query failed,permission denied for table taxi_details
Query3,query failed,permission denied for table call_operator,query failed,permission denied for table call_operator


In case the session data is lost, here are the results from running the above code locally:

![image.png](attachment:b16ec4e5-93ce-497d-ab22-1944dca7f1ee.png)

None of the data in this dataset are subject to HIPAA or GLBA, as it includes neither personal health data, 
and is collected by the municipal government instead of a financial institution. However, it would be subject to
the NYS Personal Privacy Protection Law (PPPL), which regulates how state agencies can collect and use personal
data. 

Note that the public version of this dataset does not include any identifying information about the individual
who called, other than the location from which they filed a complaint. It is possible that PII such as names or other
identifying information is collected by NYC 311 for certain calls: their website notes several types of complaints,
including apartment maintenance issues, do require contact information from the complaintant. NYC offers a privacy 
policy for information submitted via NYC.gov, which includes the following statement:

"The City does not rent or sell personally identifiable information (e.g., information such as name, address, phone number, e-mail, etc., or other information that identifies or could lead to the identification of a user as a particular person), nor would the City exchange or trade such information with third-parties without a user's explicit permission" (https://www.nyc.gov/main/nyc-gov-privacy-policy).

In the database which we have created for this project, there is (ssynthetic) PII, including call operator names and SSNs.
As such, this information (along with salary) is redacted from both the analyst and guest roles. The analyst is also prohibited from accessing
the addresses of call centers; the guest is limited even further, unable to see call center boroughs (or taxi or road details).
Neither a data analyst or guest would have a valid reason to access personal details of call center employees; presumably that information
would be restricted to database administrators and members of HR.