In [None]:
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

# Function to read database credentials from a file
def read_db_creds(file_path):
    """Parse a ``key=value`` credentials file into a dictionary.

    Each non-empty line is split on the FIRST ``=`` only, so values that
    themselves contain ``=`` (e.g. passwords) are preserved intact.
    Blank lines and lines starting with ``#`` are ignored, and whitespace
    around keys and values is stripped.

    Parameters
    ----------
    file_path : str or path-like
        Location of the credentials file.

    Returns
    -------
    dict
        Mapping of key to value, both as strings.

    Raises
    ------
    ValueError
        If a non-empty, non-comment line contains no ``=`` separator.
    OSError
        If the file cannot be opened.
    """
    creds = {}
    with open(file_path, 'r') as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            # Tolerate blank lines (e.g. a trailing newline) and comments.
            if not line or line.startswith('#'):
                continue
            key, sep, value = line.partition('=')
            if not sep:
                # Do not echo the line itself: it may contain a secret.
                raise ValueError(
                    f"{file_path}: line {line_no} is not in key=value form"
                )
            creds[key.strip()] = value.strip()
    return creds

# Read credentials from dbcreds.txt (kept out of the notebook so that no
# secret is stored in the notebook source or its outputs)
creds = read_db_creds('dbcreds.txt')

# Connection settings
DATABASE_TYPE = 'postgresql'
DBAPI = 'psycopg2'
HOST = creds['host']
USER = creds['user']
PASSWORD = creds['password']
DATABASE = 'univ'
PORT = int(creds['port'])

# Build the connection URL. The user name and password are percent-encoded
# (safe='' encodes every reserved character, including '/') so that characters
# such as '@', ':', '/' or '%' inside the credentials cannot be mistaken for
# URL delimiters. Credentials without reserved characters are unchanged.
connection_string = (
    f"{DATABASE_TYPE}+{DBAPI}://"
    f"{quote(USER, safe='')}:{quote(PASSWORD, safe='')}"
    f"@{HOST}:{PORT}/{DATABASE}"
)

# Create the engine that every query cell below uses
engine = create_engine(connection_string)


In [67]:
# Select distinct
# DISTINCT collapses duplicate rows, so each building that has at least one
# classroom with capacity below 100 is listed exactly once.
query = '''
SELECT DISTINCT building
FROM classroom
WHERE capacity < 100;
'''

In [68]:
# Run the SQL held in `query` on the engine and render the resulting frame
df = pd.read_sql_query(sql=query, con=engine)
df

Unnamed: 0,building
0,Taylor
1,Painter
2,Watson


In [69]:
# Select ALL
# ALL is the explicit spelling of the default behaviour: duplicates are kept,
# so a building is returned once for every classroom row with capacity < 100.
query = '''
SELECT ALL building
FROM classroom
WHERE capacity < 100;
'''

In [70]:
# Run the SQL held in `query` on the engine and render the resulting frame
df = pd.read_sql_query(sql=query, con=engine)
df

Unnamed: 0,building
0,Painter
1,Taylor
2,Watson
3,Watson


In [71]:
# Cartesian product (and more)
# Listing two tables in FROM pairs every student row with every department
# row. The WHERE clause then keeps only the pairs whose dept_name values match
# (turning the product into a join) and whose budget is below 100000.
query = '''
select name, budget
from student, department
where student.dept_name = department.dept_name and budget < 100000;
'''

In [72]:
# Run the SQL held in `query` on the engine and render the resulting frame
df = pd.read_sql_query(sql=query, con=engine)
df

Unnamed: 0,name,budget
0,Brandt,50000.0
1,Peltier,70000.0
2,Levy,70000.0
3,Sanchez,80000.0
4,Snow,70000.0
5,Aoi,85000.0
6,Bourikas,85000.0
7,Tanaka,90000.0


In [73]:
# Rename AS operation
# AS assigns the table aliases S and D and relabels the output columns as
# studentname and deptbudget. The rows are student/department pairs with
# matching dept_name and a budget below 100000.
query = '''
select S.name as studentname, budget as deptbudget
from student as S, department as D
where S.dept_name = D.dept_name and budget < 100000;
'''

In [74]:
# Run the SQL held in `query` on the engine and render the resulting frame
df = pd.read_sql_query(sql=query, con=engine)
df

Unnamed: 0,studentname,deptbudget
0,Brandt,50000.0
1,Peltier,70000.0
2,Levy,70000.0
3,Sanchez,80000.0
4,Snow,70000.0
5,Aoi,85000.0
6,Bourikas,85000.0
7,Tanaka,90000.0


In [2]:
# Fetch every column of the student table
query = "SELECT * FROM student;"

# Load the result set into a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Preview only the first rows rather than the whole table
df.head()

Unnamed: 0,id,name,dept_name,tot_cred
0,128,Zhang,Comp. Sci.,102.0
1,12345,Shankar,Comp. Sci.,32.0
2,19991,Brandt,History,80.0
3,23121,Chavez,Finance,110.0
4,44553,Peltier,Physics,56.0


In [12]:
# Join instructors to departments on dept_name and keep the names where the
# department is 'Finance' OR the building is 'Watson' or 'Taylor'. The
# parentheses group the OR so it is evaluated before being AND-ed with the
# join condition.
# NOTE(review): `name` and `building` are unqualified — presumably
# instructor.name and department.building; confirm against the schema.
query = '''
SELECT name
FROM instructor I, department D
WHERE D.dept_name = I.dept_name
    AND (I.dept_name = 'Finance'
        OR building in ('Watson', 'Taylor'));
'''

In [13]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,name
0,Srinivasan
1,Wu
2,Einstein
3,Gold
4,Katz
5,Singh
6,Crick
7,Brandt
8,Kim


In [34]:
# Fetch every row and every column of the course table.
query1 = '''
SELECT * FROM course;
'''

In [35]:
# Execute the SQL in `query1` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query1, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,course_id,title,dept_name,credits
0,BIO-101,Intro. to Biology,Biology,4.0
1,BIO-301,Genetics,Biology,4.0
2,BIO-399,Computational Biology,Biology,3.0
3,CS-101,Intro. to Computer Science,Comp. Sci.,4.0
4,CS-190,Game Design,Comp. Sci.,4.0
5,CS-315,Robotics,Comp. Sci.,3.0
6,CS-319,Image Processing,Comp. Sci.,3.0
7,CS-347,Database System Concepts,Comp. Sci.,3.0
8,EE-181,Intro. to Digital Systems,Elec. Eng.,3.0
9,FIN-201,Investment Banking,Finance,3.0


In [38]:
# LIKE pattern matching: each `_` matches exactly one character and `%`
# matches any sequence of characters, so this selects the titles of courses
# whose course_id has exactly three characters before the hyphen.
query = '''
SELECT title
FROM course
WHERE course_id LIKE '___-%%';
'''
# Why `%%` rather than `%`: `%` is NOT special inside a plain Python string
# literal. The doubling is an escape for the database driver, which uses
# %-style parameter placeholders (psycopg2 is configured as the DBAPI in this
# notebook), so a literal percent sign is written as `%%`.
# NOTE(review): whether the escape is strictly required depends on how
# pandas/SQLAlchemy hands the string to the driver; in a LIKE pattern `%%` and
# `%` match the same rows, so the query is correct either way.

In [39]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,title
0,Intro. to Biology
1,Genetics
2,Computational Biology
3,Investment Banking
4,World History
5,Physical Principles


In [40]:
# order by
# Sort students by dept_name ascending; rows that share a dept_name are then
# ordered by tot_cred descending.
query = '''
SELECT name, dept_name, tot_cred
FROM student
ORDER BY dept_name ASC, tot_cred DESC;
'''

In [41]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,name,dept_name,tot_cred
0,Tanaka,Biology,120.0
1,Zhang,Comp. Sci.,102.0
2,Brown,Comp. Sci.,58.0
3,Williams,Comp. Sci.,54.0
4,Shankar,Comp. Sci.,32.0
5,Bourikas,Elec. Eng.,98.0
6,Aoi,Elec. Eng.,60.0
7,Chavez,Finance,110.0
8,Brandt,History,80.0
9,Sanchez,Music,38.0


In [42]:
# In Operator
# IN tests membership in a list of values: keep the teaches rows whose
# semester is 'Fall' or 'Spring' and whose year is 2018. No DISTINCT is used,
# so the same course_id can be returned more than once.
query = '''
SELECT course_id
from teaches
where semester in ('Fall', 'Spring')
    and year=2018;
'''

In [43]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,course_id
0,CS-315
1,FIN-201
2,MU-199
3,HIS-351
4,CS-101
5,CS-319
6,CS-319


In [44]:
# union
# UNION combines the results of the two SELECTs (Fall 2018 and Spring 2018)
# and removes duplicate rows; UNION ALL would be needed to keep duplicates.
query = '''
SELECT course_id
from teaches
where semester = 'Fall'
    and year = 2018
UNION
SELECT course_id
from teaches
where semester = 'Spring'
    and year = 2018;
'''

In [45]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,course_id
0,CS-101
1,CS-315
2,CS-319
3,FIN-201
4,HIS-351
5,MU-199


In [48]:
# intersect
# INTERSECT keeps only the names returned by BOTH SELECTs: names of
# instructors in 'Comp. Sci.' or 'Finance' that also appear among the names of
# instructors with salary below 80000. The comparison is on the name value.
query = '''
select name
from instructor
where dept_name in ('Comp. Sci.', 'Finance')
intersect
select name
from instructor
where salary < 80000;
'''

In [49]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,name
0,Srinivasan
1,Katz


In [50]:
# except
# EXCEPT returns the names from the first SELECT ('Comp. Sci.' or 'Finance'
# instructors) that do not appear in the second SELECT (instructors with a
# salary strictly between 70000 and 90000).
query = '''
select name
from instructor
where dept_name in ('Comp. Sci.', 'Finance')
except
select name
from instructor
where salary < 90000 and salary > 70000;
'''

In [51]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,name
0,Srinivasan
1,Brandt
2,Wu


In [52]:
# avg
# Average classroom capacity per building. HAVING filters the groups after
# aggregation, keeping only buildings whose average capacity exceeds 25.
# The aggregate has no alias, so the database chooses the result column name.
query = '''
select building, avg(capacity)
from classroom
group by building
having avg(capacity) > 25;
'''

In [53]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,building,avg
0,Taylor,70.0
1,Packard,500.0
2,Watson,40.0


In [54]:
# min
# Smallest salary over all instructor rows, returned in a single row under
# the column name least_salary.
query = '''
select min(salary) as least_salary
from instructor;
'''

In [55]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,least_salary
0,40000.0


In [56]:
# max
# Largest tot_cred value over all student rows, returned in a single row
# under the column name max_credits.
query = '''
select max(tot_cred) as max_credits
from student;
'''

In [57]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,max_credits
0,120.0


In [58]:
# count
# For each building, count the section rows that have a non-NULL course_id
# and report the figure as course_count.
query = '''
select building, count(course_id) as course_count
from section
group by building;
'''                   

In [59]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,building,course_count
0,Taylor,5
1,Packard,4
2,Painter,3
3,Watson,3


In [60]:
# sum
# Total of the credits column for each dept_name in the course table,
# reported as sum_credits.
query = '''
select dept_name, sum(credits) as sum_credits
from course
group by dept_name;
'''

In [61]:
# Execute the SQL in `query` via the engine and collect the rows in a DataFrame
df = pd.read_sql(sql=query, con=engine)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,dept_name,sum_credits
0,Finance,3.0
1,History,3.0
2,Physics,4.0
3,Music,3.0
4,Comp. Sci.,17.0
5,Biology,11.0
6,Elec. Eng.,3.0
