### Step 1: Download the normalized college dataset:
#### gsutil cp gs://cs327e-open-access/college_normalized.zip .
#### unzip college_normalized.zip

### Step 2: Run the code samples in sequence to populate the Firestore database

In [1]:
import pandas as pd
from google.cloud import firestore
# Shared Firestore client: every later cell reads and writes through `db`.
# No project or credentials are passed explicitly; presumably the client
# library resolves them from the notebook environment -- confirm before
# running this outside the original setup.
db = firestore.Client()

In [2]:
# Load class.csv into the top-level 'class' collection, one document per
# CSV row, keyed by the value in the first column (the course number).
# All writes are queued on a single batch and sent with one commit.
batch = db.batch()

df = pd.read_csv(
    '/home/jupyter/college_normalized/class.csv',
    sep=',',
    header=0,
    lineterminator='\n',
)
class_collection = db.collection('class')

for class_row in df.values.tolist():
    # Columns are read by position: 0 = cno, 1 = cname, 2 = credits.
    class_fields = {
        'cno': class_row[0],
        'cname': class_row[1],
        'credits': class_row[2],
    }
    batch.set(class_collection.document(class_row[0]), class_fields)

# Kept as the cell's final expression so the notebook displays the write
# results returned by the commit.
batch.commit()

[update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1614989409
   nanos: 101582000
 },
 update_time {
   seconds: 1615046007
   nanos: 865656000
 }]

In [3]:
# Populate the 'student' collection from student.csv. Each student document
# also gets a 'classes' subcollection with one document per takes.csv row
# whose first column equals the student's id.

df = pd.read_csv('/home/jupyter/college_normalized/student.csv', sep=',', header=0, lineterminator='\n')
student_rows = df.values.tolist()

df = pd.read_csv('/home/jupyter/college_normalized/takes.csv', sep=',', header=0, lineterminator='\n')
takes_rows = df.values.tolist()

# Index the takes rows by student id (column 0) once, so each student finds
# its classes with a single lookup instead of rescanning every takes row.
# Rows keep their original CSV order within each student.
takes_by_sid = {}
for takes_row in takes_rows:
    takes_by_sid.setdefault(takes_row[0], []).append(takes_row)

for student_row in student_rows:

    # A fresh batch per student: the student document and its classes are
    # committed together, and no pending writes are carried over from the
    # previous student's batch.
    batch = db.batch()

    # Columns are read by position: sid, fname, lname, dob, status.
    student_record = {
        'sid': student_row[0],
        'fname': student_row[1],
        'lname': student_row[2],
        'dob': student_row[3],
        'status': student_row[4],
    }

    student_ref = db.collection('student').document(student_row[0])
    batch.set(student_ref, student_record)

    for takes_row in takes_by_sid.get(student_row[0], []):

        # takes.csv columns after the student id: cno, cname, credits, grade.
        class_record = {
            'cno': takes_row[1],
            'cname': takes_row[2],
            'credits': takes_row[3],
            'grade': takes_row[4],
        }

        # The class document lives under this student, keyed by course number.
        classes_ref = student_ref.collection('classes').document(takes_row[1])
        batch.set(classes_ref, class_record)

    batch.commit()
    

In [4]:
# Populate the 'instructor' collection from instructor.csv. Each instructor
# document also gets a 'classes' subcollection with one document per
# teaches.csv row whose first column equals the instructor's id.

df = pd.read_csv('/home/jupyter/college_normalized/instructor.csv', sep=',', header=0, lineterminator='\n')
instructor_rows = df.values.tolist()

df = pd.read_csv('/home/jupyter/college_normalized/teaches.csv', sep=',', header=0, lineterminator='\n')
teaches_rows = df.values.tolist()

# Index the teaches rows by instructor id (column 0) once, so each instructor
# finds its classes with a single lookup instead of rescanning every teaches
# row. Rows keep their original CSV order within each instructor.
teaches_by_tid = {}
for teaches_row in teaches_rows:
    teaches_by_tid.setdefault(teaches_row[0], []).append(teaches_row)

for instructor_row in instructor_rows:

    # A fresh batch per instructor: the instructor document and its classes
    # are committed together, and no pending writes are carried over from the
    # previous instructor's batch.
    batch = db.batch()

    # Columns are read by position: tid, instructor_name, dept.
    instructor_record = {
        'tid': instructor_row[0],
        'instructor_name': instructor_row[1],
        'dept': instructor_row[2],
    }

    instructor_ref = db.collection('instructor').document(instructor_row[0])
    batch.set(instructor_ref, instructor_record)

    for teaches_row in teaches_by_tid.get(instructor_row[0], []):

        # teaches.csv columns after the instructor id: cno, cname, credits.
        class_record = {
            'cno': teaches_row[1],
            'cname': teaches_row[2],
            'credits': teaches_row[3],
        }

        # The class document lives under this instructor, keyed by course number.
        classes_ref = instructor_ref.collection('classes').document(teaches_row[1])
        batch.set(classes_ref, class_record)

    batch.commit()

### Step 3: Run some queries

In [5]:
# Fetch one student document by id and print it, or report that it is missing.
student_ref = db.collection('student').document('paulg')
snapshot = student_ref.get()

if not snapshot.exists:
    print('No such student')
else:
    print(f'{snapshot.id} => {snapshot.to_dict()}')

paulg => {'lname': 'Gore', 'sid': 'paulg', 'fname': 'Paul', 'status': 'CUR', 'dob': '2000-09-17'}


In [6]:
# Print every student document whose 'status' field equals 'CUR'.
current_students = db.collection('student').where('status', '==', 'CUR')

for snapshot in current_students.stream():
    print(f'{snapshot.id} => {snapshot.to_dict()}')

aprilz => {'dob': '2000-10-01', 'sid': 'aprilz', 'fname': 'April', 'status': 'CUR', 'lname': 'Lopez'}
bzen26 => {'dob': '1998-04-22', 'lname': 'Zen', 'sid': 'bzen26', 'status': 'CUR', 'fname': 'Biswa'}
jc => {'status': 'CUR', 'sid': 'jc', 'lname': 'Cowe', 'dob': '2000-04-22', 'fname': 'James'}
jerryh => {'dob': '1999-01-03', 'fname': 'Jerry', 'status': 'CUR', 'lname': 'Hargrove', 'sid': 'jerryh'}
kev18 => {'sid': 'kev18', 'fname': 'Kevin', 'dob': '1999-10-05', 'lname': 'Lin', 'status': 'CUR'}
paulg => {'status': 'CUR', 'lname': 'Gore', 'dob': '2000-09-17', 'sid': 'paulg', 'fname': 'Paul'}
sudeepa4 => {'fname': 'Sudeepa', 'sid': 'sudeepa4', 'status': 'CUR', 'lname': 'Roy', 'dob': '2001-10-01'}


In [7]:
# Print every document in the 'classes' subcollection of instructor 'mitra'.
mitra_ref = db.collection('instructor').document('mitra')
mitra_classes = mitra_ref.collection('classes')

for snapshot in mitra_classes.stream():
    print(f'{snapshot.id} => {snapshot.to_dict()}')

CS313E => {'cno': 'CS313E', 'credits': 3, 'cname': 'Elements of Software Engineering'}
CS329E => {'cname': 'Elements of Web Programming', 'credits': 3, 'cno': 'CS329E'}


In [8]:
# Collection-group query over 'classes': print each matching document whose
# 'credits' field equals 3.
three_credit_classes = db.collection_group('classes').where('credits', '==', 3)

for snapshot in three_credit_classes.stream():
    print(f'{snapshot.id} => {snapshot.to_dict()}')

CS303E => {'cname': 'Elements of Computers and Programming', 'cno': 'CS303E', 'credits': 3}
CS326E => {'credits': 3, 'cname': 'Elements of Networking', 'cno': 'CS326E'}
CS347 => {'cname': 'Data Management', 'cno': 'CS347', 'credits': 3}
CS373 => {'cname': 'Software Engineering', 'cno': 'CS373', 'credits': 3}
CS331E => {'cno': 'CS331E', 'credits': 3, 'cname': 'Elements of Software Engineering II'}
M328K => {'credits': 3, 'cname': 'Intro to Number Theory', 'cno': 'M328K'}
CS313E => {'credits': 3, 'cno': 'CS313E', 'cname': 'Elements of Software Engineering'}
CS329E => {'cno': 'CS329E', 'cname': 'Elements of Web Programming', 'credits': 3}
M362K => {'cname': 'Probability I', 'cno': 'M362K', 'credits': 3}
M362K => {'credits': 3, 'cname': 'Probability I', 'cno': 'M362K'}
CS327E => {'credits': 3, 'cname': 'Elements of Databases', 'cno': 'CS327E'}
M358K => {'credits': 3, 'cname': 'Applied Statistics', 'cno': 'M358K'}
CS373 => {'cname': 'Software Engineering', 'grade': 'B+', 'cno': 'CS373', 'cr

In [9]:
# Print each instructor document, followed by the documents in that
# instructor's 'classes' subcollection.
instructors = db.collection('instructor')

for instructor in instructors.stream():
    print(f'{instructor.id} => {instructor.to_dict()}')

    # Look the subcollection up by the id of the document just printed.
    taught_classes = instructors.document(instructor.id).collection('classes')
    for taught_class in taught_classes.stream():
        print(f'{taught_class.id} => {taught_class.to_dict()}')

bulko => {'tid': 'bulko', 'dept': 'Computer Science', 'instructor_name': 'Bill Bulko'}
CS303E => {'credits': 3, 'cname': 'Elements of Computers and Programming', 'cno': 'CS303E'}
cannata => {'tid': 'cannata', 'dept': 'Computer Science', 'instructor_name': 'Phil Cannata'}
CS326E => {'credits': 3, 'cno': 'CS326E', 'cname': 'Elements of Networking'}
CS347 => {'credits': 3, 'cname': 'Data Management', 'cno': 'CS347'}
downing => {'instructor_name': 'Glenn Downing', 'tid': 'downing', 'dept': 'Computer Science'}
CS373 => {'cname': 'Software Engineering', 'cno': 'CS373', 'credits': 3}
fares => {'tid': 'fares', 'dept': 'Computer Science', 'instructor_name': 'Fares Fraij'}
CS331E => {'cno': 'CS331E', 'cname': 'Elements of Software Engineering II', 'credits': 3}
koch => {'instructor_name': 'Hans Koch', 'dept': 'Mathematics', 'tid': 'koch'}
M328K => {'credits': 3, 'cname': 'Intro to Number Theory', 'cno': 'M328K'}
mitra => {'dept': 'Computer Science', 'instructor_name': 'Shyamal Mitra', 'tid': 'mi