In [1]:
%pip install happybase
%pip install pandas
import happybase
import pandas as pd

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import happybase
# Connect to HBase on localhost:9090 through happybase and sanity-check that
# the 'courses' table is reachable before anything is written to it.
c = happybase.Connection('localhost', 9090)
c.open()
print(c.tables())  # should show [b'courses']
t = c.table(b'courses')
# Raw dump: each item is (row key, {b'family:qualifier': value}), all as bytes.
for key, data in t.scan():
    print(key, data)

# The rows displayed below were created beforehand using HBase shell commands.

[b'courses']
b'CSE101' {b'info:credits': b'4', b'info:name': b'Intro to CS', b'students:1': b'Aarav', b'students:3': b'Bibek'}
b'MAT201' {b'info:credits': b'4', b'info:name': b'Calculus II', b'students:1': b'Sujal', b'students:2': b'Shristi'}
b'PHY150' {b'info:credits': b'3', b'info:name': b'General Physics', b'students:1': b'Aayush', b'students:2': b'Nisha'}
b'STA210' {b'info:credits': b'3', b'info:name': b'Applied Statistics I', b'students:1': b'Kritika', b'students:2': b'Sagar', b'students:3': b'Anish'}


In [3]:
def put_course(t, course_id, name, credits, students):
    """Write a single course row through ``t.put``.

    Args:
        t: Table-like object exposing ``put(row_key, data)``.
        course_id: Row key as text; encoded to bytes before writing.
        name: Course title, stored under ``info:name``.
        credits: Credit value; converted with ``str()`` and stored under
            ``info:credits``.
        students: Iterable of student names. The i-th name (counting from 1)
            is stored under ``students:<i>``.
    """
    row = {
        b"info:name": name.encode(),
        b"info:credits": str(credits).encode(),
    }
    # One column per student, numbered from 1 in iteration order.
    row.update(
        (f"students:{slot}".encode(), student.encode())
        for slot, student in enumerate(students, start=1)
    )
    t.put(course_id.encode(), row)

In [4]:
def print_all(t):
    """Print every row of the table as one human-readable line per course.

    Each line has the form
    ``<row key> | <name> | credits: <credits> | students: <a, b, ...>``.
    Missing ``info:name`` / ``info:credits`` cells are printed as empty text.

    Students are listed in order of their column qualifier: numeric
    qualifiers first, by integer value (so ``students:10`` follows
    ``students:2``), then any non-numeric qualifiers in alphabetical order.

    Args:
        t: Table-like object whose ``scan()`` yields ``(row_key, columns)``
            pairs with bytes keys and bytes values.
    """
    def _slot_order(item):
        # item is (decoded column name, decoded student name).
        qualifier = item[0].split(":")[1]
        try:
            return (0, int(qualifier), "")
        except ValueError:
            # A non-numeric qualifier used to abort the whole listing;
            # it now simply sorts after the numbered students.
            return (1, 0, qualifier)

    for row_key, cols in t.scan():
        name = cols.get(b"info:name", b"").decode()
        credit_text = cols.get(b"info:credits", b"").decode()
        enrolled = sorted(
            (
                (col.decode(), val.decode())
                for col, val in cols.items()
                if col.startswith(b"students:")
            ),
            key=_slot_order,
        )
        print(
            row_key.decode(), "|", name,
            "| credits:", credit_text,
            "| students:", ", ".join(student for _, student in enrolled),
        )


In [5]:
def to_df(t):
    """Load the whole table into a pandas DataFrame, one row per course.

    Args:
        t: Table-like object whose ``scan()`` yields ``(row_key, columns)``
            pairs with bytes keys and bytes values.

    Returns:
        A DataFrame with columns ``course_id``, ``name``, ``credits``,
        ``num_students`` and ``students``, ordered by ``course_id``.
        ``credits`` is parsed as an integer; a missing cell counts as 0 and
        text that is not an integer becomes ``None``. ``students`` holds the
        student names sorted alphabetically.
    """
    records = []
    for row_key, cols in t.scan():
        raw_credits = cols.get(b"info:credits", b"0").decode()
        try:
            credit_value = int(raw_credits)
        except ValueError:
            # Only a failed parse is treated as "unknown"; anything else
            # (e.g. KeyboardInterrupt) is no longer silently swallowed.
            credit_value = None
        student_names = sorted(
            val.decode() for col, val in cols.items() if col.startswith(b"students:")
        )
        records.append({
            "course_id": row_key.decode(),
            "name": cols.get(b"info:name", b"").decode(),
            "credits": credit_value,
            "num_students": len(student_names),
            "students": student_names,
        })
    records.sort(key=lambda record: record["course_id"])
    return pd.DataFrame(
        records,
        columns=["course_id", "name", "credits", "num_students", "students"],
    )

In [6]:
# autoconnect is disabled here, so the connection is opened explicitly.
c = happybase.Connection(host="localhost", port=9090, autoconnect=False)
c.open()

# Create the table with its two column families only when it does not exist yet.
if b"courses" not in c.tables():
    c.create_table('courses', {'info': {'max_versions': 1}, 'students': {'max_versions': 1}})

t = c.table(b"courses")



In [7]:
# Write two course rows with put_course: each call stores info:name,
# info:credits and one students:<i> column per listed student.
put_course(t, "STA210", "Statistics I", 3, ["Kritika", "Sagar", "Anish"])
put_course(t, "ENG101", "Academic Writing", 3, ["Priyanshi", "Rupesh", "Neha"])

In [8]:
# Show the table after the writes above, one formatted line per course.
print_all(t)

CSE101 | Intro to CS | credits: 4 | students: Aarav, Bibek
ENG101 | Academic Writing | credits: 3 | students: Priyanshi, Rupesh, Neha
MAT201 | Calculus II | credits: 4 | students: Sujal, Shristi
PHY150 | General Physics | credits: 3 | students: Aayush, Nisha
STA210 | Statistics I | credits: 3 | students: Kritika, Sagar, Anish


In [9]:
# Update: only info:name is supplied for STA210 (the scan output below
# still shows its credits and students cells).
t.put(b"STA210", {b"info:name": b"Applied Statistics I"})
# Delete: no columns are specified, and ENG101 no longer appears in the scan below.
t.delete(b"ENG101")

In [10]:
# Raw dump (bytes keys and values) to confirm the update and delete took effect.
for key, data in t.scan():
    print(key, data)

b'CSE101' {b'info:credits': b'4', b'info:name': b'Intro to CS', b'students:1': b'Aarav', b'students:3': b'Bibek'}
b'MAT201' {b'info:credits': b'4', b'info:name': b'Calculus II', b'students:1': b'Sujal', b'students:2': b'Shristi'}
b'PHY150' {b'info:credits': b'3', b'info:name': b'General Physics', b'students:1': b'Aayush', b'students:2': b'Nisha'}
b'STA210' {b'info:credits': b'3', b'info:name': b'Applied Statistics I', b'students:1': b'Kritika', b'students:2': b'Sagar', b'students:3': b'Anish'}


In [11]:
# Materialise the table as a DataFrame; the bare last expression renders it.
df = to_df(t)
df

Unnamed: 0,course_id,name,credits,num_students,students
0,CSE101,Intro to CS,4,2,"[Aarav, Bibek]"
1,MAT201,Calculus II,4,2,"[Shristi, Sujal]"
2,PHY150,General Physics,3,2,"[Aayush, Nisha]"
3,STA210,Applied Statistics I,3,3,"[Anish, Kritika, Sagar]"


In [12]:
def get_courses_for_student(table, student_name: str):
    """List the courses in which a given student is enrolled.

    Args:
        table: Table-like object whose ``scan()`` yields
            ``(row_key, columns)`` pairs with bytes keys and bytes values.
        student_name: Exact (case-sensitive) student name to look for.

    Returns:
        Decoded row keys, in scan order, of every row that has at least one
        ``students:*`` column whose value equals ``student_name``. Each
        course appears at most once.
    """
    def _is_enrolled(cols):
        # Short-circuits on the first matching students:* cell.
        return any(
            qualifier.startswith(b"students:") and value.decode() == student_name
            for qualifier, value in cols.items()
        )

    return [
        row_key.decode()
        for row_key, cols in table.scan()
        if _is_enrolled(cols)
    ]

In [13]:
def average_students_per_course(table):
    """Compute the mean number of ``students:*`` columns per row.

    Args:
        table: Table-like object whose ``scan()`` yields
            ``(row_key, columns)`` pairs with bytes column names.

    Returns:
        Total student columns divided by the number of rows, as a float;
        ``0.0`` when the scan yields no rows.
    """
    per_course = [
        sum(1 for qualifier in cols if qualifier.startswith(b"students:"))
        for _, cols in table.scan()
    ]
    if not per_course:
        return 0.0
    return sum(per_course) / len(per_course)

In [14]:
# Reconnect and grab a fresh handle on the table before running the queries.
c = happybase.Connection('localhost', 9090)
c.open()
t = c.table(b"courses")

# Query 1: which courses list this student?
print("Courses for Aarav:", get_courses_for_student(t, "Aarav"))
# Query 2: mean number of student columns per course row.
print("Average students per course:", average_students_per_course(t))


Courses for Aarav: ['CSE101']
Average students per course: 2.25
