# Setup

In [1]:
# Environment setup
import pandas as pd
import numpy as np
# import requests
import os
from pathlib import Path
from dotenv import load_dotenv

# Load .env from the project root (parent of data_cleaning).
# The path is derived from the current working directory, so this presumably
# expects the kernel to be started inside data_cleaning -- TODO confirm.
# It is loaded exactly once; the second, identical call was redundant.
load_dotenv(Path.cwd().resolve().parent / ".env")

# Google Sheets credentials / identifiers. os.getenv returns None for any
# variable that is not set.
GOOGLE_SHEETS_API_KEY = os.getenv('GOOGLE_SHEETS_API_KEY')
PROJECTS_SHEET_ID = os.getenv('PROJECTS_SHEET_ID')
ACTIVES_SHEET_ID = os.getenv('ACTIVES_SHEET_ID')
FALL_ATTENDANCE_SHEET_ID = os.getenv('FALL_ATTENDANCE_SHEET_ID')
BASE_URL = "https://sheets.googleapis.com/v4/spreadsheets"

True

In [2]:
#def get_sheet_titles(spreadsheet_id: str, api_key: str) -> list[str]:
#    """Return all sheet/tab titles in the spreadsheet."""
#    resp = requests.get(
#        f"{BASE_URL}/{spreadsheet_id}",
#        params={
#            "fields": "sheets(properties(title))",
#            "key": api_key,
#        },
#        timeout=30,
#    )
#    resp.raise_for_status()
#    data = resp.json()
#    return [s["properties"]["title"] for s in data.get("sheets", [])]

In [3]:
#def fetch_values_batch(spreadsheet_id: str, api_key: str, sheet_titles: list[str]) -> dict[str, list[list]]:
#    """Batch fetch values for provided sheet titles.
#    Returns mapping of title -> 2D list of cell values (including header row).
#    """
#    if not sheet_titles:
#        return {}
#    # Multiple 'ranges' params are supported by the API
#    params = [("key", api_key), ("valueRenderOption", "UNFORMATTED_VALUE"), ("dateTimeRenderOption", "FORMATTED_STRING")]
#    params.extend(("ranges", title) for title in sheet_titles)
#    resp = requests.get(
#        f"{BASE_URL}/{spreadsheet_id}/values:batchGet",
#        params=params,
#        timeout=60,
#    )
#    resp.raise_for_status()
#    payload = resp.json()

#    values_by_title: dict[str, list[list]] = {}
#    for vr, title in zip(payload.get("valueRanges", []), sheet_titles):
#        values_by_title[title] = vr.get("values", [])
#    return values_by_title

In [4]:
#def values_to_dataframe(values: list[list]) -> pd.DataFrame:
#    """Convert a 2D list from Sheets API to a DataFrame, using first row as header.
#    Pads short rows so all rows match header length.
#    """
#    if not values:
#        return pd.DataFrame()
#    header = [str(h) for h in values[0]]
#    rows = values[1:] if len(values) > 1 else []
#    normalized_rows = [row + [""] * (len(header) - len(row)) for row in rows]
#    return pd.DataFrame(normalized_rows, columns=header)

In [5]:
#def fetch_spreadsheet_as_dataframes(spreadsheet_id: str, api_key: str) -> dict[str, pd.DataFrame]:
#    """Fetch all sheets in a spreadsheet and return {sheet_title: DataFrame}."""
#    titles = get_sheet_titles(spreadsheet_id, api_key)
#    if not titles:
#        return {}
#    values_by_title = fetch_values_batch(spreadsheet_id, api_key, titles)
#    return {title: values_to_dataframe(values_by_title.get(title, [])) for title in titles}

# Load data

In [6]:
# # fetch all data from Google Sheets into DataFrames per sheet
# # Build DataFrames per sheet for each spreadsheet ID found in .env
# PROJECTS_DFS = fetch_spreadsheet_as_dataframes(PROJECTS_SHEET_ID, GOOGLE_SHEETS_API_KEY) if PROJECTS_SHEET_ID else {}
# ACTIVES_DFS = fetch_spreadsheet_as_dataframes(ACTIVES_SHEET_ID, GOOGLE_SHEETS_API_KEY) if ACTIVES_SHEET_ID else {}
# FALL_ATTENDANCE_DFS = fetch_spreadsheet_as_dataframes(FALL_ATTENDANCE_SHEET_ID, GOOGLE_SHEETS_API_KEY) if FALL_ATTENDANCE_SHEET_ID else {}
# Load data from CSV URLs exported from Google Sheets
def _read_sheet_csv(env_var):
    """Read the CSV whose URL is stored in the environment variable `env_var`.

    Args:
        env_var: name of the environment variable holding the CSV URL.

    Returns:
        The parsed CSV as a DataFrame.

    Raises:
        ValueError: if the variable is unset or empty, so a missing .env entry
            is reported by name instead of as an opaque read_csv failure.
    """
    url = os.getenv(env_var)
    if not url:
        raise ValueError(f"{env_var} not found in .env file")
    return pd.read_csv(url)


projects_df = _read_sheet_csv('PROJECTS_SHEET_URL')
actives_df = _read_sheet_csv('ACTIVES_SHEET_URL')
attendance_df = _read_sheet_csv('ATTENDANCE_URL')

In [7]:
actives_df.head()

Unnamed: 0,Timestamp,Name,Year,Are you planning to be an active member this quarter? (All actives have to pay dues)
0,10/2/2025 19:37:26,Spencer,3rd,Yes
1,10/2/2025 19:37:27,Jayden Patel,2nd,Yes
2,10/2/2025 19:39:01,Mattie freaking dao,2nd,Yes
3,10/2/2025 19:42:19,aastha,3rd,Yes
4,10/2/2025 19:42:52,Mihir Joshi,4th,Yes


In [8]:
# Normalise the actives sign-up sheet into typed columns.
ACTIVE_COL = 'Are you planning to be an active member this quarter? (All actives have to pay dues)'
YEAR_ORDINALS = {'1st': 1, '2nd': 2, '3rd': 3, '4th': 4, '5th': 5}

actives_df['Timestamp'] = pd.to_datetime(actives_df['Timestamp'])
actives_df['Name'] = actives_df['Name'].str.lower()

# Ordinals become integers; anything not in the mapping keeps its original value.
actives_df['Year'] = actives_df['Year'].map(YEAR_ORDINALS).fillna(actives_df['Year'])

# Select the answer column by name rather than by position, and compare
# vectorised. Values that are already boolean True are also accepted, so
# re-running this cell does not flip every answer to False.
active_answers = actives_df[ACTIVE_COL]
actives_df[ACTIVE_COL] = active_answers.eq('Yes') | active_answers.eq(True)

In [9]:
actives_df.dtypes

Timestamp                                                                               datetime64[ns]
Name                                                                                            object
Year                                                                                             int64
Are you planning to be an active member this quarter? (All actives have to pay dues)              bool
dtype: object

In [10]:
# Missing statuses become empty strings before the columns are renamed.
projects_df['Status'] = projects_df['Status'].fillna("")

# Positional rename to snake_case: the frame must have exactly these eight
# columns, in this order.
projects_df.columns = [
    'quarter',
    'company',
    'point_of_contact',
    'project_manager',
    'associates',
    'nda',
    'status',
    'description',
]

In [11]:
# NDA flag: True only where the sheet says exactly "Yes" (missing values become False).
projects_df['nda'] = projects_df['nda'] == "Yes"

In [12]:
projects_df.head()

Unnamed: 0,quarter,company,point_of_contact,project_manager,associates,nda,status,description
0,S23,CARI Health,,Ashley Lee & Edmond Hong,,False,,The project focused on two primary objectives:...
1,FA23,UCSD Craft Center,,Sydney Jang,,True,,TCG identified strategies to enhance client en...
2,FA23,MD Revolution,,Dhathry Doppalapudi,,False,,The project focused on completing a competitiv...
3,FA23,Atmo Biosciences,,Isabel Wang,,True,,This project focused on conducting market rese...
4,WI24,Empirical,,Daniel Woo,,True,,Created a list of companies aligning with Empi...


In [13]:
# Build the company dimension table: one sequential id per distinct company,
# numbered in order of first appearance in projects_df.
unique_companies = projects_df['company'].unique()
companies_range = range(len(unique_companies))
company_inds = dict(zip(unique_companies, companies_range))
companies_df = pd.DataFrame({"company_id": companies_range, "name": unique_companies})

# Swap each company name for its id. map() is a direct per-value lookup, so it
# does not rely on replace()'s implicit object -> int downcasting.
projects_df['company'] = projects_df['company'].map(company_inds)

# company

In [14]:
companies_df.head()

Unnamed: 0,company_id,name
0,0,CARI Health
1,1,UCSD Craft Center
2,2,MD Revolution
3,3,Atmo Biosciences
4,4,Empirical


# project

In [15]:
projects_df.head()

Unnamed: 0,quarter,company,point_of_contact,project_manager,associates,nda,status,description
0,S23,0,,Ashley Lee & Edmond Hong,,False,,The project focused on two primary objectives:...
1,FA23,1,,Sydney Jang,,True,,TCG identified strategies to enhance client en...
2,FA23,2,,Dhathry Doppalapudi,,False,,The project focused on completing a competitiv...
3,FA23,3,,Isabel Wang,,True,,This project focused on conducting market rese...
4,WI24,4,,Daniel Woo,,True,,Created a list of companies aligning with Empi...


In [16]:
# Empty statuses are shown as '-'.
projects_df['status'] = projects_df['status'].replace('', '-')

# Boolean flags derived from the free-text status (each computed once).
projects_df['donated'] = projects_df['status'].eq('Donated')
projects_df['dnf'] = projects_df['status'].eq('Did not finish')

# Missing contacts / associate lists are shown as '-'.
projects_df['point_of_contact'] = projects_df['point_of_contact'].fillna('-')
projects_df['associates'] = projects_df['associates'].fillna('-')

In [17]:
projects_df.head()

Unnamed: 0,quarter,company,point_of_contact,project_manager,associates,nda,status,description,donated,dnf
0,S23,0,-,Ashley Lee & Edmond Hong,-,False,-,The project focused on two primary objectives:...,False,False
1,FA23,1,-,Sydney Jang,-,True,-,TCG identified strategies to enhance client en...,False,False
2,FA23,2,-,Dhathry Doppalapudi,-,False,-,The project focused on completing a competitiv...,False,False
3,FA23,3,-,Isabel Wang,-,True,-,This project focused on conducting market rese...,False,False
4,WI24,4,-,Daniel Woo,-,True,-,Created a list of companies aligning with Empi...,False,False


# quarter

In [18]:
# Upper-case the quarter codes, then expand one-letter season prefixes
# (S23 -> SP23, F23 -> FA23, W24 -> WI24). Codes that already use the
# two-letter prefix do not match these patterns and pass through unchanged.
quarter_codes = projects_df['quarter'].str.upper()
for short_pattern, long_form in (
    (r'\bS(\d{2})\b', r'SP\1'),
    (r'\bF(\d{2})\b', r'FA\1'),
    (r'\bW(\d{2})\b', r'WI\1'),
):
    quarter_codes = quarter_codes.str.replace(short_pattern, long_form, regex=True)
projects_df['quarter'] = quarter_codes

# Quarter dimension table: one row per distinct code, in order of first appearance.
quarter_df = pd.DataFrame({"quarter_id": projects_df['quarter'].unique()})
quarter_df.head()

Unnamed: 0,quarter_id
0,SP23
1,FA23
2,WI24
3,SP24
4,FA24


In [19]:
# projects_df['associates']

# member

In [20]:
# Roster file read from the working directory. Its 'x' column supplies member
# names and its 'Year' column the class year when `members` is built below.
coffee = pd.read_csv('coffee.csv')

In [21]:
# members.csv read from the working directory; its 'name' column is used below
# as the list of active names.
# NOTE(review): the same file is later reloaded as `members`, and the cleaned
# `actives_df` is never consulted -- confirm this is the intended source.
df = pd.read_csv('members.csv')

In [22]:
# Build the member table from the roster: names and years come from `coffee`,
# every member starts inactive (status 0) with placeholder role/email.
members = pd.DataFrame(columns=['member_id', 'name', 'year', 'status', 'role', 'email'])
members['name'] = coffee['x']
members['year'] = coffee['Year']
members['status'] = 0
members['role'] = '-'
members['email'] = '-'
members['member_id'] = members.index

# Normalise the active-name list once. Missing names are dropped (they used to
# crash on .lower()) and empty names are skipped (an empty string is a
# substring of every name and would mark everybody active).
# NOTE(review): `df` is members.csv, not the actives sheet -- confirm.
active_names = [
    name
    for name in df['name'].dropna().astype(str).str.lower().str.strip()
    if name
]
active_first_names = {name.split()[0] for name in active_names}

# A member is active when some active name is contained in the member's full
# name, or when the two share the same first name.
# Iterate by index label because .loc below is label-based.
for idx, member_name in members['name'].items():
    member_name_lower = str(member_name).lower().strip()
    if not member_name_lower:
        continue

    member_first_name = member_name_lower.split()[0]
    if member_first_name in active_first_names or any(
        active_name in member_name_lower for active_name in active_names
    ):
        members.loc[idx, 'status'] = 1

members.head()

Unnamed: 0,member_id,name,year,status,role,email
0,0,Aastha Shah,3rd,1,-,-
1,1,Aathi Muthu,3rd,1,-,-
2,2,Aatish Mandalapu,4th,1,-,-
3,3,Aditi Parthan,3rd,1,-,-
4,4,Advay Pradhan,2nd,1,-,-


In [23]:
# Load emails and match them to members by name.
# The matching loop below reads the 'email' column of this file.
emails_df = pd.read_csv('emails.csv')

def extract_name_parts_from_email(email):
    """Guess (first, last) name tokens from the part of an address before '@'.

    The local part is lower-cased and split on '.', '_', '-' and whitespace.
    Only purely alphabetic tokens longer than one character count as names.

    Returns:
        (None, None) for a missing or empty address.
        (first_token, last_token) when two or more name tokens exist.
        (only_token, None) when exactly one exists.
        (leading_chunk, None) when none exist, where leading_chunk is the
        local part cut at its first '.', '_' or '-'.
    """
    if pd.isna(email) or not email:
        return None, None

    local_part = str(email).split('@')[0].lower()
    separators = ('.', '_', '-')

    # Turn every separator into a space so a single split() tokenises the lot.
    spaced = local_part
    for separator in separators:
        spaced = spaced.replace(separator, ' ')
    name_tokens = [tok for tok in spaced.split() if len(tok) > 1 and tok.isalpha()]

    if not name_tokens:
        # No usable token: fall back to whatever precedes the first separator.
        leading_chunk = local_part
        for separator in separators:
            leading_chunk = leading_chunk.split(separator)[0]
        return leading_chunk, None

    if len(name_tokens) == 1:
        return name_tokens[0], None
    return name_tokens[0], name_tokens[-1]

# Match emails to members

# Parse every address once up front: the parsed parts do not depend on the
# member being matched, so redoing this inside the member loop was wasted work.
parsed_emails = []
for email in emails_df['email']:
    if pd.isna(email):
        continue
    email_first, email_last = extract_name_parts_from_email(email)
    email_username = str(email).split('@')[0].lower()
    parsed_emails.append((email, email_username, email_first, email_last))

# Iterate by index label because .loc below is label-based.
for idx, member_name in members['name'].items():
    if members.loc[idx, 'email'] != '-':  # Skip if already has email
        continue

    member_name_lower = str(member_name).lower().strip()
    member_parts = member_name_lower.split()
    member_first_name = member_parts[0] if member_parts else ''
    member_last_name = member_parts[-1] if len(member_parts) > 1 else ''
    member_full_name = ' '.join(member_parts)

    best_match = None
    best_match_score = 0

    # Score every email against this member; the first email reaching the
    # highest score wins (ties do not replace an earlier candidate).
    for email, email_username, email_first, email_last in parsed_emails:
        score = 0

        # Exact match on extracted name parts
        if email_first and member_first_name and email_first == member_first_name:
            score += 2
            if email_last and member_last_name and email_last == member_last_name:
                score += 3  # Strong match with both first and last
            elif len(member_parts) == 1:
                score += 1  # Only first name, but member only has one name

        # Check if email username contains member's name parts
        if member_first_name and member_first_name in email_username:
            score += 1
        if member_last_name and member_last_name in email_username:
            score += 2

        # Check if extracted email parts appear inside the member's full name
        if email_first and email_first in member_full_name:
            score += 1
        if email_last and email_last in member_full_name:
            score += 1

        if score > best_match_score:
            best_match_score = score
            best_match = email

    # Only assign email if we have a reasonable match (score >= 2)
    if best_match and best_match_score >= 2:
        members.loc[idx, 'email'] = best_match

members.head()


Unnamed: 0,member_id,name,year,status,role,email
0,0,Aastha Shah,3rd,1,-,-
1,1,Aathi Muthu,3rd,1,-,-
2,2,Aatish Mandalapu,4th,1,-,-
3,3,Aditi Parthan,3rd,1,-,aparthan@ucsd.edu
4,4,Advay Pradhan,2nd,1,-,adpradhan@ucsd.edu


In [24]:
# Replace `members` with the contents of members.csv.
# NOTE(review): this discards the `members` frame built and email-matched in
# the cells above; every later cell uses the CSV version. Confirm members.csv
# is the curated source of truth and is kept up to date.
members = pd.read_csv('members.csv')
members.head()

Unnamed: 0,member_id,name,year,status,role,email
0,0,Aastha Shah,3rd,1,-,-
1,1,Aathi Muthu,3rd,0,-,-
2,2,Aatish Mandalapu,4th,0,-,-
3,3,Aditi Parthan,3rd,0,-,aparthan@ucsd.edu
4,4,Advay Pradhan,2nd,0,-,adpradhan@ucsd.edu


# assignment

In [25]:
# Placeholder assignment table: one row per member, project not yet known ('-').
assignments = members[['member_id']].copy()
assignments['project_id'] = '-'
assignments.head()

Unnamed: 0,member_id,project_id
0,0,-
1,1,-
2,2,-
3,3,-
4,4,-


# gbm

In [26]:
import datetime

# Three general body meetings, all in FA25: 2, 22 and 29 October 2025.
quarters = ['FA25'] * 3
dates = [datetime.datetime(2025, 10, day_of_month) for day_of_month in (2, 22, 29)]

# One row per meeting; gbm_id is the row position and goes in the first column.
gbm = pd.DataFrame({'quarter_id': quarters, 'date': dates})
gbm.insert(0, 'gbm_id', gbm.index)
gbm.head()

Unnamed: 0,gbm_id,quarter_id,date
0,0,FA25,2025-10-02
1,1,FA25,2025-10-22
2,2,FA25,2025-10-29


# attendance

In [None]:
# One attendance row for every (member, GBM) pair, member-major order.
# Every row starts as status=False (not attended); real attendance data can
# overwrite it afterwards.
attendance_list = [
    {'member_id': member_id, 'gbm_id': gbm_id, 'status': False}
    for member_id in members['member_id']
    for gbm_id in gbm['gbm_id']
]

attendance = pd.DataFrame(attendance_list)
attendance.head()

Unnamed: 0,member_id,gbm_id,status
0,0,0.0,-
1,1,1.0,-
2,2,2.0,-
3,3,,-
4,4,,-


In [None]:
# Optional: Update attendance with actual data
# If you have actual attendance data, you can update the status here
# Example: Mark specific members as attended for specific GBMs
# attendance.loc[(attendance['member_id'] == 5) & (attendance['gbm_id'] == 0), 'status'] = True

# Or load from attendance CSV if available:
# attendance_df = pd.read_csv(os.getenv('ATTENDANCE_URL'))
# Then match and update attendance status based on your data structure


# enrollment

In [28]:
# Enrol every member whose status is 1 in quarter FA25; rows keep the
# member's original index label.
is_active = members['status'] == 1
enrollment = members.loc[is_active, ['member_id']].copy()
enrollment['quarter_id'] = 'FA25'
enrollment.head()

Unnamed: 0,member_id,quarter_id
0,0,FA25
9,9,FA25
10,10,FA25
11,11,FA25
12,12,FA25


# assignment & project (2)

In [29]:
projects_df.head()

Unnamed: 0,quarter,company,point_of_contact,project_manager,associates,nda,status,description,donated,dnf
0,SP23,0,-,Ashley Lee & Edmond Hong,-,False,-,The project focused on two primary objectives:...,False,False
1,FA23,1,-,Sydney Jang,-,True,-,TCG identified strategies to enhance client en...,False,False
2,FA23,2,-,Dhathry Doppalapudi,-,False,-,The project focused on completing a competitiv...,False,False
3,FA23,3,-,Isabel Wang,-,True,-,This project focused on conducting market rese...,False,False
4,WI24,4,-,Daniel Woo,-,True,-,Created a list of companies aligning with Empi...,False,False


In [30]:
projects_df['associates']

0                                                     -
1                                                     -
2                                                     -
3                                                     -
4                                                     -
5                                                     -
6                                                     -
7                                                     -
8                                                     -
9                                                     -
10                                                    -
11                                                    -
12                                                    -
13                                                    -
14                                                    -
15                                                    -
16                                                    -
17                                              

In [31]:
# Column-wise accumulator for (member_id, project_id) pairs, filled by
# match_project_assignments. Re-run this cell before re-running the matching
# loop, otherwise the pairs are appended a second time.
assig_arr = {"member_id": [], "project_id": []}

In [32]:
def match_project_assignments(project_id, names_string, members_df=None, sink=None):
    """Record a (member_id, project_id) pair for each listed associate who is a known member.

    Args:
        project_id: identifier stored alongside every matched member.
        names_string: newline-separated associate names. Non-string values
            (e.g. NaN for a project with no associates) are ignored.
        members_df: frame with 'member_id' and 'name' columns; defaults to the
            notebook-level `members`.
        sink: dict with 'member_id' and 'project_id' lists to append to;
            defaults to the notebook-level `assig_arr`.

    Names are compared case-insensitively with surrounding/repeated whitespace
    collapsed, so "  aathi  muthu" matches "Aathi Muthu". Names that match no
    member are skipped. When several members share a name the first one wins.

    Returns:
        None; results are appended to `sink`.
    """
    if members_df is None:
        members_df = members
    if sink is None:
        sink = assig_arr

    if not isinstance(names_string, str):
        return

    def _normalise(name):
        # Collapse whitespace runs and ignore letter case.
        return ' '.join(str(name).split()).casefold()

    # Build the name -> id lookup once per call instead of scanning the frame
    # twice for every associate.
    member_ids_by_name = {}
    for member_id, member_name in zip(members_df['member_id'], members_df['name']):
        if pd.isna(member_name):
            continue
        member_ids_by_name.setdefault(_normalise(member_name), member_id)

    for raw_name in names_string.splitlines():
        key = _normalise(raw_name)
        if key and key in member_ids_by_name:
            sink['member_id'].append(member_ids_by_name[key])
            sink['project_id'].append(project_id)
    return

In [33]:
# Match each project's associate list against the member table; the frame's
# index label is passed as the project id.
for project_index, associate_names in projects_df['associates'].items():
    match_project_assignments(project_index, associate_names)

In [34]:
# Turn the accumulated pair lists into a two-column frame.
assig_df = pd.DataFrame.from_dict(assig_arr)
assig_df

Unnamed: 0,member_id,project_id
0,3,18
1,33,18
2,63,18
3,36,19
4,24,19
...,...,...
61,12,36
62,30,37
63,4,37
64,55,38


In [35]:
# Number the projects 0..n-1 in row order and give the foreign-key columns
# their *_id names.
projects_df = (
    projects_df
    .assign(project_id=range(len(projects_df)))
    .rename(columns={'quarter': 'quarter_id', 'company': 'company_id'})
)

In [37]:
# Discard the raw associates/status text, then keep the remaining columns in
# their final order with project_id first.
projects_df = projects_df.drop(columns=['associates', 'status'])
projects_df = projects_df.loc[:, [
    'project_id',
    'quarter_id',
    'company_id',
    'point_of_contact',
    'project_manager',
    'nda',
    'donated',
    'dnf',
    'description',
]]

In [38]:
projects_df.head()

Unnamed: 0,project_id,quarter_id,company_id,point_of_contact,project_manager,nda,donated,dnf,description
0,0,SP23,0,-,Ashley Lee & Edmond Hong,False,False,False,The project focused on two primary objectives:...
1,1,FA23,1,-,Sydney Jang,True,False,False,TCG identified strategies to enhance client en...
2,2,FA23,2,-,Dhathry Doppalapudi,False,False,False,The project focused on completing a competitiv...
3,3,FA23,3,-,Isabel Wang,True,False,False,This project focused on conducting market rese...
4,4,WI24,4,-,Daniel Woo,True,False,False,Created a list of companies aligning with Empi...


# Database Migration

In [None]:
# PostgreSQL driver. It must be installed in the kernel's environment; the
# recorded output of this cell is a ModuleNotFoundError for psycopg2.
import psycopg2
from psycopg2.extras import execute_values

# Get database connection URL
DATABASE_URL = os.getenv('SUPABASE_DIRECT_CONNECTION_URL')

# Fail early, naming the missing variable, rather than passing None to connect().
if not DATABASE_URL:
    raise ValueError("SUPABASE_DIRECT_CONNECTION_URL not found in .env file")

# `conn` and `cur` are shared by every migration cell below and are closed in
# the final summary cell.
print("Connecting to db")
conn = psycopg2.connect(DATABASE_URL)
cur = conn.cursor()
print("Connected successfully")


ModuleNotFoundError: No module named 'psycopg2'

In [None]:
# Drop existing tables and create new ones
# WARNING: destructive -- every listed table is dropped (with CASCADE) each
# time this cell runs, so existing rows are lost.
drop_tables_sql = """
DROP TABLE IF EXISTS attendance CASCADE;
DROP TABLE IF EXISTS enrollment CASCADE;
DROP TABLE IF EXISTS assignment CASCADE;
DROP TABLE IF EXISTS gbm CASCADE;
DROP TABLE IF EXISTS project CASCADE;
DROP TABLE IF EXISTS member CASCADE;
DROP TABLE IF EXISTS quarter CASCADE;
DROP TABLE IF EXISTS company CASCADE;
"""

# Tables are created parents-first so every REFERENCES target already exists:
# company, quarter, member -> project, gbm -> assignment, attendance, enrollment.
# NOTE(review): `project` declares a `status` TEXT column, while the notebook's
# projects_df drops `status` and carries `donated`/`dnf` flags instead --
# confirm which shape the table should have.
# NOTE(review): `member.year` is INTEGER, while the member frame shown above
# holds ordinal strings such as '3rd' -- confirm the conversion happens before insert.
create_tables_sql = """
-- Company table
CREATE TABLE IF NOT EXISTS company (
    company_id INTEGER PRIMARY KEY,
    name TEXT NOT NULL
);

-- Quarter table
CREATE TABLE IF NOT EXISTS quarter (
    quarter_id TEXT PRIMARY KEY
);

-- Member table
CREATE TABLE IF NOT EXISTS member (
    member_id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    year INTEGER,
    status BOOLEAN NOT NULL,
    role TEXT,
    email TEXT
);

-- Project table
CREATE TABLE IF NOT EXISTS project (
    project_id INTEGER PRIMARY KEY,
    quarter_id TEXT NOT NULL REFERENCES quarter(quarter_id),
    company_id INTEGER NOT NULL REFERENCES company(company_id),
    point_of_contact TEXT,
    project_manager TEXT,
    nda BOOLEAN NOT NULL,
    status TEXT,
    description TEXT
);

-- Assignment table (many-to-many between member and project)
CREATE TABLE IF NOT EXISTS assignment (
    member_id INTEGER NOT NULL REFERENCES member(member_id),
    project_id INTEGER NOT NULL REFERENCES project(project_id),
    PRIMARY KEY (member_id, project_id)
);

-- GBM (General Body Meeting) table
CREATE TABLE IF NOT EXISTS gbm (
    gbm_id INTEGER PRIMARY KEY,
    quarter_id TEXT NOT NULL REFERENCES quarter(quarter_id),
    date TIMESTAMP NOT NULL
);

-- Attendance table
CREATE TABLE IF NOT EXISTS attendance (
    member_id INTEGER NOT NULL REFERENCES member(member_id),
    gbm_id INTEGER NOT NULL REFERENCES gbm(gbm_id),
    status BOOLEAN NOT NULL,
    PRIMARY KEY (member_id, gbm_id)
);

-- Enrollment table (which members are enrolled in which quarters)
CREATE TABLE IF NOT EXISTS enrollment (
    member_id INTEGER NOT NULL REFERENCES member(member_id),
    quarter_id TEXT NOT NULL REFERENCES quarter(quarter_id),
    PRIMARY KEY (member_id, quarter_id)
);
"""

# Run the drop and the create, then commit both together.
cur.execute(drop_tables_sql)
cur.execute(create_tables_sql)
conn.commit()


In [None]:
# Insert companies: one (company_id, name) tuple per row, sent in a single
# batched statement.
companies_data = list(zip(
    (int(company_id) for company_id in companies_df['company_id']),
    (str(company_name) for company_name in companies_df['name']),
))
execute_values(
    cur,
    "INSERT INTO company (company_id, name) VALUES %s",
    companies_data,
)
conn.commit()

In [None]:
# Insert quarters
# gbm and enrollment both reference quarter(quarter_id) through foreign keys,
# so their quarter ids are inserted here as well as those derived from the
# projects sheet; otherwise a quarter with no project (e.g. the current one)
# makes the later inserts fail. dict.fromkeys de-duplicates while keeping
# first-seen order.
all_quarter_ids = [
    *quarter_df['quarter_id'],
    *gbm['quarter_id'],
    *enrollment['quarter_id'],
]
quarters_data = [
    (quarter_id,)
    for quarter_id in dict.fromkeys(str(value) for value in all_quarter_ids)
]
execute_values(cur,
    "INSERT INTO quarter (quarter_id) VALUES %s",
    quarters_data)
conn.commit()

In [None]:
# Insert members
def _year_to_int(year):
    """Convert a class-year value to an int for the INTEGER `year` column.

    Accepts ordinal strings ('3rd'), plain numbers (3, 3.0) and numeric
    strings ('3'); the leading run of digits is used. Returns None when the
    value is missing or does not start with a digit, so it is stored as NULL
    instead of making int() raise.
    """
    if pd.isna(year):
        return None
    leading_digits = ''
    for char in str(year).strip():
        if not char.isdigit():
            break
        leading_digits += char
    return int(leading_digits) if leading_digits else None


members_data = [
    (int(row['member_id']), str(row['name']),
     _year_to_int(row['year']),
     bool(row['status']), str(row['role']), str(row['email']))
    for _, row in members.iterrows()
]
execute_values(cur,
    "INSERT INTO member (member_id, name, year, status, role, email) VALUES %s",
    members_data)
conn.commit()
print(f"✓ Inserted {len(members_data)} members")


In [None]:
# Insert projects
def _project_status(row):
    """Return the text for the project table's `status` column.

    Uses the row's own 'status' value when that column is present. Otherwise
    it is rebuilt from the boolean flags: 'Donated' when `donated` is set,
    'Did not finish' when `dnf` is set, and '-' when neither is.
    """
    if 'status' in row.index:
        return str(row['status'])
    if row['donated']:
        return 'Donated'
    if row['dnf']:
        return 'Did not finish'
    return '-'


# projects_df has already dropped its 'status' column by this point, so reading
# row['status'] directly raised KeyError; the helper above covers both shapes.
projects_data = [
    (int(row['project_id']), str(row['quarter_id']), int(row['company_id']),
     str(row['point_of_contact']), str(row['project_manager']), bool(row['nda']),
     _project_status(row), str(row['description']))
    for _, row in projects_df.iterrows()
]
execute_values(cur,
    "INSERT INTO project (project_id, quarter_id, company_id, point_of_contact, project_manager, nda, status, description) VALUES %s",
    projects_data)
conn.commit()

In [None]:
# Insert assignments; pairs that already exist are skipped by ON CONFLICT.
if len(assig_df) == 0:
    print("No assignments to insert")
else:
    assignments_data = [
        (int(member_id), int(project_id))
        for member_id, project_id in zip(assig_df['member_id'], assig_df['project_id'])
    ]
    execute_values(
        cur,
        "INSERT INTO assignment (member_id, project_id) VALUES %s ON CONFLICT DO NOTHING",
        assignments_data,
    )
    conn.commit()
    print(f"Inserted {len(assignments_data)} assignments")


In [None]:
# Insert GBMs: (gbm_id, quarter_id, date) per meeting, date passed through as-is.
gbm_data = [
    (int(gbm_id), str(quarter_id), meeting_date)
    for gbm_id, quarter_id, meeting_date in zip(gbm['gbm_id'], gbm['quarter_id'], gbm['date'])
]
execute_values(
    cur,
    "INSERT INTO gbm (gbm_id, quarter_id, date) VALUES %s",
    gbm_data,
)
conn.commit()


In [None]:
# Insert attendance: one (member_id, gbm_id, status) row per member/meeting pair.
attendance_data = [
    (int(member_id), int(gbm_id), bool(attended))
    for member_id, gbm_id, attended in zip(
        attendance['member_id'], attendance['gbm_id'], attendance['status']
    )
]
execute_values(
    cur,
    "INSERT INTO attendance (member_id, gbm_id, status) VALUES %s",
    attendance_data,
)
conn.commit()


In [None]:
# Insert enrollments: one (member_id, quarter_id) row per enrolled member.
enrollment_data = [
    (int(member_id), str(quarter_id))
    for member_id, quarter_id in zip(enrollment['member_id'], enrollment['quarter_id'])
]
execute_values(
    cur,
    "INSERT INTO enrollment (member_id, quarter_id) VALUES %s",
    enrollment_data,
)
conn.commit()


In [None]:
# Verify data and show summary
print("Database Summary:")

tables = ['company', 'quarter', 'member', 'project', 'assignment', 'gbm', 'attendance', 'enrollment']
try:
    for table in tables:
        # Table names come from the fixed list above (not user input), so
        # interpolating them into the statement is safe here.
        cur.execute(f"SELECT COUNT(*) FROM {table}")
        count = cur.fetchone()[0]
        print(f"{table.ljust(15)}: {count} rows")
finally:
    # Close connection even when one of the count queries fails, so the
    # cursor and connection are never left open.
    cur.close()
    conn.close()
