In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os, shutil, subprocess, getpass, urllib.parse

GITHUB_USER = "avahuu"
REPO_NAME   = "cal_school"
BRANCH      = "main"

DRIVE_DIR   = "/content/drive/MyDrive/github"
LOCAL_PATH  = f"{DRIVE_DIR}/{REPO_NAME}"

# read token
print("Paste your GitHub Personal Access Token (PAT). It will NOT be saved:")
raw = getpass.getpass()
TOKEN = urllib.parse.quote(raw.strip(), safe="")

# clone
if not os.path.exists(LOCAL_PATH):
    auth_url = f"https://{GITHUB_USER}:{TOKEN}@github.com/{GITHUB_USER}/{REPO_NAME}.git"
    print("Cloning to:", LOCAL_PATH)
    subprocess.run(["git","clone",auth_url,LOCAL_PATH], check=True, cwd=DRIVE_DIR)
else:
    print("Repo exists:", LOCAL_PATH)

# safety check (ty GPT)
os.chdir(LOCAL_PATH)
subprocess.run(["git","remote","set-url","origin", f"https://github.com/{GITHUB_USER}/{REPO_NAME}.git"], check=True)

rc = subprocess.run(["git","checkout",BRANCH])
if rc.returncode != 0:
    subprocess.run(["git","checkout","-b",BRANCH,f"origin/{BRANCH}"], check=True)
subprocess.run(["git","pull","origin",BRANCH], check=False)

print("\n✅ Ready at:", LOCAL_PATH)

Mounted at /content/drive
Paste your GitHub Personal Access Token (PAT). It will NOT be saved:
··········
Repo exists: /content/drive/MyDrive/github/cal_school

✅ Ready at: /content/drive/MyDrive/github/cal_school


In [None]:
import pandas as pd

In [None]:
df = pd.read_csv('/content/drive/MyDrive/github/cal_school/sources/2021-22-crdc-data/SCH/School Characteristics.csv')

  df = pd.read_csv('/content/drive/MyDrive/github/cal_school/sources/2021-22-crdc-data/SCH/School Characteristics.csv')


In [None]:

df_filtered = df[
    (df['LEA_STATE_NAME'] == 'CALIFORNIA') &
    (df['SCH_STATUS_CHARTER'] == 'Yes')
]
columns_to_keep = ['LEAID', 'LEA_NAME', 'SCHID', 'SCH_NAME', 'COMBOKEY']
df_charter = df_filtered[columns_to_keep]

print(df_charter.head())
print(df_charter.shape)

       LEAID           LEA_NAME  SCHID                              SCH_NAME  \
5047  600011  Fort Sage Unified  12763                    Mt. Lassen Charter   
5276  600034    Windsor Unified   6983        Cali Calmecac Language Academy   
5297  600036    Natomas Unified  11087                      Westlake Charter   
5299  600036    Natomas Unified  11735         Natomas Pacific Pathways Prep   
5301  600036    Natomas Unified  12523  Natomas Pacific Pathways Prep Middle   

         COMBOKEY  
5047  60001112763  
5276  60003406983  
5297  60003611087  
5299  60003611735  
5301  60003612523  
(1271, 5)


In [None]:
edge = pd.read_excel('/content/drive/MyDrive/github/cal_school/sources/EDGE_GEOCODE_PUBLICSCH_2122.xlsx', dtype=str)

edge_ca = edge[edge['STATE'].str.strip().str.upper() == 'CA']

# 2) Normalize keys: drop leading zeros
edge_ca = edge_ca.copy()
edge_ca['NCESSCH_norm'] = edge_ca['NCESSCH'].str.strip().str.lstrip('0')

df_charter = df_charter.copy()
df_charter['COMBOKEY_norm'] = (
    df_charter['COMBOKEY'].astype(str).str.strip().str.lstrip('0')
)

# 3) Merge on COMBOKEY <-> NCESSCH
merged = df_charter.merge(
    edge_ca[['NCESSCH','NCESSCH_norm','NMCNTY','CITY','LOCALE','LAT','LON']],
    left_on='COMBOKEY_norm',
    right_on='NCESSCH_norm',
    how='left'
)

# 4) Keep columns
out_cols = ['LEAID','LEA_NAME','SCHID','SCH_NAME','CITY','COMBOKEY','NMCNTY','LOCALE','LAT','LON']
out = merged[out_cols]

print(f"Rows in df_charter: {len(df_charter)}")
print(f"Rows matched with geocodes: {out['LAT'].notna().sum()}")
print(out.head())


Rows in df_charter: 1271
Rows matched with geocodes: 1266
    LEAID           LEA_NAME  SCHID                              SCH_NAME  \
0  600011  Fort Sage Unified  12763                    Mt. Lassen Charter   
1  600034    Windsor Unified   6983        Cali Calmecac Language Academy   
2  600036    Natomas Unified  11087                      Westlake Charter   
3  600036    Natomas Unified  11735         Natomas Pacific Pathways Prep   
4  600036    Natomas Unified  12523  Natomas Pacific Pathways Prep Middle   

         CITY     COMBOKEY             NMCNTY LOCALE        LAT          LON  
0     Herlong  60001112763      Lassen County     33    40.4211  -120.650932  
1     Windsor  60003406983      Sonoma County     21  38.550242   -122.82712  
2  Sacramento  60003611087  Sacramento County     11   38.67564  -121.526258  
3  Sacramento  60003611735  Sacramento County     11    38.6551  -121.546082  
4  Sacramento  60003612523  Sacramento County     11    38.6551  -121.546082  


In [None]:
print(out['CITY'].astype(str).str.strip().str.casefold().value_counts())


CITY
los angeles    175
san jose        52
san diego       49
oakland         38
sacramento      32
              ... 
beale afb        1
pittsburg        1
yerington        1
san rafael       1
freedom          1
Name: count, Length: 331, dtype: int64


In [None]:
out_path = '/content/drive/MyDrive/github/cal_school/export/CA_charter_with_geo.csv'
out.to_csv(out_path, index=False)
print(f"Saved: {out_path}")


Saved: /content/drive/MyDrive/github/cal_school/export/CA_charter_with_geo.csv


In [None]:
%cd /content/drive/MyDrive/github/cal_school

!git config user.name "avahuu"
!git config user.email "xmhu312@gmail.com"
!git rm -r --cached -f sources/
!grep -qxF "sources/" .gitignore || echo "sources/" >> .gitignore
!git add -A
!git commit -m "chore: ignore sources/ and untrack it; update notebook and export"


/content/drive/MyDrive/github/cal_school
fatal: pathspec 'sources/' did not match any files
On branch main
Your branch is ahead of 'origin/main' by 1 commit.
  (use "git push" to publish your local commits)

nothing to commit, working tree clean


In [None]:
import getpass, urllib.parse, subprocess, os

REPO_OWNER = "avahuu"
REPO_NAME  = "cal_school"
BRANCH     = subprocess.check_output(["git","rev-parse","--abbrev-ref","HEAD"]).decode().strip()

!git add -A
!git commit -m "update from Colab" || echo "no changes to commit"

pat = urllib.parse.quote(getpass.getpass("Paste PAT (not saved): ").strip(), safe="")

auth = f"https://{REPO_OWNER}:{pat}@github.com/{REPO_OWNER}/{REPO_NAME}.git"
!git remote set-url origin "$auth"
!git push origin $BRANCH
!git remote set-url origin "https://github.com/{REPO_OWNER}/{REPO_NAME}.git"



[main 0fc55c5] update from Colab
 1 file changed, 1 insertion(+), 1 deletion(-)
Paste PAT (not saved): ··········
Enumerating objects: 14, done.
Counting objects: 100% (14/14), done.
Delta compression using up to 2 threads
Compressing objects: 100% (12/12), done.
Writing objects: 100% (12/12), 3.16 KiB | 59.00 KiB/s, done.
Total 12 (delta 8), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (8/8), completed with 2 local objects.[K
To https://github.com/avahuu/cal_school.git
   4e9d8c3..0fc55c5  main -> main


In [53]:
!jupyter nbconvert myfile.ipynb --to html --output index.html

This application is used to convert notebook files (*.ipynb)
        to various other formats.


Options
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Application.show_config_json=True]
--generate-config
    generate default config file
    Equivalent to: [--JupyterApp.generate_config=True]
-y
    Answer yes to any questions instead of prompting.
    Equivalent to: [--JupyterApp.answer_yes=True]
--execute
    Execute the notebook prior to export.
    Equivalent to: [--ExecutePr