In [2]:
import time
from pathlib import Path

import pandas as pd
from geopy.distance import geodesic
from geopy.geocoders import Nominatim

In [5]:
# ========== STEP 1: Load your data ==========
# Each CSV is expected to provide a "Name" and a "Full Address" column.
law_df, ortho_df = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/law_schools.csv", "../data/ortho_programs.csv")
)

In [8]:
# Preview the first rows of the ortho programs table instead of dumping the
# whole frame (use law_df.head(10) to inspect the law schools the same way).
ortho_df.head(10)

Unnamed: 0,Name,Full Address
0,1. Hospital for Special Surgery (HSS),"535 East 70th Street, New York, NY 10021"
1,3. NYU Grossman / NYU Langone Orthopedic Hospital,"301 East 17th Street, New York, NY 10003"
2,5. Mass General Brigham (Harvard Combined Orth...,"55 Fruit Street, Boston, MA 02114"
3,6. Rush Medical Center Orthopaedic Surgery Res...,"1653 W Congress Pkwy, Chicago, IL 60612"
4,7. Duke Orthopaedic Surgery Residency,"311 Trent Drive, Durham, NC 27710"
5,8. Vanderbilt Orthopaedic Surgery Residency,"1211 Medical Center Drive, Nashville, TN 37232"
6,9. Thomas Jefferson Orthopaedic Surgery Reside...,"1015 Walnut Street, Philadelphia, PA 19107"
7,12. UCSF Orthopaedic Surgery Residency Institute,"1500 Owens Street, San Francisco, CA 94158"
8,13. Emory University Orthopaedic Surgery Resid...,"1364 Clifton Rd NE, Atlanta, GA 30322"
9,14. Carolinas Medical Center Orthopaedic Resid...,"1000 Blythe Blvd, Charlotte, NC 28203"


In [9]:
# ========== STEP 2: Geocode each address ==========
# geopy's default request timeout is 1 second, which the public Nominatim
# server often exceeds; a longer timeout keeps slow responses from turning
# into timeouts (and therefore into missing coordinates).
geolocator = Nominatim(user_agent="law_ortho_matcher", timeout=10)

def geocode_address(address, max_attempts=3, min_delay_seconds=1.0):
    """Geocode one address and return its coordinates as a 2-element Series.

    Parameters
    ----------
    address : str
        Free-form address handed to ``geolocator.geocode``.
    max_attempts : int, default 3
        Maximum number of geocoder calls when a call raises.
    min_delay_seconds : float, default 1.0
        Pause after every request so consecutive calls (e.g. via
        ``Series.apply``) stay within the public Nominatim rate limit of
        one request per second. Failed attempts back off by a multiple of it.

    Returns
    -------
    pd.Series
        ``[latitude, longitude]`` on success, or ``[None, None]`` when the
        address is missing/blank, has no match, or every attempt raised.
    """
    not_found = pd.Series([None, None])

    # Nothing to look up: do not spend a request on a missing or blank address.
    if pd.isna(address) or not str(address).strip():
        return not_found

    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            location = geolocator.geocode(address)
        except Exception as exc:
            # Best-effort lookup: remember the error, back off, and retry.
            last_error = exc
            time.sleep(min_delay_seconds * attempt)
            continue

        # Throttle after every completed request, not only after failures.
        time.sleep(min_delay_seconds)
        if location is None:
            # The service answered "no match"; asking again would not help.
            return not_found
        return pd.Series([location.latitude, location.longitude])

    # Surface the failure instead of silently returning empty coordinates.
    print(f"Geocoding failed for {address!r} after {max_attempts} attempts: {last_error!r}")
    return not_found

# Geocode both tables in turn, storing the coordinates in new "Lat"/"Lon"
# columns on each frame (law schools first, then ortho programs).
geocode_jobs = [
    ("Geocoding law schools...", law_df),
    ("Geocoding ortho programs...", ortho_df),
]
for banner, frame in geocode_jobs:
    print(banner)
    frame[["Lat", "Lon"]] = frame["Full Address"].apply(geocode_address)

Geocoding law schools...
Geocoding ortho programs...


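## How Important the Law School Rank Is vs. the Ortho Rank and Distance
Each weight below multiplies its term in the match score. The best match is the one with the lowest score (this is a minimization problem), so a higher weight penalizes that term more. For example, a higher `dist_rank_importance` penalizes distance more.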

In [None]:
# Weights of the three terms in the match score (lower score = better match).
law_rank_importance = 2    # multiplies the law school's rank
ortho_rank_importance = 1  # multiplies the ortho program's rank
dist_rank_importance = 2   # multiplies the law-to-ortho distance in miles

In [None]:
# ========== STEP 3: Match and score all pairwise combos ==========
def _rank_from_name(name):
    """Return the integer rank encoded as the leading "N." prefix of ``name``.

    Raises
    ------
    ValueError
        If the text before the first "." is not an integer.
    """
    prefix = str(name).split(".")[0]
    try:
        return int(prefix)
    except ValueError:
        raise ValueError(
            f"Cannot read a rank from {name!r}: expected a name like '3. Some School'"
        ) from None


def _scoring_records(programs):
    """Return ``(name, rank, (lat, lon))`` for every geocoded row of ``programs``."""
    # Rows with a missing latitude or longitude cannot be measured; skip them.
    geocoded = programs.dropna(subset=["Lat", "Lon"])
    return [
        (row["Name"], _rank_from_name(row["Name"]), (row["Lat"], row["Lon"]))
        for _, row in geocoded.iterrows()
    ]


# Parse ranks and coordinates once per program rather than once per pair.
law_records = _scoring_records(law_df)
ortho_records = _scoring_records(ortho_df)

matches = []
for law_name, law_rank, law_point in law_records:
    for ortho_name, ortho_rank, ortho_point in ortho_records:
        distance = geodesic(law_point, ortho_point).miles

        # Weighted sum of both ranks and the distance; lower is better.
        score = (
            law_rank_importance * law_rank
            + ortho_rank_importance * ortho_rank
            + dist_rank_importance * distance
        )

        matches.append({
            "Law School": law_name,
            "Ortho Program": ortho_name,
            "Distance (miles)": round(distance, 2),
            "Score": round(score, 2)
        })


In [None]:
# ========== STEP 4: Sort and get top matches ==========
RESULTS_PATH = Path("../results/top_law_ortho_matches.csv")
RESULT_COLUMNS = ["Law School", "Ortho Program", "Distance (miles)", "Score"]

# Naming the columns explicitly keeps the sort working even when no pair could
# be scored (an empty `matches` list would otherwise have no "Score" column).
results_df = (
    pd.DataFrame(matches, columns=RESULT_COLUMNS)
    .sort_values("Score")  # ascending: the lowest score is the best match
    .reset_index(drop=True)
)

# Save or display
# to_csv does not create missing folders, so make sure the target exists.
RESULTS_PATH.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(RESULTS_PATH, index=False)
print("Top 30 matches:")
print(results_df.head(30))

Top 30 matches:
                                        Law School  \
0                               2. Yale Law School   
1              3. University of Chicago Law School   
2                               2. Yale Law School   
3                           1. Stanford Law School   
4             8. New York University School of Law   
5             8. New York University School of Law   
6                 6. Duke University School of Law   
7   5. University of Pennsylvania Carey Law School   
8                            7. Harvard Law School   
9   5. University of Pennsylvania Carey Law School   
10                              2. Yale Law School   
11                         10. Columbia Law School   
12  5. University of Pennsylvania Carey Law School   
13                          1. Stanford Law School   
14                         10. Columbia Law School   
15  5. University of Pennsylvania Carey Law School   
16                              2. Yale Law School   
17          