In [9]:
import html
from functools import lru_cache

import folium
import pandas as pd
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

In [16]:
# Read the two input tables from the mounted Drive folder.
schools_csv_path = './drive/MyDrive/New_home/schools.csv'
universities_csv_path = './drive/MyDrive/New_home/universities.csv'

schools = pd.read_csv(schools_csv_path)
universities = pd.read_csv(universities_csv_path)


In [11]:
# Trim leading/trailing whitespace from the join keys ('city' and 'state')
# in both tables so that the values compare equal when the tables are merged.
for frame in (schools, universities):
    for key_column in ('city', 'state'):
        frame[key_column] = frame[key_column].str.strip()


In [17]:
# Inner-join universities and schools on (city, state): each university row is
# paired with every school row that shares its city and state, and cities that
# appear in only one of the two tables are dropped.
df = pd.merge(universities, schools, on=['city', 'state'], how='inner')

# Persist the combined table.
# NOTE(review): to_csv defaults to index=True, so the row index is written as
# an unnamed first column — confirm that downstream readers expect it.
df.to_csv('./drive/MyDrive/New_home/combined_data.csv')

# The preview has to be the cell's final expression to be rendered; when it
# was placed before to_csv() its result was silently discarded.
df.head(100)

In [13]:
# Aggregate the merged rows into one record per (city, state) holding three
# lists: 'university', 'level' and 'school'.
#
# The 'university' and 'level' lists are consumed positionally (zipped) when
# the map markers are built, so they must stay aligned: element i of 'level'
# has to be the level of element i of 'university'. De-duplicating the two
# columns independently breaks that (e.g. universities [A, B, C] with levels
# [R1, R1, R2] would collapse to [A, B, C] and [R1, R2], so the zip would
# drop C and mislabel B). Instead, de-duplicate (university, level) *pairs*
# first and then collect both columns as plain lists.
city_keys = ['city', 'state']
agg_func = {
    'university': list,                    # applied to de-duplicated pairs
    'level': list,                         # stays aligned with 'university'
    'school': lambda x: list(x.unique()),  # unique schools, first-seen order
}

# One row per distinct (city, state, university, level), first-seen order.
university_levels = (
    df.drop_duplicates(subset=city_keys + ['university', 'level'])
      .groupby(city_keys)
      .agg({column: agg_func[column] for column in ('university', 'level')})
)

# Schools are independent of the university/level pairing.
city_schools = df.groupby(city_keys).agg({'school': agg_func['school']})

# Both frames are indexed by the same (city, state) groups, so the join is 1:1.
# Resulting columns: city, state, university, level, school.
grouped = university_levels.join(city_schools).reset_index()

# Marker colour lookup based on which level labels are present
def determine_color(levels):
    """Pick the marker colour for a collection of level labels.

    Args:
        levels: container supporting ``in`` (e.g. a list of level strings).

    Returns:
        'blue' when both 'R1' and 'R2' are present, 'red' when only 'R1' is,
        'green' when only 'R2' is, and 'gray' when neither is.
    """
    has_r1 = 'R1' in levels
    has_r2 = 'R2' in levels
    palette = {
        (True, True): 'blue',
        (True, False): 'red',
        (False, True): 'green',
        (False, False): 'gray',
    }
    return palette[(has_r1, has_r2)]

# Nominatim client; the user_agent string is sent with every request to
# identify this application to the geocoding service.
geolocator = Nominatim(
    user_agent="my_unique_geocoder",
)

# Wrap the client's geocode method so that consecutive calls are spaced at
# least one second apart.
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
)

# Memoised front-end to the rate-limited geocoder: a query string that has
# already been looked up is answered from the cache instead of the network.
# The cache belongs to this function object, so re-executing this definition
# starts with an empty cache.
@lru_cache(maxsize=1000)
def get_location(query):
    """Geocode ``query``, reusing the stored result for repeated queries.

    Args:
        query: address text handed to the rate-limited ``geocode`` callable;
            it must be hashable because it is used as the cache key.

    Returns:
        Whatever ``geocode(query)`` returns. Callers in this notebook treat a
        falsy result as "no location found".
    """
    location = geocode(query)
    return location

# Geocode every (city, state) group through the cached, rate-limited lookup.
# The "city, state" query strings are built column-wise and mapped with
# Series.apply. Unlike a row-wise DataFrame.apply(axis=1), this also works
# when `grouped` has no rows (the row-wise form then returns a DataFrame,
# which cannot be assigned to a single column).
location_queries = grouped['city'].astype(str) + ', ' + grouped['state'].astype(str)
grouped['location'] = location_queries.apply(get_location)

# Split each result into coordinates; a falsy result (no match) becomes None
# so that the marker loop can skip it.
grouped['lat'] = grouped['location'].apply(lambda loc: loc.latitude if loc else None)
grouped['lon'] = grouped['location'].apply(lambda loc: loc.longitude if loc else None)

# Marker colour derived from the levels present in each city.
grouped['color'] = grouped['level'].apply(determine_color)

# Creating the map
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)  # Center of the US


def _escaped(value):
    """Return ``value`` as text that is safe to embed in HTML markup."""
    # Names come straight from the CSV files; characters such as '&', '<' or
    # quotes must not be interpreted as markup inside the popup/tooltip.
    return html.escape(str(value))


for _, row in grouped.iterrows():
    # Skip cities for which geocoding produced no coordinates.
    if pd.isna(row['lat']) or pd.isna(row['lon']):
        continue

    city = _escaped(row['city'])
    state = _escaped(row['state'])

    # Build each bullet list once and reuse it for both tooltip and popup.
    # NOTE(review): zip() pairs the two lists positionally and stops at the
    # shorter one, so 'university' and 'level' must be aligned in `grouped`.
    university_items = [
        f"• {_escaped(uni)} ({_escaped(lvl)})"
        for uni, lvl in zip(row['university'], row['level'])
    ]
    school_items = [f"• {_escaped(school)}" for school in row['school']]

    # Compact summary shown on hover.
    tooltip_content = (
        f"<strong>{city}</strong><br/>"
        + "Universities:<br/>" + "<br/>".join(university_items)
        + "<br/>Schools:<br/>" + "<br/>".join(school_items)
    )

    # Fixed-width card shown on click.
    popup_content = f"""
        <div style='width:350px; height:auto; overflow:auto; font-size:12px;'>
            <strong>{city}, {state}</strong><br>
            <div style='width:100%;'>
                <strong>Universities:</strong><br>
                {'<br>'.join(university_items)}
                <br><strong>Schools:</strong><br>
                {'<br>'.join(school_items)}
            </div>
        </div>
        """
    popup = folium.Popup(popup_content, max_width=350)

    marker = folium.CircleMarker(
        location=[row['lat'], row['lon']],
        radius=5,
        color=row['color'],
        fill=True,
        fill_color=row['color'],
        popup=popup,
    )
    marker.add_to(m)
    marker.add_child(folium.Tooltip(tooltip_content))

# Display the map
m




In [14]:
# Write the rendered map to an HTML file in the Drive folder.
map_output_path = './drive/MyDrive/New_home/map.html'
m.save(map_output_path)