In [11]:
# --------------------------------------
# COMPLETE Grand Slam Animated Folium Map
# --------------------------------------

import pandas as pd
import folium
from folium.plugins import MarkerCluster, MiniMap
import pycountry
import time
import os
from geopy.geocoders import Nominatim
from bs4 import BeautifulSoup

# === 1. Load your data ===
df = pd.read_csv("assets/data/match_tennis_68_ajd.csv")

# === 2. Map last names to countries ===
name_to_country = {
    "Djokovic": "Serbia", "Nadal": "Spain", "Federer": "Switzerland",
    "Sampras": "United States", "Murray": "United Kingdom", "Agassi": "United States",
    # Add more if needed
}

def extract_last_name(name):
    return name.split()[0] if isinstance(name, str) else ""

def country_to_flag(country_name):
    try:
        code = pycountry.countries.search_fuzzy(country_name)[0].alpha_2
        return ''.join(chr(0x1F1E6 + ord(c) - ord('A')) for c in code)
    except:
        return ''

# === 3. Top player per location ===
location_counts = df["Location"].value_counts().reset_index()
location_counts.columns = ["Location", "Occurrences"]

top_players = (
    df.groupby("Location")["Winner"]
    .agg(lambda x: x.value_counts().index[0])
    .reset_index()
)
top_players.columns = ["Location", "TopPlayer"]
top_players["LastName"] = top_players["TopPlayer"].apply(extract_last_name)
top_players["Country"] = top_players["LastName"].map(name_to_country).fillna("Unknown")
top_players["Flag"] = top_players["Country"].apply(lambda c: country_to_flag(c) if c != "Unknown" else "")

location_counts = location_counts.merge(top_players, on="Location", how="left")

# === 4. Most frequent surface per location ===
surface_info = (
    df.groupby("Location")["Surface"]
    .agg(lambda x: x.value_counts().index[0])
    .reset_index()
)
location_counts = location_counts.merge(surface_info, on="Location", how="left")

# === 5. Surface colors ===
surface_colors = {
    "Hard": "#4A90E2",
    "Clay": "#CC6600",
    "Grass": "#2E8B57",
    "Carpet": "#7F3FBF"
}

# === 6. Geocode with cache ===
cache_file = "geocoded_locations_mens.csv"
if os.path.exists(cache_file):
    cache = pd.read_csv(cache_file)
else:
    cache = pd.DataFrame(columns=["Location", "Latitude", "Longitude"])

geolocator = Nominatim(user_agent="tennis_mapper")

def geocode_location(loc):
    if loc in cache["Location"].values:
        return cache.loc[cache["Location"] == loc, ["Latitude", "Longitude"]].values[0]
    try:
        g = geolocator.geocode(loc)
        time.sleep(1)
        if g:
            lat, lon = g.latitude, g.longitude
            cache.loc[len(cache)] = [loc, lat, lon]
            cache.to_csv(cache_file, index=False)
            return [lat, lon]
    except:
        pass
    return [None, None]

location_counts[["Latitude", "Longitude"]] = location_counts["Location"].apply(lambda x: pd.Series(geocode_location(x)))
location_counts.dropna(subset=["Latitude", "Longitude"], inplace=True)

# === 7. Create Folium map ===
m = folium.Map(location=[20, 0], zoom_start=2, control_scale=True)
m.add_child(MiniMap(toggle_display=True, position="bottomright"))
marker_cluster = MarkerCluster(name="Tournaments").add_to(m)

max_occurrences = location_counts["Occurrences"].max()

for _, row in location_counts.iterrows():
    popup_html = (
        f"<div style='width: 220px;'>"
        f"<b>{row['Location']}</b><br>"
        f"Surface: {row['Surface']}<br>"
        f"Matches: {int(row['Occurrences'])}<br>"
        f"Top Player: {row['Flag']} {row['TopPlayer']}"
        f"</div>"
    )
    radius = (row["Occurrences"] / max_occurrences) * 20
    color = surface_colors.get(row["Surface"], "gray")
    folium.CircleMarker(
        location=[row["Latitude"], row["Longitude"]],
        radius=radius,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.6,
        popup=popup_html,
        tooltip=row["Location"]
    ).add_to(marker_cluster)

# Save initial map
m.save("tennis_map_mens_with_flags.html")

# === 8. Add ✈️ plane animation ===
grand_slam_coords = [
    [-37.8136, 144.9631],  # Melbourne
    [48.8566, 2.3522],     # Paris
    [51.5074, -0.1278],    # London
    [40.7128, -74.0060]    # New York
]

with open("tennis_map_mens_with_flags.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

map_div = soup.find("div", {"id": lambda x: x and x.startswith("map_")})
map_id = map_div["id"]

plane_script = f"""
<script>
var map = window.{map_id}_map;

var slamCoords = {grand_slam_coords};

var planeIcon = L.divIcon({{
    className: 'plane-icon',
    html: '✈️',
    iconSize: [24, 24],
    iconAnchor: [12, 12]
}});

var plane = L.marker(slamCoords[0], {{icon: planeIcon}}).addTo(map);

var line = L.polyline(slamCoords, {{
    color: '#000',
    weight: 2,
    dashArray: '5, 5'
}}).addTo(map);

let i = 0;
let steps = 200;
let interval = 30;

function flyToNext() {{
    if (i >= slamCoords.length - 1) return;

    let [lat1, lon1] = slamCoords[i];
    let [lat2, lon2] = slamCoords[i + 1];
    let step = 0;
    let deltaLat = (lat2 - lat1) / steps;
    let deltaLon = (lon2 - lon1) / steps;

    let move = setInterval(() => {{
        let newLat = lat1 + step * deltaLat;
        let newLon = lon1 + step * deltaLon;
        plane.setLatLng([newLat, newLon]);

        step++;
        if (step > steps) {{
            clearInterval(move);
            i++;
            flyToNext();
        }}
    }}, interval);
}}

flyToNext();
</script>

<style>
.plane-icon {{
    font-size: 22px;
    line-height: 22px;
}}
</style>
"""

soup.body.append(BeautifulSoup(plane_script, "html.parser"))

# === 9. Save final version ===
with open("tennis_map_with_plane_animation.html", "w", encoding="utf-8") as f:
    f.write(str(soup))

print("✅ Done. Open 'tennis_map_with_plane_animation.html' to see your animated map.")


FileNotFoundError: [Errno 2] No such file or directory: '/Users/karine/SmashData-1/Milestone 1/match_tennis_68_ajd.csv'

In [10]:
import pandas as pd
import plotly.graph_objects as go

# Load your CSV data
file_path = "/Users/karine/SmashData-1/Milestone 1/match_tennis_68_ajd.csv"
df = pd.read_csv(file_path)

# --- Step 1: Clean & Compute Statistics ---
# Focus on top 5 players by total wins
top_players = df['Winner'].value_counts().head(5).index.tolist()

# Ensure WRank is numeric
df['WRank'] = pd.to_numeric(df['WRank'], errors='coerce')

# Prepare statistics for radar plot
player_stats = []
for player in top_players:
    subset = df[df['Winner'] == player]

    avg_aces = pd.to_numeric(subset['W1'], errors='coerce').mean(skipna=True)  # proxy for ace if missing real ace column
    avg_duration = subset.apply(
        lambda row: sum([
            pd.to_numeric(row.get(f"W{i}"), errors='coerce') +
            pd.to_numeric(row.get(f"L{i}"), errors='coerce')
            for i in range(1, 6)
        ]), axis=1).mean() * 5  # estimate minutes

    win_sets = pd.to_numeric(subset['Wsets'], errors='coerce').mean(skipna=True)
    rank_avg = subset['WRank'].mean(skipna=True)

    player_stats.append({
        "Player": player,
        "Estimated Minutes": avg_duration,
        "Avg Sets Won": win_sets,
        "Avg W1 Games": avg_aces,
        "Avg WRank": rank_avg
    })

# Convert to DataFrame
stats_df = pd.DataFrame(player_stats).set_index("Player")

# Normalize data to 0–100
def normalize(series):
    return 100 * (series - series.min()) / (series.max() - series.min())

normalized = stats_df.apply(normalize)

# --- Step 2: Plot Radar Chart ---
categories = normalized.columns.tolist()

fig = go.Figure()
for player in normalized.index:
    fig.add_trace(go.Scatterpolar(
        r=normalized.loc[player].tolist() + [normalized.loc[player].tolist()[0]],
        theta=categories + [categories[0]],
        fill='toself',
        name=player
    ))

fig.update_layout(
    title="Top 5 Players – Normalized Match Performance",
    polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
    showlegend=True,
    template="plotly_white",
    height=600
)

fig.show()

  df = pd.read_csv(file_path)


In [11]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load your CSV data
file_path = "/Users/karine/SmashData-1/Milestone 1/match_tennis_68_ajd.csv"
df = pd.read_csv(file_path)

# --- Step 1: Clean & Compute Statistics ---
# Focus on top 10 players by total wins
top_players = df['Winner'].value_counts().head(10).index.tolist()

# Ensure WRank is numeric
df['WRank'] = pd.to_numeric(df['WRank'], errors='coerce')

# Prepare statistics for radar plot
player_stats = []
for player in top_players:
    subset = df[df['Winner'] == player]

    avg_aces = pd.to_numeric(subset['W1'], errors='coerce').mean(skipna=True)  # proxy for ace if missing real ace column
    win_sets = pd.to_numeric(subset['Wsets'], errors='coerce').mean(skipna=True)
    rank_avg = subset['WRank'].mean(skipna=True)

    player_stats.append({
        "Player": player,
        "Avg Sets Won": win_sets,
        "Avg W1 Games": avg_aces,
        "Avg WRank": rank_avg
    })

# Convert to DataFrame
stats_df = pd.DataFrame(player_stats)

# Normalize selected columns to 0–100 for visual comparison
features = ['Avg Sets Won', 'Avg W1 Games', 'Avg WRank']
for feature in features:
    min_val = stats_df[feature].min()
    max_val = stats_df[feature].max()
    stats_df[feature] = 100 * (stats_df[feature] - min_val) / (max_val - min_val)

# Melt the DataFrame for plotly express compatibility
df_melted = stats_df.melt(id_vars="Player", var_name="Metric", value_name="Value")

# Create animated radar plot with dropdown menu
fig = px.line_polar(
    df_melted,
    r="Value",
    theta="Metric",
    color="Player",
    line_close=True,
    template="plotly_dark",
    height=650,
    animation_frame="Player"
)

fig.update_traces(fill="toself")
fig.update_layout(
    title="Interactive Radar Chart: Top 10 Tennis Players",
    polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
    showlegend=False,
    updatemenus=[{
        "buttons": [
            {
                "args": [None, {"frame": {"duration": 1000, "redraw": True}, "fromcurrent": True}],
                "label": "Play",
                "method": "animate"
            },
            {
                "args": [[None], {"frame": {"duration": 0, "redraw": False}, "mode": "immediate"}],
                "label": "Pause",
                "method": "animate"
            }
        ],
        "type": "buttons",
        "direction": "left",
        "pad": {"r": 10, "t": 87},
        "x": 0.1,
        "xanchor": "right",
        "y": 0,
        "yanchor": "top"
    }]
)

fig.show()


Columns (14,20,21,27,46,47,51) have mixed types. Specify dtype option on import or set low_memory=False.



In [1]:
import pandas as pd
import folium
from folium.plugins import TimestampedGeoJson
import pycountry
from geopy.geocoders import Nominatim
import time

# Grand Slam details
slams = [
    {"name": "Australian Open", "city": "Melbourne", "surface": "Hard"},
    {"name": "Roland Garros", "city": "Paris", "surface": "Clay"},
    {"name": "Wimbledon", "city": "London", "surface": "Grass"},
    {"name": "US Open", "city": "New York", "surface": "Hard"}
]

# Top players per Slam (replace with your dataset logic if needed)
slam_top_players = {
    "Australian Open": ("Djokovic", "Serbia"),
    "Roland Garros": ("Nadal", "Spain"),
    "Wimbledon": ("Federer", "Switzerland"),
    "US Open": ("Sampras", "United States")
}

# Surface colors
surface_colors = {
    "Hard": "#4A90E2",
    "Clay": "#CC6600",
    "Grass": "#2E8B57"
}

# Get country flag
def country_to_flag(country_name):
    try:
        country = pycountry.countries.search_fuzzy(country_name)[0]
        code = country.alpha_2
        return ''.join(chr(0x1F1E6 + ord(c) - ord('A')) for c in code.upper())
    except:
        return ''

# Geocode cities
geolocator = Nominatim(user_agent="grand_slam_animator")

def geocode_city(city):
    try:
        location = geolocator.geocode(city)
        time.sleep(1)
        if location:
            return location.latitude, location.longitude
    except Exception as e:
        print(f"Error: {e}")
    return None, None

# Build features for animation
features = []
for i, slam in enumerate(slams):
    player, country = slam_top_players[slam["name"]]
    flag = country_to_flag(country)
    lat, lon = geocode_city(slam["city"])

    if lat is None or lon is None:
        continue

    popup = f"""
    <div style='width:200px'>
        <b>{slam['name']}</b><br>
        Surface: {slam['surface']}<br>
        Top Player: {flag} {player}
    </div>
    """

    feature = {
        "type": "Feature",
        "geometry": {
            "type": "Point",
            "coordinates": [lon, lat]
        },
        "properties": {
            "time": f"2025-01-0{i+1}T00:00:00",  # Fake dates for animation steps
            "style": {
                "color": surface_colors.get(slam["surface"], "gray"),
                "fillColor": surface_colors.get(slam["surface"], "gray"),
                "radius": 15,
                "fillOpacity": 0.7
            },
            "popup": popup,
            "icon": "circle"
        }
    }
    features.append(feature)

# Build map
m = folium.Map(location=[20, 0], zoom_start=2, control_scale=True)

# Add animated geojson
TimestampedGeoJson(
    {
        "type": "FeatureCollection",
        "features": features
    },
    period="P1M",
    add_last_point=True,
    auto_play=True,
    loop=False,
    max_speed=1,
    loop_button=True,
    date_options="YYYY",
    time_slider_drag_update=True
).add_to(m)

# Save it
m.save("grand_slam_animation_map.html")


Error: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=London&format=json&limit=1 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x16590c250>, 'Connection to nominatim.openstreetmap.org timed out. (connect timeout=1)'))
Error: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=New+York&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
