In [50]:
# import applicable modules
import pandas as pd
import numpy as np
import folium

In [51]:
# read in csv data
races_df = pd.read_csv('races.csv')
circuits_df = pd.read_csv('circuits.csv')

In [52]:
# merge reces with circuits
df = races_df.merge(circuits_df, on='circuitId', suffixes = ('_r','_c'))

In [53]:
df.head()

Unnamed: 0,raceId,year,round,circuitId,name_r,date,time,url_r,fp1_date,fp1_time,...,sprint_date,sprint_time,circuitRef,name_c,location,country,lat,lng,alt,url_c
0,1,2009,1,1,Australian Grand Prix,2009-03-29,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...,\N,\N,...,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1,18,2008,1,1,Australian Grand Prix,2008-03-16,04:30:00,http://en.wikipedia.org/wiki/2008_Australian_G...,\N,\N,...,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
2,36,2007,1,1,Australian Grand Prix,2007-03-18,03:00:00,http://en.wikipedia.org/wiki/2007_Australian_G...,\N,\N,...,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
3,55,2006,3,1,Australian Grand Prix,2006-04-02,14:00:00,http://en.wikipedia.org/wiki/2006_Australian_G...,\N,\N,...,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
4,71,2005,1,1,Australian Grand Prix,2005-03-06,14:00:00,http://en.wikipedia.org/wiki/2005_Australian_G...,\N,\N,...,\N,\N,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...


In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1079 entries, 0 to 1078
Data columns (total 26 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   raceId       1079 non-null   int64  
 1   year         1079 non-null   int64  
 2   round        1079 non-null   int64  
 3   circuitId    1079 non-null   int64  
 4   name_r       1079 non-null   object 
 5   date         1079 non-null   object 
 6   time         1079 non-null   object 
 7   url_r        1079 non-null   object 
 8   fp1_date     1079 non-null   object 
 9   fp1_time     1079 non-null   object 
 10  fp2_date     1079 non-null   object 
 11  fp2_time     1079 non-null   object 
 12  fp3_date     1079 non-null   object 
 13  fp3_time     1079 non-null   object 
 14  quali_date   1079 non-null   object 
 15  quali_time   1079 non-null   object 
 16  sprint_date  1079 non-null   object 
 17  sprint_time  1079 non-null   object 
 18  circuitRef   1079 non-null   object 
 19  name_c

In [55]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
raceId,1079.0,542.020389,314.621154,1.0,270.5,540.0,809.5,1096.0
year,1079.0,1991.389249,20.009147,1950.0,1976.0,1993.0,2009.0,2022.0
round,1079.0,8.432808,5.033365,1.0,4.0,8.0,12.0,22.0
circuitId,1079.0,23.493976,19.022536,1.0,9.0,18.0,34.0,79.0
lat,1079.0,33.842768,25.459793,-37.8497,34.8431,43.7347,49.2542,57.2653
lng,1079.0,7.191151,57.215152,-118.189,-1.01694,6.9475,14.7647,144.968


In [56]:
# view all columns
df.columns

Index(['raceId', 'year', 'round', 'circuitId', 'name_r', 'date', 'time',
       'url_r', 'fp1_date', 'fp1_time', 'fp2_date', 'fp2_time', 'fp3_date',
       'fp3_time', 'quali_date', 'quali_time', 'sprint_date', 'sprint_time',
       'circuitRef', 'name_c', 'location', 'country', 'lat', 'lng', 'alt',
       'url_c'],
      dtype='object')

In [57]:
# drop unwanted columns
df.drop(['round', 'date', 'time',
       'url_r', 'fp1_date', 'fp1_time', 'fp2_date', 'fp2_time', 'fp3_date',
       'fp3_time', 'quali_date', 'quali_time', 'sprint_date', 'sprint_time',
       'url_c'], axis=1, inplace=True)

In [58]:
# add new column concatenating City and Country
df['place'] = df.location + ', ' + df.country

In [59]:
# subset dataframe for specific time periods, grouping by location to get a count of races at each location
df_all = df.groupby(['place','lat','lng'], as_index=False)['raceId'].count()
df_old = df.loc[df.year<1960].groupby(['place','lat','lng'], as_index=False)['raceId'].count()
df_new = df.loc[df.year>2010].groupby(['place','lat','lng'], as_index=False)['raceId'].count()

In [60]:
# create map
m_old = folium.Map(location=[20,25], tiles="OpenStreetMap", zoom_start=2)

# add circles to the map
for i in range(0,len(df_old)):
    folium.Circle(
        location=[df_old.iloc[i]['lat'], df_old.iloc[i]['lng']],
        popup=f'{df_old.iloc[i]["place"]}<br>{str(df_old.iloc[i]["raceId"])} races',
        radius=float(df_old.iloc[i]['raceId'])*30000,
        color='red',
        fill=True,
        fill_color='red'
    ).add_to(m_old)

# save as HTML file and preview    
m_old.save('map_old.html')
m_old

In [61]:
# create map
m_new = folium.Map(location=[20,25], tiles="OpenStreetMap", zoom_start=2)

# add circles to the map
for i in range(0,len(df_new)):
    folium.Circle(
        location=[df_new.iloc[i]['lat'], df_new.iloc[i]['lng']],
        popup=f'{df_new.iloc[i]["place"]}<br>{str(df_new.iloc[i]["raceId"])} races',
        radius=float(df_new.iloc[i]['raceId'])*40000,
        color='red',
        fill=True,
        fill_color='red'
    ).add_to(m_new)

# save map as HTML and display preview    
m_new.save('map_new.html')
m_new

In [62]:
# create map
m_all = folium.Map(location=[20,25], tiles="OpenStreetMap", zoom_start=2)

# add circles to the map
for i in range(0,len(df_all)):
    folium.Circle(
    location=[df_all.iloc[i]['lat'], df_all.iloc[i]['lng']],
    popup=f'{df_all.iloc[i]["place"]}<br>{str(df_all.iloc[i]["raceId"])} races',
    radius=float(df_all.iloc[i]['raceId'])*20000,
    color='red',
    fill=True,
    fill_color='red'
    ).add_to(m_all)

# display preview of map    
m_all