# MotoGP Data Preview

This notebook provides a quick overview of all downloaded MotoGP datasets.

In [8]:
import pandas as pd
from pathlib import Path

In [9]:
# Configuration: location of the raw CSV files, relative to this notebook.
DATA_DIR = '../data/raw'
data_dir = Path(DATA_DIR)

# Report the resolved absolute location and whether the folder is present.
print(
    f"Data directory: {data_dir.resolve()}",
    f"Directory exists: {data_dir.exists()}",
    sep="\n",
)

Data directory: /Users/diogosilva/Developer/Projects/motogp-analytics/data/raw
Directory exists: True


In [10]:
# Discover the CSV datasets in the raw-data directory.
# The list is sorted once, at creation, so `csv_files` itself is in a stable,
# name-based order rather than whatever order the directory scan yields.
csv_files = sorted(data_dir.glob("*.csv"))

if csv_files:
    print(f"Found {len(csv_files)} datasets:")
    for file in csv_files:
        print(f"  - {file.name}")
else:
    # Nothing to preview; the message points the reader to the download notebook.
    print("No CSV files found. Please run 00_data_download.ipynb first.")

Found 6 datasets:
  - constructure-world-championship.csv
  - grand-prix-events-held.csv
  - grand-prix-race-winners.csv
  - riders-finishing-positions.csv
  - riders-info.csv
  - same-nation-podium-lockouts.csv


## Dataset 1: Constructors World Championship

In [11]:
# Read the constructors' championship CSV into `constructors_df`.
# NOTE: 'constructure' is the spelling of the file on disk (see the dataset
# listing output), so the name must stay exactly as written here.
constructors_df = pd.read_csv(data_dir.joinpath('constructure-world-championship.csv'))

# Summary: row count, column count, then the column names.
print(
    f"Registos: {constructors_df.shape[0]}, Colunas: {constructors_df.shape[1]}",
    f"Colunas: {constructors_df.columns.tolist()}",
    sep="\n",
)

# Bare last expression so the notebook renders the frame.
constructors_df

Registos: 284, Colunas: 3
Colunas: ['Season', 'Constructor', 'Class']


Unnamed: 0,Season,Constructor,Class
0,2021,Yamaha,MotoGP™
1,2021,Kalex,Moto2™
2,2021,KTM,Moto3™
3,2021,Energica,MotoE™
4,2020,Suzuki,MotoGP™
...,...,...,...
279,1950,Velocette,350cc
280,1949,AJS,MotoGP™
281,1949,Moto Guzzi,250cc
282,1949,Mondial,125cc


In [12]:
# Show the opening ten rows of the constructors table.
constructors_df.head(n=10)

Unnamed: 0,Season,Constructor,Class
0,2021,Yamaha,MotoGP™
1,2021,Kalex,Moto2™
2,2021,KTM,Moto3™
3,2021,Energica,MotoE™
4,2020,Suzuki,MotoGP™
5,2020,Kalex,Moto2™
6,2020,KTM,Moto3™
7,2020,Energica,MotoE™
8,2019,Honda,MotoGP™
9,2019,Kalex,Moto2™


In [13]:
# Show the closing ten rows of the constructors table.
constructors_df.tail(n=10)

Unnamed: 0,Season,Constructor,Class
274,1951,Mondial,125cc
275,1951,Norton,350cc
276,1950,Gilera,MotoGP™
277,1950,Benelli,250cc
278,1950,Mondial,125cc
279,1950,Velocette,350cc
280,1949,AJS,MotoGP™
281,1949,Moto Guzzi,250cc
282,1949,Mondial,125cc
283,1949,Velocette,350cc


## Dataset 2: Grand Prix Events Held

In [14]:
# Read the Grand Prix events CSV into `events_df`.
events_df = pd.read_csv(data_dir.joinpath('grand-prix-events-held.csv'))

# Summary: row count, column count, then the column names.
print(
    f"Registos: {events_df.shape[0]}, Colunas: {events_df.shape[1]}",
    f"Colunas: {events_df.columns.tolist()}",
    sep="\n",
)

# Bare last expression so the notebook renders the frame.
events_df

Registos: 72, Colunas: 3
Colunas: ['Times', 'Track', 'Country']


Unnamed: 0,Times,Track,Country
0,280,TT Circuit Assen,NL
1,168,Automotodrom Brno,CZ
2,131,Spa-Francorchamps,BE
3,125,Sachsenring,DE
4,118,Circuito de Jerez - Angel Nieto,ES
...,...,...,...
67,3,Interlagos,BR
68,3,Magny Cours,FR
69,3,Johor,MY
70,3,Albi,FR


In [15]:
# Show the opening ten rows of the events table.
events_df.head(n=10)

Unnamed: 0,Times,Track,Country
0,280,TT Circuit Assen,NL
1,168,Automotodrom Brno,CZ
2,131,Spa-Francorchamps,BE
3,125,Sachsenring,DE
4,118,Circuito de Jerez - Angel Nieto,ES
5,116,Autodromo Internazionale del Mugello,IT
6,116,Le Mans,FR
7,114,Isle of Man,GB
8,111,Monza,IT
9,102,Misano World Circuit Marco Simoncelli,IT


In [16]:
# Show the closing ten rows of the events table.
events_df.tail(n=10)

Unnamed: 0,Times,Track,Country
62,5,KymiRing,FI
63,4,Hedemora,SE
64,3,Reims,FR
65,3,Pertamina Mandalika Circuit,ID
66,3,Mosport,CA
67,3,Interlagos,BR
68,3,Magny Cours,FR
69,3,Johor,MY
70,3,Albi,FR
71,2,Schotten,DE


## Dataset 3: Grand Prix Race Winners

In [17]:
# Read the Grand Prix race winners CSV into `winners_df`.
winners_df = pd.read_csv(data_dir.joinpath('grand-prix-race-winners.csv'))

# Summary: row count, column count, then the column names.
print(
    f"Registos: {winners_df.shape[0]}, Colunas: {winners_df.shape[1]}",
    f"Colunas: {winners_df.columns.tolist()}",
    sep="\n",
)

# Bare last expression so the notebook renders the frame.
winners_df

Registos: 3083, Colunas: 6
Colunas: ['Circuit', 'Class', 'Constructor', 'Country', 'Rider', 'Season']


Unnamed: 0,Circuit,Class,Constructor,Country,Rider,Season
0,Circuit Of The Americas,Moto3™,KTM,ES,Jaume Masia,2022
1,Circuit Of The Americas,Moto2™,Kalex,IT,Tony Arbolino,2022
2,Circuit Of The Americas,MotoGP™,Ducati,IT,Enea Bastianini,2022
3,Termas de Río Hondo,Moto3™,GASGAS,ES,Sergio Garcia,2022
4,Termas de Río Hondo,MotoGP™,Aprilia,ES,Aleix Espargaro,2022
...,...,...,...,...,...,...
3078,Berne,MotoGP™,AJS,GB,Leslie Graham,1949
3079,Berne,125cc,Mondial,IT,Nello Pagani,1949
3080,Isle of Man,MotoGP™,Norton,GB,Harold Daniell,1949
3081,Isle of Man,250cc,Moto Guzzi,IE,Manliefe Barrington,1949


In [18]:
# Show the opening ten rows of the race winners table.
winners_df.head(n=10)

Unnamed: 0,Circuit,Class,Constructor,Country,Rider,Season
0,Circuit Of The Americas,Moto3™,KTM,ES,Jaume Masia,2022
1,Circuit Of The Americas,Moto2™,Kalex,IT,Tony Arbolino,2022
2,Circuit Of The Americas,MotoGP™,Ducati,IT,Enea Bastianini,2022
3,Termas de Río Hondo,Moto3™,GASGAS,ES,Sergio Garcia,2022
4,Termas de Río Hondo,MotoGP™,Aprilia,ES,Aleix Espargaro,2022
5,Termas de Río Hondo,Moto2™,Kalex,IT,Celestino Vietti,2022
6,Pertamina Mandalika Circuit,MotoGP™,KTM,PT,Miguel Oliveira,2022
7,Pertamina Mandalika Circuit,Moto2™,Kalex,TH,Somkiat Chantra,2022
8,Pertamina Mandalika Circuit,Moto3™,Honda,IT,Dennis Foggia,2022
9,Lusail International Circuit,Moto3™,Honda,IT,Andrea Migno,2022


In [19]:
# Show the closing ten rows of the race winners table.
winners_df.tail(n=10)

Unnamed: 0,Circuit,Class,Constructor,Country,Rider,Season
3073,TT Circuit Assen,MotoGP™,Mondial,IT,Nello Pagani,1949
3074,TT Circuit Assen,125cc,Mondial,IT,Nello Pagani,1949
3075,TT Circuit Assen,350cc,Velocette,GB,Freddie Frith,1949
3076,Berne,350cc,Velocette,GB,Freddie Frith,1949
3077,Berne,250cc,Moto Guzzi,IT,Bruno Ruffo,1949
3078,Berne,MotoGP™,AJS,GB,Leslie Graham,1949
3079,Berne,125cc,Mondial,IT,Nello Pagani,1949
3080,Isle of Man,MotoGP™,Norton,GB,Harold Daniell,1949
3081,Isle of Man,250cc,Moto Guzzi,IE,Manliefe Barrington,1949
3082,Isle of Man,350cc,Velocette,GB,Freddie Frith,1949


## Dataset 4: Riders Finishing Positions

In [20]:
# Read the riders' finishing positions CSV into `positions_df`.
positions_df = pd.read_csv(data_dir.joinpath('riders-finishing-positions.csv'))

# Summary: row count, column count, then the column names.
print(
    f"Registos: {positions_df.shape[0]}, Colunas: {positions_df.shape[1]}",
    f"Colunas: {positions_df.columns.tolist()}",
    sep="\n",
)

# Bare last expression so the notebook renders the frame.
positions_df

Registos: 394, Colunas: 8
Colunas: ['Rider', 'Victories', 'NumberofSecond', 'NumberofThird', 'Numberof4th', 'Numberof5th', 'Numberof6th', 'Country']


Unnamed: 0,Rider,Victories,NumberofSecond,NumberofThird,Numberof4th,Numberof5th,Numberof6th,Country
0,Giacomo Agostini,122,67,53,48,34,24,IT
1,Valentino Rossi,115,52,47,38,30,21,IT
2,Angel Nieto,90,51,40,35,25,21,ES
3,Marc Marquez,85,44,36,28,23,21,ES
4,Mike Hailwood,76,41,33,26,22,20,GB
...,...,...,...,...,...,...,...,...
389,Tetsuta Nagashima,1,1,2,2,2,2,JP
390,Gyula Marsovszky,1,1,2,2,2,2,CH
391,Regis Laconi,1,1,2,2,2,2,FR
392,Can öncü,1,1,2,2,2,2,TR


In [21]:
# Show the opening ten rows of the finishing positions table.
positions_df.head(n=10)

Unnamed: 0,Rider,Victories,NumberofSecond,NumberofThird,Numberof4th,Numberof5th,Numberof6th,Country
0,Giacomo Agostini,122,67,53,48,34,24,IT
1,Valentino Rossi,115,52,47,38,30,21,IT
2,Angel Nieto,90,51,40,35,25,21,ES
3,Marc Marquez,85,44,36,28,23,21,ES
4,Mike Hailwood,76,41,33,26,22,20,GB
5,Jorge Lorenzo,68,39,28,25,22,20,ES
6,Mick Doohan,54,36,27,24,22,19,AU
7,Dani Pedrosa,54,35,26,22,21,19,ES
8,Phil Read,52,35,25,22,21,18,GB
9,Casey Stoner,45,35,25,21,20,17,AU


In [22]:
# Show the closing ten rows of the finishing positions table.
positions_df.tail(n=10)

Unnamed: 0,Rider,Victories,NumberofSecond,NumberofThird,Numberof4th,Numberof5th,Numberof6th,Country
384,Phil Carpenter,1,1,2,2,2,3,GB
385,Jack Ahearn,1,1,2,2,2,3,AU
386,Angelo Copeta,1,1,2,2,2,3,IT
387,Jean Louis Tournadre,1,1,2,2,2,3,FR
388,Derek Minter,1,1,2,2,2,2,GB
389,Tetsuta Nagashima,1,1,2,2,2,2,JP
390,Gyula Marsovszky,1,1,2,2,2,2,CH
391,Regis Laconi,1,1,2,2,2,2,FR
392,Can öncü,1,1,2,2,2,2,TR
393,R. Creith,1,1,2,2,2,2,IE


## Dataset 5: Riders Info

In [23]:
# Read the riders info CSV into `riders_df`.
riders_df = pd.read_csv(data_dir.joinpath('riders-info.csv'))

# Summary: row count, column count, then the column names.
print(
    f"Registos: {riders_df.shape[0]}, Colunas: {riders_df.shape[1]}",
    f"Colunas: {riders_df.columns.tolist()}",
    sep="\n",
)

# Bare last expression so the notebook renders the frame.
riders_df

Registos: 368, Colunas: 7
Colunas: ['Riders All Time in All Classes', 'Victories', '2nd places', '3rd places', "Pole positions from '74 to 2022", 'Race fastest lap to 2022', 'World Championships']


Unnamed: 0,Riders All Time in All Classes,Victories,2nd places,3rd places,Pole positions from '74 to 2022,Race fastest lap to 2022,World Championships
0,AGOSTINI Giacomo,122,35.0,2.0,9.0,117.0,15.0
1,ROSSI Valentino,115,67.0,53.0,65.0,96.0,9.0
2,NIETO Angel,90,35.0,14.0,34.0,81.0,13.0
3,MARQUEZ Marc,85,36.0,17.0,90.0,75.0,8.0
4,HAILWOOD Mike,76,25.0,11.0,,79.0,9.0
...,...,...,...,...,...,...,...
363,ROSSI Louis,1,,,,,
364,LEONI Guido,1,,,,,
365,FELGENHEIER Roland,1,,,,,
366,AUREAL Jean,1,,,,,


In [24]:
# Show the opening ten rows of the riders info table.
riders_df.head(n=10)

Unnamed: 0,Riders All Time in All Classes,Victories,2nd places,3rd places,Pole positions from '74 to 2022,Race fastest lap to 2022,World Championships
0,AGOSTINI Giacomo,122,35.0,2.0,9.0,117.0,15.0
1,ROSSI Valentino,115,67.0,53.0,65.0,96.0,9.0
2,NIETO Angel,90,35.0,14.0,34.0,81.0,13.0
3,MARQUEZ Marc,85,36.0,17.0,90.0,75.0,8.0
4,HAILWOOD Mike,76,25.0,11.0,,79.0,9.0
5,LORENZO Jorge,68,51.0,33.0,69.0,37.0,5.0
6,PEDROSA Dani,54,52.0,47.0,49.0,64.0,3.0
7,DOOHAN Mick,54,31.0,10.0,58.0,46.0,5.0
8,READ Phil,52,44.0,25.0,5.0,36.0,7.0
9,REDMAN Jim,45,33.0,20.0,,35.0,6.0


In [25]:
# Show the closing ten rows of the riders info table.
riders_df.tail(n=10)

Unnamed: 0,Riders All Time in All Classes,Victories,2nd places,3rd places,Pole positions from '74 to 2022,Race fastest lap to 2022,World Championships
358,ÖNCÜ Can,1,,,1.0,,
359,MUDFORD Ken,1,,,1.0,,
360,CZIHAK Edmund,1,,,1.0,,
361,BARRINGTON Manliefe,1,,,,,
362,IRELAND Dennis,1,,,,,
363,ROSSI Louis,1,,,,,
364,LEONI Guido,1,,,,,
365,FELGENHEIER Roland,1,,,,,
366,AUREAL Jean,1,,,,,
367,CARPENTER Phil,1,,,,,


## Dataset 6: Same Nation Podium Lockouts

In [26]:
# Read the same-nation podium lockouts CSV into `lockouts_df`.
lockouts_df = pd.read_csv(data_dir.joinpath('same-nation-podium-lockouts.csv'))

# Summary: row count, column count, then the column names.
print(
    f"Registos: {lockouts_df.shape[0]}, Colunas: {lockouts_df.shape[1]}",
    f"Colunas: {lockouts_df.columns.tolist()}",
    sep="\n",
)

# Bare last expression so the notebook renders the frame.
lockouts_df

Registos: 264, Colunas: 4
Colunas: ['Season', 'Track', 'Riders` Nation', 'Class']


Unnamed: 0,Season,Track,Riders` Nation,Class
0,2022,Termas de Río Hondo,ES,MotoGP™
1,2021,Circuit Ricardo Tormo,ES,Moto3™
2,2021,Misano World Circuit Marco Simoncelli,IT,MotoE™
3,2021,Misano World Circuit Marco Simoncelli,IT,Moto3™
4,2021,Silverstone Circuit,IT,Moto3™
...,...,...,...,...
259,1949,Spa-Francorchamps,GB,350cc
260,1949,TT Circuit Assen,GB,350cc
261,1949,TT Circuit Assen,IT,125cc
262,1949,Berne,GB,350cc


In [27]:
# Show the opening ten rows of the podium lockouts table.
lockouts_df.head(n=10)

Unnamed: 0,Season,Track,Riders` Nation,Class
0,2022,Termas de Río Hondo,ES,MotoGP™
1,2021,Circuit Ricardo Tormo,ES,Moto3™
2,2021,Misano World Circuit Marco Simoncelli,IT,MotoE™
3,2021,Misano World Circuit Marco Simoncelli,IT,Moto3™
4,2021,Silverstone Circuit,IT,Moto3™
5,2020,Circuit Ricardo Tormo,ES,MotoGP™
6,2020,MotorLand Aragón,ES,MotoGP™
7,2020,Misano World Circuit Marco Simoncelli,IT,MotoE™
8,2020,Misano World Circuit Marco Simoncelli,ES,MotoGP™
9,2020,Misano World Circuit Marco Simoncelli,IT,Moto2™


In [28]:
# Show the closing ten rows of the podium lockouts table.
lockouts_df.tail(n=10)

Unnamed: 0,Season,Track,Riders` Nation,Class
254,1950,Berne,GB,350cc
255,1950,TT Circuit Assen,GB,350cc
256,1950,TT Circuit Assen,IT,125cc
257,1949,Monza,IT,125cc
258,1949,Monza,IT,250cc
259,1949,Spa-Francorchamps,GB,350cc
260,1949,TT Circuit Assen,GB,350cc
261,1949,TT Circuit Assen,IT,125cc
262,1949,Berne,GB,350cc
263,1949,Berne,IT,125cc
