In [None]:
import fastf1
import pandas as pd
import os
import time
import logging
from datetime import datetime

CACHE_DIR = 'cache_folder'
OUTPUT_FILE = 'f1_master_dataset.csv'
START_YEAR_DEFAULT = 2000 
END_YEAR = datetime.now().year

if not os.path.exists(CACHE_DIR):
    os.makedirs(CACHE_DIR)

fastf1.Cache.enable_cache(CACHE_DIR)
logging.getLogger('fastf1').setLevel(logging.WARNING)

def obtener_anio_inicio():
    if os.path.exists(OUTPUT_FILE):
        try:
            df_existente = pd.read_csv(OUTPUT_FILE)
            if not df_existente.empty:
                ultimo_anio = df_existente['year'].max()
                print(f"Archivo encontrado con datos hasta: {ultimo_anio}")
                return int(ultimo_anio) + 1
        except:
            pass
    return START_YEAR_DEFAULT

def descargar_lento_y_seguro():
    start_year = obtener_anio_inicio()
    
    if start_year > END_YEAR:
        print("‚úÖ ¬°Ya tienes todos los datos!")
        return

    print(f"üê¢ MODO LENTO ACTIVADO: {start_year} -> {END_YEAR}")
    print("Vamos a ir despacio para que la API no nos bloquee.")

    for year in range(start_year, END_YEAR + 1):
        print(f"\n--- Procesando Temporada {year} ---")
        year_data = []
        
        try:
            schedule = fastf1.get_event_schedule(year, include_testing=False)
            
            schedule['Session5Date'] = pd.to_datetime(schedule['Session5Date'], utc=True)
            schedule['EventDate'] = pd.to_datetime(schedule['EventDate'], utc=True)
            schedule['RaceDate'] = schedule['Session5Date'].fillna(schedule['EventDate'])
            
            now = pd.Timestamp.now(tz='UTC')
            races_disputadas = schedule[schedule['RaceDate'] < now]

            for _, race in races_disputadas.iterrows():
                round_num = race['RoundNumber']
                gp_name = race['EventName']
                date_val = race['RaceDate']
                
                print(f"   R{round_num}: {gp_name}...", end=" ")
                
                try:
                    time.sleep(3)
                    
                    session = fastf1.get_session(year, round_num, 'R')
                    session.load(telemetry=False, weather=False, messages=False)
                    results = session.results
                    
                    if results.empty:
                        print("Vac√≠o, reintentando en 10s...", end=" ")
                        time.sleep(10)
                        session.load(telemetry=False, weather=False, messages=False)
                        results = session.results
                    
                    if results.empty:
                        print("‚ùå Fall√≥ definitivamente.")
                        continue

                    for _, driver_data in results.iterrows():
                        code = driver_data.get('Abbreviation', 'UNK')
                        if pd.isna(code) or code == '':
                            surname = driver_data.get('Surname', '')
                            code = surname[:3].upper() if surname else 'UNK'

                        row = {
                            'year': year,
                            'round': round_num,
                            'date': date_val,
                            'circuit_name': gp_name,
                            'driver_code': code,
                            'driver_number': driver_data.get('DriverNumber', 0),
                            'team_name': driver_data.get('TeamName', 'Unknown'),
                            'grid_position': driver_data.get('GridPosition', 0),
                            'classified_position': driver_data.get('ClassifiedPosition', 'R'),
                            'status': driver_data.get('Status', 'Finished'),
                            'points_real': driver_data.get('Points', 0.0)
                        }
                        year_data.append(row)
                    print("‚úÖ")
                        
                except Exception as e:
                    print(f"‚ö†Ô∏è Error: {e}")
                    continue
            
            if year_data:
                df_year = pd.DataFrame(year_data)
                header_mode = not os.path.exists(OUTPUT_FILE)
                df_year.to_csv(OUTPUT_FILE, mode='a', header=header_mode, index=False)
                print(f"üíæ Guardado a√±o {year}")
            
        except Exception as e:
            print(f"\n‚ùå Error cr√≠tico en {year}: {e}")
            break

if __name__ == "__main__":
    descargar_lento_y_seguro()

üìÇ Archivo encontrado con datos hasta: 2022
üê¢ MODO LENTO ACTIVADO: 2023 -> 2025
Vamos a ir despacio para que la API no nos bloquee.

--- Procesando Temporada 2023 ---
   R1: Bahrain Grand Prix... ‚úÖ
   R2: Saudi Arabian Grand Prix... 



‚úÖ
   R3: Australian Grand Prix... ‚úÖ
   R4: Azerbaijan Grand Prix... ‚úÖ
   R5: Miami Grand Prix... ‚úÖ
   R6: Monaco Grand Prix... ‚úÖ
   R7: Spanish Grand Prix... 



‚úÖ
   R8: Canadian Grand Prix... 



‚úÖ
   R9: Austrian Grand Prix... 

This might be a bug and should be reported.


‚úÖ
   R10: British Grand Prix... ‚úÖ
   R11: Hungarian Grand Prix... 



‚úÖ
   R12: Belgian Grand Prix... ‚úÖ
   R13: Dutch Grand Prix... 



‚úÖ
   R14: Italian Grand Prix... 



‚úÖ
   R15: Singapore Grand Prix... 



‚úÖ
   R16: Japanese Grand Prix... 



‚úÖ
   R17: Qatar Grand Prix... 



‚úÖ
   R18: United States Grand Prix... ‚úÖ
   R19: Mexico City Grand Prix... 



‚úÖ
   R20: S√£o Paulo Grand Prix... 



‚úÖ
   R21: Las Vegas Grand Prix... 



‚úÖ
   R22: Abu Dhabi Grand Prix... ‚úÖ
üíæ Guardado a√±o 2023

--- Procesando Temporada 2024 ---


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['Session5Date'] = pd.to_datetime(schedule['Session5Date'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['EventDate'] = pd.to_datetime(schedule['EventDate'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['RaceDate'] = schedule['Session5Date'].fillna(

   R1: Bahrain Grand Prix... ‚úÖ
   R2: Saudi Arabian Grand Prix... 



‚úÖ
   R3: Australian Grand Prix... ‚úÖ
   R4: Japanese Grand Prix... ‚úÖ
   R5: Chinese Grand Prix... 



‚úÖ
   R6: Miami Grand Prix... ‚úÖ
   R7: Emilia Romagna Grand Prix... ‚úÖ
   R8: Monaco Grand Prix... ‚úÖ
   R9: Canadian Grand Prix... ‚úÖ
   R10: Spanish Grand Prix... 



‚úÖ
   R11: Austrian Grand Prix... ‚úÖ
   R12: British Grand Prix... ‚úÖ
   R13: Hungarian Grand Prix... ‚úÖ
   R14: Belgian Grand Prix... 



‚úÖ
   R15: Dutch Grand Prix... ‚úÖ
   R16: Italian Grand Prix... ‚úÖ
   R17: Azerbaijan Grand Prix... ‚úÖ
   R18: Singapore Grand Prix... ‚úÖ
   R19: United States Grand Prix... 



‚úÖ
   R20: Mexico City Grand Prix... ‚úÖ
   R21: S√£o Paulo Grand Prix... 



‚úÖ
   R22: Las Vegas Grand Prix... 



‚úÖ
   R23: Qatar Grand Prix... 



‚úÖ
   R24: Abu Dhabi Grand Prix... ‚úÖ
üíæ Guardado a√±o 2024

--- Procesando Temporada 2025 ---


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['Session5Date'] = pd.to_datetime(schedule['Session5Date'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['EventDate'] = pd.to_datetime(schedule['EventDate'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['RaceDate'] = schedule['Session5Date'].fillna(

   R1: Australian Grand Prix... 



‚úÖ
   R2: Chinese Grand Prix... ‚úÖ
   R3: Japanese Grand Prix... ‚úÖ
   R4: Bahrain Grand Prix... ‚úÖ
   R5: Saudi Arabian Grand Prix... 



‚úÖ
   R6: Miami Grand Prix... 



‚úÖ
   R7: Emilia Romagna Grand Prix... ‚úÖ
   R8: Monaco Grand Prix... ‚úÖ
   R9: Spanish Grand Prix... ‚úÖ
   R10: Canadian Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R11: Austrian Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R12: British Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R13: Belgian Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R14: Hungarian Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R15: Dutch Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R16: Italian Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R17: Azerbaijan Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R18: Singapore Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R19: United States Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R20: Mexico City Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R21: S√£o Paulo Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
   R22: Las Vegas Grand Prix... 



‚ö†Ô∏è Vac√≠o, reintentando en 10s... 



‚ùå Fall√≥ definitivamente.
üíæ Guardado a√±o 2025


In [None]:
import fastf1
import pandas as pd
import os
import time
import logging
from datetime import datetime

CACHE_DIR = 'cache_folder'
OUTPUT_FILE = 'f1_master_dataset.csv'

START_YEAR = 2000
END_YEAR = 2025

if not os.path.exists(CACHE_DIR):
    os.makedirs(CACHE_DIR)

fastf1.Cache.enable_cache(CACHE_DIR)
logging.getLogger('fastf1').setLevel(logging.ERROR)

def rellenador_universal():
    print(f"üåç INICIANDO ESCANEO COMPLETO ({START_YEAR}-{END_YEAR})...")
    
    if not os.path.exists(OUTPUT_FILE):
        print("‚ùå No existe el archivo base. Ejecuta el descargador inicial.")
        return

    try:
        df = pd.read_csv(OUTPUT_FILE)
    except:
        print("‚ùå Archivo CSV corrupto.")
        return
    
    total_recuperadas = 0

    for year in range(START_YEAR, END_YEAR + 1):
        print(f"\n--- Analizando {year} ---")
        
        try:
            try:
                schedule = fastf1.get_event_schedule(year, include_testing=False)
            except:
                time.sleep(5)
                schedule = fastf1.get_event_schedule(year, include_testing=False)

            schedule['Session5Date'] = pd.to_datetime(schedule['Session5Date'], utc=True)
            schedule['EventDate'] = pd.to_datetime(schedule['EventDate'], utc=True)
            schedule['RaceDate'] = schedule['Session5Date'].fillna(schedule['EventDate'])
            
            now = pd.Timestamp.now(tz='UTC')
            oficiales = schedule[schedule['RaceDate'] < now]
            
            if oficiales.empty:
                print("   (Sin carreras disputadas)")
                continue

            rondas_teoricas = set(oficiales['RoundNumber'].unique())
            
            if year in df['year'].unique():
                rondas_reales = set(df[df['year'] == year]['round'].unique())
            else:
                rondas_reales = set()
            
            faltantes = sorted(list(rondas_teoricas - rondas_reales))
            
            if not faltantes:
                print(f"Completo ({len(rondas_reales)} carreras).")
                continue
            
            print(f"‚ö†Ô∏è FALTAN RONDAS: {faltantes}")
            print("   üîß Descargando...")
            
            nuevas_filas = []
            
            for r in faltantes:
                gp_info = oficiales[oficiales['RoundNumber'] == r]
                gp_name = gp_info['EventName'].values[0] if not gp_info.empty else f"R{r}"
                date_val = gp_info['RaceDate'].values[0] if not gp_info.empty else None
                
                print(f"      + Recuperando R{r} ({gp_name})...", end=" ")
                
                try:
                    time.sleep(5)
                    
                    session = fastf1.get_session(year, r, 'R')
                    session.load(telemetry=False, weather=False, messages=False)
                    res = session.results
                    
                    if res.empty:
                        print("‚ùå Vac√≠o")
                        continue

                    for _, d in res.iterrows():
                        code = d.get('Abbreviation', '')
                        if not code: code = (d.get('Surname', '')[:3] or 'UNK').upper()
                            
                        nuevas_filas.append({
                            'year': year,
                            'round': r,
                            'date': date_val,
                            'circuit_name': gp_name,
                            'driver_code': code,
                            'driver_number': d.get('DriverNumber', 0),
                            'team_name': d.get('TeamName', 'Unknown'),
                            'grid_position': d.get('GridPosition', 0),
                            'classified_position': d.get('ClassifiedPosition', 'R'),
                            'status': d.get('Status', 'Finished'),
                            'points_real': d.get('Points', 0.0)
                        })
                    print("‚úÖ")
                
                except Exception as e:
                    print(f"‚ö†Ô∏è Error: {e}")
                    time.sleep(10)
            
            if nuevas_filas:
                df_new = pd.DataFrame(nuevas_filas)
                df_new.to_csv(OUTPUT_FILE, mode='a', header=False, index=False)
                total_recuperadas += len(faltantes)
                print(f"   üíæ Bloque guardado.")

        except Exception as e:
            print(f"‚ùå Error en a√±o {year}: {e}")

    print(f"\nüèÅ PROCESO FINALIZADO. Se han a√±adido {total_recuperadas} carreras nuevas.")
    
    if total_recuperadas > 0:
        print("üßπ Reordenando dataset final...")
        try:
            df_final = pd.read_csv(OUTPUT_FILE)
            df_final.drop_duplicates(subset=['year', 'round', 'driver_code'], inplace=True)
            df_final.sort_values(by=['year', 'round', 'classified_position'], inplace=True)
            df_final.to_csv(OUTPUT_FILE, index=False)
            print("‚ú® Dataset Maestro Actualizado y Ordenado.")
        except:
            print("‚ö†Ô∏è No se pudo reordenar autom√°ticamente.")

if __name__ == "__main__":
    rellenador_universal()

üåç INICIANDO ESCANEO COMPLETO (2000-2025)...

--- Analizando 2000 ---
‚úÖ Completo (17 carreras).

--- Analizando 2001 ---
‚úÖ Completo (17 carreras).

--- Analizando 2002 ---
‚úÖ Completo (17 carreras).

--- Analizando 2003 ---
‚úÖ Completo (16 carreras).

--- Analizando 2004 ---
‚úÖ Completo (18 carreras).

--- Analizando 2005 ---
‚úÖ Completo (19 carreras).

--- Analizando 2006 ---
‚úÖ Completo (18 carreras).

--- Analizando 2007 ---
‚úÖ Completo (17 carreras).

--- Analizando 2008 ---
‚úÖ Completo (18 carreras).

--- Analizando 2009 ---
‚úÖ Completo (17 carreras).

--- Analizando 2010 ---
‚úÖ Completo (19 carreras).

--- Analizando 2011 ---
‚úÖ Completo (19 carreras).

--- Analizando 2012 ---
‚úÖ Completo (20 carreras).

--- Analizando 2013 ---
‚úÖ Completo (19 carreras).

--- Analizando 2014 ---
‚úÖ Completo (19 carreras).

--- Analizando 2015 ---
‚úÖ Completo (19 carreras).

--- Analizando 2016 ---
‚úÖ Completo (21 carreras).

--- Analizando 2017 ---
‚úÖ Completo (20 carreras).

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['Session5Date'] = pd.to_datetime(schedule['Session5Date'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['EventDate'] = pd.to_datetime(schedule['EventDate'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['RaceDate'] = schedule['Session5Date'].fillna(

‚úÖ Completo (17 carreras).

--- Analizando 2021 ---
‚úÖ Completo (22 carreras).

--- Analizando 2022 ---


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['Session5Date'] = pd.to_datetime(schedule['Session5Date'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['EventDate'] = pd.to_datetime(schedule['EventDate'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['RaceDate'] = schedule['Session5Date'].fillna(

‚úÖ Completo (22 carreras).

--- Analizando 2023 ---


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['Session5Date'] = pd.to_datetime(schedule['Session5Date'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['EventDate'] = pd.to_datetime(schedule['EventDate'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['RaceDate'] = schedule['Session5Date'].fillna(

‚úÖ Completo (22 carreras).

--- Analizando 2024 ---
‚úÖ Completo (24 carreras).

--- Analizando 2025 ---


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['Session5Date'] = pd.to_datetime(schedule['Session5Date'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['EventDate'] = pd.to_datetime(schedule['EventDate'], utc=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  schedule['RaceDate'] = schedule['Session5Date'].fillna(

‚ö†Ô∏è FALTAN RONDAS: [np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22)]
   üîß Descargando...
      + Recuperando R10 (Canadian Grand Prix)... ‚úÖ
      + Recuperando R11 (Austrian Grand Prix)... ‚úÖ
      + Recuperando R12 (British Grand Prix)... ‚úÖ
      + Recuperando R13 (Belgian Grand Prix)... ‚úÖ
      + Recuperando R14 (Hungarian Grand Prix)... ‚úÖ
      + Recuperando R15 (Dutch Grand Prix)... ‚úÖ
      + Recuperando R16 (Italian Grand Prix)... ‚úÖ
      + Recuperando R17 (Azerbaijan Grand Prix)... ‚úÖ
      + Recuperando R18 (Singapore Grand Prix)... ‚úÖ
      + Recuperando R19 (United States Grand Prix)... ‚úÖ
      + Recuperando R20 (Mexico City Grand Prix)... ‚úÖ
      + Recuperando R21 (S√£o Paulo Grand Prix)... ‚úÖ
      + Recuperando R22 (Las Vegas Grand Prix)... ‚úÖ
   üíæ Bloque guardado.

üèÅ PROCESO FINALIZADO. Se han a√±adido 13 carr

In [2]:
import pandas as pd
df = pd.read_csv('f1_dataset_base.csv')

print("PRIMERAS FILAS:")
print(df.head(10))

print("\n√öLTIMAS FILAS:")
print(df.tail(10))

print("\nINFO GENERAL:")
print(df.info())

print("\nESTAD√çSTICAS:")
print(df.describe())

print("\nVALORES √öNICOS POR COLUMNA:")
for col in df.columns:
    print(f"{col}: {df[col].nunique()} valores √∫nicos")

print("\nVALORES NULOS:")
print(df.isnull().sum())

print("\nTEMPORADAS DISPONIBLES:")
print(sorted(df['year'].unique()))

print("\nCARRERAS POR TEMPORADA:")
print(df.groupby('year')['round'].max().sort_index())

PRIMERAS FILAS:
   year  round                       date           circuit_name driver_code  \
0  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         MSC   
1  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         BAR   
2  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         SCH   
3  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         VIL   
4  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         FIS   
5  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         ZON   
6  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         WUR   
7  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         NaN   
8  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         HEI   
9  2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         BUT   

   driver_number team_name  grid_position classified_position    status  \
0              3   Ferrari  

In [3]:
df

Unnamed: 0,year,round,date,circuit_name,driver_code,driver_number,team_name,grid_position,classified_position,status,points_real
0,2000,1,2000-03-12 00:00:00+00:00,Australian Grand Prix,MSC,3,Ferrari,3.0,1,Finished,10.0
1,2000,1,2000-03-12 00:00:00+00:00,Australian Grand Prix,BAR,4,Ferrari,4.0,2,Finished,6.0
2,2000,1,2000-03-12 00:00:00+00:00,Australian Grand Prix,SCH,9,Williams,11.0,3,Finished,4.0
3,2000,1,2000-03-12 00:00:00+00:00,Australian Grand Prix,VIL,22,BAR,8.0,4,Finished,3.0
4,2000,1,2000-03-12 00:00:00+00:00,Australian Grand Prix,FIS,11,Benetton,9.0,5,Finished,2.0
...,...,...,...,...,...,...,...,...,...,...,...
9980,2025,22,2025-11-23 04:00:00,Las Vegas Grand Prix,NOR,4,McLaren,1.0,D,Disqualified,0.0
9981,2025,22,2025-11-23 04:00:00,Las Vegas Grand Prix,PIA,81,McLaren,5.0,D,Disqualified,0.0
9982,2025,22,2025-11-23 04:00:00,Las Vegas Grand Prix,ALB,23,Williams,16.0,R,Retired,0.0
9983,2025,22,2025-11-23 04:00:00,Las Vegas Grand Prix,BOR,5,Kick Sauber,18.0,R,Retired,0.0


In [4]:
import pandas as pd

df = pd.read_csv('f1_dataset_base.csv')

print("="*50)
print("INFORMACI√ìN GENERAL")
print("="*50)
print(f"Filas: {len(df)}")
print(f"Columnas: {list(df.columns)}")
print(f"A√±os: {df['year'].min()} - {df['year'].max()}")

print("\n" + "="*50)
print("PRIMERAS 20 FILAS")
print("="*50)
print(df.head(20).to_string())

print("\n" + "="*50)
print("√öLTIMAS 20 FILAS")
print("="*50)
print(df.tail(20).to_string())

print("\n" + "="*50)
print("CARRERAS POR TEMPORADA")
print("="*50)
print(df.groupby('year')['round'].max().to_string())

print("\n" + "="*50)
print("VALORES NULOS")
print("="*50)
print(df.isnull().sum())

print("\n" + "="*50)
print("TIPOS DE DATOS")
print("="*50)
print(df.dtypes)

INFORMACI√ìN GENERAL
Filas: 9985
Columnas: ['year', 'round', 'date', 'circuit_name', 'driver_code', 'driver_number', 'team_name', 'grid_position', 'classified_position', 'status', 'points_real']
A√±os: 2000 - 2025

PRIMERAS 20 FILAS
    year  round                       date           circuit_name driver_code  driver_number team_name  grid_position classified_position      status  points_real
0   2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         MSC              3   Ferrari            3.0                   1    Finished         10.0
1   2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         BAR              4   Ferrari            4.0                   2    Finished          6.0
2   2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         SCH              9  Williams           11.0                   3    Finished          4.0
3   2000      1  2000-03-12 00:00:00+00:00  Australian Grand Prix         VIL             22       BAR            8