# 📝 Ejercicios Prácticos - Pandas

## Dataset: Reservas de Viajes en Uber/Taxi
Este cuaderno contiene ejercicios resueltos para practicar Pandas.

In [None]:
import pandas as pd

## 📥 Parte 1: Carga y Exploración Inicial

In [None]:
# Load the ride bookings CSV into the working DataFrame and preview its first rows.
df_rides = pd.read_csv(filepath_or_buffer='ncr_ride_bookings.csv')
df_rides.head(n=5)

In [None]:
# Preview the first 10 rows of the dataset.
df_rides.head(10)

In [None]:
# Print a concise summary of the DataFrame: columns, non-null counts, dtypes and memory usage.
df_rides.info()

In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df_rides.shape

In [None]:
# Descriptive statistics; with default arguments pandas summarises only the
# numeric columns when the frame contains any.
df_rides.describe()

In [None]:
# Percentage of missing values per column, keeping only the columns above 5 %.
missing_pct = df_rides.isna().mean().mul(100)
missing_pct[missing_pct > 5]

## 🔍 Parte 2: Filtrado y Selección

In [None]:
# Keep only the five columns used for the booking overview.
selected_columns = [
    'Booking ID',
    'Vehicle Type',
    'Booking Status',
    'Booking Value',
    'Ride Distance',
]
df_selected = df_rides.loc[:, selected_columns]
df_selected.head()

In [None]:
# Build one named mask per condition, then combine them:
# completed bookings worth more than 500 made with an 'Auto' or 'Go Mini' vehicle.
is_completed = df_rides['Booking Status'].eq('Completed')
is_high_value = df_rides['Booking Value'].gt(500)
is_target_vehicle = df_rides['Vehicle Type'].isin(['Auto', 'Go Mini'])

df_filtered = df_rides.loc[is_completed & is_high_value & is_target_vehicle]
df_filtered.head()

In [None]:
# Parse the date column; values that cannot be parsed become NaT instead of raising.
df_rides['Date'] = pd.to_datetime(df_rides['Date'], errors='coerce')

# Spanish month names come from a fixed lookup table instead of
# dt.month_name(locale='es_ES'): that call requires the 'es_ES' locale to be
# installed on the operating system and raises locale.Error where it is not,
# which makes the notebook fail on many machines and containers.
SPANISH_MONTH_NAMES = {
    1: 'Enero',
    2: 'Febrero',
    3: 'Marzo',
    4: 'Abril',
    5: 'Mayo',
    6: 'Junio',
    7: 'Julio',
    8: 'Agosto',
    9: 'Septiembre',
    10: 'Octubre',
    11: 'Noviembre',
    12: 'Diciembre',
}

# Rows whose date is NaT have no month number, so their Month_Name stays missing.
df_rides['Month_Name'] = df_rides['Date'].dt.month.map(SPANISH_MONTH_NAMES)
df_rides[['Date', 'Month_Name']].head()

In [None]:
# Rides booked in August (month number 8), highest booking value first.
august_rides = df_rides[df_rides['Date'].dt.month.eq(8)]
august_rides.sort_values(by='Booking Value', ascending=False).head()

## 📊 Parte 3: value_counts y unique

In [None]:
# Number of bookings per vehicle type, most frequent first
# (value_counts leaves out missing values by default).
df_rides['Vehicle Type'].value_counts()

In [None]:
# The three most frequent payment methods with their booking counts.
df_rides['Payment Method'].value_counts().head(3)

In [None]:
# Share of each booking status, expressed as a percentage of all non-missing statuses.
status_share = df_rides['Booking Status'].value_counts(normalize=True)
status_share.mul(100)

## 📐 Parte 4: Estadísticas y Operaciones

In [None]:
# Restrict to completed bookings, then summarise their booking value.
completed_mask = df_rides['Booking Status'].eq('Completed')
completed = df_rides.loc[completed_mask]

summary_stats = ['mean', 'median', 'std']
completed['Booking Value'].agg(summary_stats)

In [None]:
# Price per kilometre: booking value divided by ride distance, row by row.
# Rows where either operand is missing produce NaN.
booking_value = df_rides['Booking Value']
ride_distance = df_rides['Ride Distance']
df_rides['Price_Per_Km'] = booking_value.div(ride_distance)

df_rides[['Booking Value', 'Ride Distance', 'Price_Per_Km']].head()

In [None]:
# The five rides with the greatest distance, showing who booked them and what they cost.
longest_rides = df_rides.nlargest(n=5, columns='Ride Distance')
longest_rides[['Customer ID', 'Vehicle Type', 'Ride Distance', 'Booking Value']]

In [None]:
# Total booking value collected through each payment method.
value_by_payment = df_rides.groupby('Payment Method')['Booking Value']
value_by_payment.sum()

## 🔄 Parte 5: Transformación de Datos

In [None]:
# Fill missing values by assigning the result back to the column.
# Calling fillna(..., inplace=True) on df_rides[col] is chained assignment: it acts
# on an intermediate object, emits a FutureWarning in pandas 2.x and leaves df_rides
# unchanged when Copy-on-Write is enabled.
median_booking_value = df_rides['Booking Value'].median()
df_rides['Booking Value'] = df_rides['Booking Value'].fillna(median_booking_value)
df_rides['Payment Method'] = df_rides['Payment Method'].fillna('Unknown')

# Rows without a vehicle type are removed; here inplace=True is applied to the
# DataFrame itself, so it is not chained assignment.
df_rides.dropna(subset=['Vehicle Type'], inplace=True)

In [None]:
# Rename three columns to their Spanish names; all other columns keep their labels.
column_translations = {
    'Booking ID': 'ID_Reserva',
    'Vehicle Type': 'Tipo_Vehiculo',
    'Booking Value': 'Valor_Reserva',
}
df_rides.rename(columns=column_translations, inplace=True)
df_rides.head()

In [None]:
def cat_rating(x):
    """Return the descriptive label for a driver rating.

    Args:
        x: The rating value; may be missing (None / NaN).

    Returns:
        'Sin valorar' when the rating is missing, otherwise 'Excelente'
        (>= 4.5), 'Bueno' (>= 4.0), 'Regular' (>= 3.5) or 'Malo' (anything lower).
    """
    if pd.isna(x):
        return 'Sin valorar'

    # Bands are checked from the highest lower bound down; the first one reached wins.
    rating_bands = (
        (4.5, 'Excelente'),
        (4.0, 'Bueno'),
        (3.5, 'Regular'),
    )
    for lower_bound, label in rating_bands:
        if x >= lower_bound:
            return label
    return 'Malo'
# Label every driver rating with cat_rating and show the result next to the raw rating.
driver_ratings = df_rides['Driver Ratings']
df_rides['Rating_Category'] = driver_ratings.map(cat_rating)
df_rides[['Driver Ratings', 'Rating_Category']].head()

In [None]:
# Translate two vehicle type labels to Spanish; values that are not keys of the
# mapping are left as they are.
df_rides['Tipo_Vehiculo'] = df_rides['Tipo_Vehiculo'].replace({
    'eBike':'Moto Eléctrica',
    'Bike':'Moto'
})
# NOTE(review): this literal (non-regex) substring replacement swaps 'Sedan' for the
# identical text 'Sedan', so string values come out unchanged. Presumably a different
# target label was intended — confirm against the exercise statement.
df_rides['Tipo_Vehiculo'] = df_rides['Tipo_Vehiculo'].str.replace('Sedan','Sedan',regex=False)
# List the distinct vehicle labels after the replacements.
df_rides['Tipo_Vehiculo'].unique()

## 📈 Parte 6: Agrupación y Análisis Avanzado

In [None]:
# Per vehicle type: mean booking value, number of non-missing booking IDs and total distance.
metrics_by_column = {
    'Valor_Reserva': 'mean',
    'ID_Reserva': 'count',
    'Ride Distance': 'sum',
}
rides_by_vehicle = df_rides.groupby('Tipo_Vehiculo')
rides_by_vehicle.agg(metrics_by_column)

In [None]:
# Mean booking value for every combination of vehicle type (rows) and booking status (columns).
df_rides.pivot_table(
    index='Tipo_Vehiculo',
    columns='Booking Status',
    values='Valor_Reserva',
    aggfunc='mean',
)

In [None]:
# Group by the month number of the booking date and by vehicle type, then compute
# total value, number of non-missing booking IDs and mean value for each group.
booking_month = df_rides['Date'].dt.month
monthly_by_vehicle = df_rides.groupby([booking_month, 'Tipo_Vehiculo'])

monthly_by_vehicle.agg(
    total_reservas=pd.NamedAgg(column='Valor_Reserva', aggfunc='sum'),
    num_viajes=pd.NamedAgg(column='ID_Reserva', aggfunc='count'),
    valor_promedio=pd.NamedAgg(column='Valor_Reserva', aggfunc='mean'),
)

In [None]:
def _completed_count(booking_ids):
    """Count the rows of one group whose booking status is 'Completed'.

    The group's index is used to look the statuses up in df_rides, because the
    aggregation itself only receives the ID_Reserva values.
    """
    statuses = df_rides.loc[booking_ids.index, 'Booking Status']
    return statuses.eq('Completed').sum()


# Per pickup location: completed trips, mean booking value and mean driver rating;
# then keep the five locations with the most completed trips.
pickup_summary = df_rides.groupby('Pickup Location').agg(
    num_viajes=('ID_Reserva', _completed_count),
    valor_promedio=('Valor_Reserva', 'mean'),
    rating_promedio=('Driver Ratings', 'mean'),
)
pickup_summary.nlargest(5, 'num_viajes')

## 🎯 Parte 7: Caso Práctico Final

In [None]:
# Final report: a banner followed by five numbered sections, each printing one summary.
banner_rule = '=' * 60
print(banner_rule)
print('REPORTE DE ANÁLISIS DE VIAJES')
print(banner_rule)

# Rides grouped once by vehicle type; sections 1, 3 and 5 reuse this grouping.
rides_by_vehicle = df_rides.groupby('Tipo_Vehiculo')

# 1. Vehicle type with the highest mean booking value.
print('\n1. Vehículo más rentable')
mean_value_by_vehicle = rides_by_vehicle['Valor_Reserva'].mean()
print(mean_value_by_vehicle.sort_values(ascending=False).head(1))

# 2. Percentage share of every booking status.
print('\n2. Cancelaciones')
status_share = df_rides['Booking Status'].value_counts(normalize=True)
print(status_share * 100)

# 3. Mean ride distance per vehicle type.
print('\n3. Distancia promedio por vehículo')
print(rides_by_vehicle['Ride Distance'].mean())

# 4. Most frequently used payment method.
print('\n4. Método de pago preferido')
payment_counts = df_rides['Payment Method'].value_counts()
print(payment_counts.head(1))

# 5. Mean driver rating per vehicle type.
print('\n5. Ratings por vehículo')
print(rides_by_vehicle['Driver Ratings'].mean())