# Ejercicios de pandas
Notebook con ejemplos prácticos para cada punto.

## 1. Introducción a pandas

In [2]:
import pandas as pd
import numpy as np
pd.__version__

'2.2.2'

## 2. Series y DataFrames

In [11]:
s = pd.Series([10, 20, 30], index=['a','b','c'])

print(s.head(1))
print(s.tail(1))
print(s.shape)
print(s.index)
print(s.info)
print(s.describe())
print(s.dtypes)


a    10
dtype: int64
c    30
dtype: int64
(3,)
Index(['a', 'b', 'c'], dtype='object')
<bound method Series.info of a    10
b    20
c    30
dtype: int64>
count     3.0
mean     20.0
std      10.0
min      10.0
25%      15.0
50%      20.0
75%      25.0
max      30.0
dtype: float64
int64


In [7]:
df = pd.DataFrame({'pais':['MX','AR','MX'], 'ventas':[100,200,150]})

print(df.head(1))
print(df.tail(1))
print(df.shape)
print(df.index)
print(df.info)
print(df.describe())
print(df.dtypes)


  pais  ventas
0   MX     100
  pais  ventas
2   MX     150
(3, 2)


## 3. Importación y limpieza

In [18]:
url="https://github.com/Twoeme/Prueba2/raw/refs/heads/main/Food_Supply_Quantity_kg_Data.csv"
df = pd.read_csv(url, sep=";")
df.head()

Unnamed: 0,"Country,""Alcoholic Beverages"",""Animal fats"",""Animal Products"",""Aquatic Products, Other"",""Cereals - Excluding Beer"",""Eggs"",""Fish, Seafood"",""Fruits - Excluding Wine"",""Meat"",""Milk - Excluding Butter"",""Miscellaneous"",""Offals"",""Oilcrops"",""Pulses"",""Spices"",""Starchy Roots"",""Stimulants"",""Sugar & Sweeteners"",""Sugar Crops"",""Treenuts"",""Vegetable Oils"",""Vegetables"",""Vegetal Products"",""Obesity"",""Undernourished"",""Confirmed"",""Deaths"",""Recovered"",""Active"",""Population"",""Unit (all except Population)"""
0,"Afghanistan,0.0014,0.1973,9.4341,0,24.8097,0.2..."
1,"Albania,1.6719,0.1357,18.7684,0,5.7817,0.5815,..."
2,"Algeria,0.2711,0.0282,9.6334,0,13.6816,0.5277,..."
3,"Angola,5.8087,0.056,4.9278,0,9.1085,0.0587,1.7..."
4,"Antigua and Barbuda,3.5764,0.0087,16.6613,0,5...."
5,"Argentina,4.2672,0.2234,19.3454,0,8.4102,0.997..."
6,"Armenia,0.4014,0.1833,13.564,0,7.2982,0.5783,0..."
7,"Australia,5.5436,0.3143,21.4175,0.0033,5.4979,..."
8,"Austria,7.0215,0.8555,19.5654,0.0011,6.2116,0...."
9,"Azerbaijan,3.5969,0.2544,11.6416,0,13.0898,0.5..."


#Limpieza de datos
•	Valores faltantes

•	Tipos incorrectos

•	Errores de formato

•	Nombres de columnas inconsistentes


In [None]:
df.isna()
df.isna().sum()

df.dropna(how='all', subset=["col1"])
df.dropna(how='any')
df.fillna(0)
#df.fillna(method="bfill", "ffill")
#Rellenar con un número
#Rellenar con la media de la columna
#Rellenar con el valor anterior (method="ffill")
#Rellenar con el siguiente (method="bfill")


In [None]:
df = pd.DataFrame({'col':[1,2,np.nan,4], 'duplicado':[1,1,2,2]})
df

In [None]:
df.duplicated()
df.drop_duplicates()
#subset=["col1","col2"] → considerar duplicadas solo esas columnas
#keep="first" o keep="last"


In [None]:
df.rename(columns={"old":"new"})

In [None]:
df.columns = df.columns.str.lower()

In [None]:
df.columns = df.columns.str.replace(" ", "_")

In [None]:
df.columns = df.columns.str.replace(" ", "_")

In [None]:
df["col"] = df["col"].astype(int)
df["fecha"] = pd.to_datetime(df["fecha"])


In [None]:
df["col"] = df["col"].astype("category") #No es buena practica

# Flujo típico de limpieza
1.	Cargar archivo (CSV, Excel, JSON).

2.	Explorar estructura con .head(), .info(), .describe().

3.	Renombrar columnas.

4.	Tratar valores nulos.

5.	Eliminar duplicados.

6.	Arreglar tipos de datos.

7.	Verificar resultados.


## 4. Selección y filtrado

In [None]:
df = pd.DataFrame({'A':[1,2,3,4], 'B':[10,20,30,40]}, index=['a','b','c','d'])
df.loc['b':'d']

In [None]:
df.iloc[1:3]

In [None]:
df[df['A']>2]

## 5. Transformación de datos

In [None]:
df = pd.DataFrame({'A':[1,2,3], 'B':[10,20,30]})
df['C']=df['A']+df['B']
df

In [None]:
df.apply(lambda x: x*2)

## 6. Agrupaciones y análisis

In [None]:
df = pd.DataFrame({'pais':['MX','MX','AR'], 'ventas':[100,200,300]})
df.groupby('pais').sum()

In [None]:
df.groupby('pais').agg({'ventas':['sum','mean']})

## 7. Unión y combinación

In [None]:
df1 = pd.DataFrame({'id':[1,2], 'A':[10,20]})
df2 = pd.DataFrame({'id':[1,3], 'B':[100,300]})
pd.merge(df1, df2, on='id', how='outer')

## 8. Visualización básica (conceptual)

Los siguientes comandos funcionarían si matplotlib estuviera configurado:
```
df['A'].plot()
```

## 9. Optimización

In [None]:
df = pd.DataFrame({'x':np.random.randint(0,100,size=1000)})
df.info()

In [None]:
df['x'] = df['x'].astype('int32')
df.info()

## 10. Mini proyecto final

In [None]:
df = pd.DataFrame({'pais':['MX','AR','MX','CL'], 'ventas':[100,200,150,300]})
df