In [3]:
import pandas as pd

In [4]:
# pandas Series: a labelled one-dimensional structure built here from a plain list
numeros = [3, 4, 5, 6, 7]
serie = pd.Series(data=numeros)
# Show both the Series and its type as the cell result
(serie, type(serie))

(0    3
 1    4
 2    5
 3    6
 4    7
 dtype: int64,
 pandas.core.series.Series)

In [5]:
# DataFrames: start from a plain dict that maps each column name to its list of values
data = dict(
    name=["Alice", "Bob", "Charlie"],
    age=[25, 30, 35],
    city=["New York", "London", "Paris"],
)
# Show the raw dict and confirm it is still an ordinary Python dict
(data, type(data))

({'name': ['Alice', 'Bob', 'Charlie'],
  'age': [25, 30, 35],
  'city': ['New York', 'London', 'Paris']},
 dict)

In [6]:
# Build a DataFrame from the dictionary (each key becomes a column)
df = pd.DataFrame(data)
df

Unnamed: 0,name,age,city
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris


In [7]:
# Export the DataFrame to CSV; the row index is written too (index=True is the default)
df.to_csv("data.csv", index=True)

In [8]:
# Import the DataFrame back, using the first CSV column as the row index
import_df = pd.read_csv(filepath_or_buffer="data.csv", index_col=0)

import_df

Unnamed: 0,name,age,city
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris


In [9]:
# Select a single column: one label yields a Series (see the printed type)

name = df.loc[:, "name"]
print(name, type(name))



0      Alice
1        Bob
2    Charlie
Name: name, dtype: object <class 'pandas.core.series.Series'>


In [10]:
# Select one or more columns by passing a list of labels

df.loc[:, ["name", "age"]]

Unnamed: 0,name,age
0,Alice,25
1,Bob,30
2,Charlie,35


In [11]:
# Filter by index label: pick the row labelled 2

fila = df.loc[2, :]
fila

name    Charlie
age          35
city      Paris
Name: 2, dtype: object

In [12]:
# Filter by condition: keep the rows whose age is greater than 28

df[df["age"].gt(28)]

Unnamed: 0,name,age,city
1,Bob,30,London
2,Charlie,35,Paris


In [13]:
# Filter by several conditions combined element-wise with &:
# the name starts with "A" AND the age is greater than 20

filtrar = df["name"].str.startswith("A") & df["age"].gt(20)
df.loc[filtrar]

Unnamed: 0,name,age,city
0,Alice,25,New York


In [14]:
# Filter with a query expression written as a string

df.query(expr="age > 28")

Unnamed: 0,name,age,city
1,Bob,30,London
2,Charlie,35,Paris


In [15]:
# Filter by membership: keep the rows whose name appears in the given list
df.loc[df["name"].isin(["Bob", "Messi", "Charlie"])]


Unnamed: 0,name,age,city
1,Bob,30,London
2,Charlie,35,Paris


In [16]:
# Filter with a function

def longitud_5(name):
    """Return True when ``len(name)`` is exactly 5, False otherwise."""
    largo = len(name)
    return largo == 5

# Keep the rows for which longitud_5 returns True on the name
df.loc[df["name"].apply(longitud_5)]

Unnamed: 0,name,age,city
0,Alice,25,New York


In [17]:
# Filter by age between 30 and 35 (both bounds included)

df.loc[df["age"].between(left=30, right=35)]

Unnamed: 0,name,age,city
1,Bob,30,London
2,Charlie,35,Paris


In [18]:
import numpy as np

In [21]:
# Rebuild the example data, this time with missing entries (NaN in age, None in city)
data = dict(
    name=["Alice", "Bob", "Charlie"],
    age=[25, np.nan, 35],
    city=["New York", None, "Paris"],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,age,city
0,Alice,25.0,New York
1,Bob,,
2,Charlie,35.0,Paris


In [22]:
# Fill missing values per column: the mean age for "age", a placeholder text for "city"
df_fill = df.fillna(value={"age": df["age"].mean(), "city": "Desconocido"})
df_fill

Unnamed: 0,name,age,city
0,Alice,25.0,New York
1,Bob,30.0,Desconocido
2,Charlie,35.0,Paris


In [24]:
# Drop missing values: remove every row holding at least one missing entry
# (axis=0 and how="any" are the defaults, spelled out for clarity)
df_sin_nan = df.dropna(axis=0, how="any")
df_sin_nan

Unnamed: 0,name,age,city
0,Alice,25.0,New York
2,Charlie,35.0,Paris


In [27]:
# Replace missing values through a per-column {old: new} mapping
df_rem = df.replace(
    to_replace={
        "age": {np.nan: 0},  # NaN becomes zero in the age column
        "city": {None: "Desconocido"},  # None becomes a placeholder in the city column
    }
)
df_rem

Unnamed: 0,name,age,city
0,Alice,25.0,New York
1,Bob,0.0,Desconocido
2,Charlie,35.0,Paris


In [29]:
# Interpolate data: fill the gaps in "age" on a new frame; the other columns are carried over as-is

df_interpolado = df.assign(age=df["age"].interpolate())
df_interpolado

Unnamed: 0,name,age,city
0,Alice,25.0,New York
1,Bob,30.0,
2,Charlie,35.0,Paris


In [42]:
# Duplicated data: the last two rows repeat the "Ana" and "Juan" rows above them
data_duplicate = dict(
    name=["Ana", "Alice", "Juan", "Bob", "Charlie", "Ana", "Juan"],
    age=[22, 25, 25, np.nan, 35, 22, 25],
    city=["Barcelona", "New York", "Buenos Aires", "London", None, "Barcelona", "Buenos Aires"],
)
df_duplicado = pd.DataFrame(data=data_duplicate)
df_duplicado

Unnamed: 0,name,age,city
0,Ana,22.0,Barcelona
1,Alice,25.0,New York
2,Juan,25.0,Buenos Aires
3,Bob,,London
4,Charlie,35.0,
5,Ana,22.0,Barcelona
6,Juan,25.0,Buenos Aires


In [41]:
# Drop duplicates: keep the first occurrence of each repeated row (keep="first" is the default)
df_sin_duplicados = df_duplicado.drop_duplicates(keep="first")
df_sin_duplicados

Unnamed: 0,name,age,city
0,Ana,22.0,Barcelona
1,Alice,25.0,New York
2,Juan,25.0,Buenos Aires
3,Bob,,London
4,Charlie,35.0,


In [58]:
# Rename columns: the result is stored under a new name, df itself is not reassigned

df_renombrar = df.rename(columns=dict(name="Nombre", age="Edad", city="Ciudad"))
df_renombrar



Unnamed: 0,Nombre,Edad,Ciudad
0,Alice,25.0,New York
1,Bob,,
2,Charlie,35.0,Paris


In [56]:

# Reorder columns by selecting them in the desired order
columnas_ordenadas = ["city", "name", "age"]
df_ordenado = df.loc[:, columnas_ordenadas]
df_ordenado


Unnamed: 0,city,name,age
0,New York,Alice,25.0
1,,Bob,
2,Paris,Charlie,35.0


In [60]:
# Data transformation

def cuadrado(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)

# Add a derived column by applying cuadrado to every age value.
# Note: this assigns into df itself, so df gains the column from here on.
df["age_cuadrate"] = df["age"].apply(func=cuadrado)
df

Unnamed: 0,name,age,city,age_cuadrate
0,Alice,25.0,New York,625.0
1,Bob,,,
2,Charlie,35.0,Paris,1225.0
