# Pandas

Pandas es otra librería que, como se mencionó anteriormente, utiliza NumPy como
parte de su núcleo. Pandas tiene dos objetos principales: **Series** y **DataFrames**.

In [1]:
!pip install pandas

Collecting pandas
  Downloading pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl (11.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading pytz-2024.2-py2.py3-none-any.whl (508 kB)
Downloading tzdata-2024.2-py2.py3-none-any.whl (346 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.3 pytz-2024.2 tzdata-2024.2


In [1]:
# Importar la librería
import pandas as pd

In [2]:
# Report the pandas release available in this kernel
print("Pandas version:", pd.__version__)

Pandas version: 2.2.3


## Pandas Series

Una serie es un array de una dimensión. Es similar a los arrays de NumPy, con la
diferencia de que sus índices están etiquetados.

### Creación de una Serie

In [4]:
# Build a one-dimensional labelled array (Series) from a plain Python list.
# No index is passed, so the default integer labels are used.
data = [10, 20, 30, 40, 50]
series = pd.Series(data=data, name="Lista ejemplo")

# A bare expression on the last line is rendered as the cell output
series

0    10
1    20
2    30
3    40
4    50
Name: Lista ejemplo, dtype: int64

In [5]:
# Check the class of the object bound to `series`
type(series)

pandas.core.series.Series

In [6]:
import numpy as np

# One-dimensional integer array holding the values 1 through 5
array = np.arange(1, 6)

In [7]:
# A NumPy array can be wrapped in a Series the same way as a list
serie_np = pd.Series(data=array, name="Array numpy")

# Display the resulting Series
serie_np

0    1
1    2
2    3
3    4
4    5
Name: Array numpy, dtype: int64

In [8]:
# Arithmetic mean of the values stored in the Series
serie_np.mean()

np.float64(3.0)

## Pandas DataFrame

Un DataFrame es un arreglo de dos dimensiones, similar a lo que conocemos como una
tabla con filas y columnas (se puede asemejar a una hoja de cálculo de Excel).

### Creación de un Dataframe

#### A partir de Numpy

In [9]:
# One NumPy array per future column; all three have the same length
names = np.asarray(["Alice", "Bob", "Charlie"])
ages = np.asarray([25, 30, 35])
cities = np.asarray(["New York", "Los Angeles", "Chicago"])

In [11]:
# Assemble the three arrays into a table: each keyword becomes a column
# label and each array supplies that column's values.
df = pd.DataFrame(dict(Name=names, Age=ages, City=cities))

# Display the resulting table
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [None]:
# Same Age/City columns, but the names array supplies the row labels (index)
# instead of being stored as a regular column.
# The result is bound to its own name so that `df` is not overwritten: the
# cells that follow still display `df` with its "Name" column.
df_by_name = pd.DataFrame({"Age": ages, "City": cities}, index=names)

df_by_name

Unnamed: 0,Age,City
Alice,25,New York
Bob,30,Los Angeles
Charlie,35,Chicago


In [12]:
# Display the current contents of df
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [13]:
# Plain assignment does not copy: df2 becomes a second name for the same
# DataFrame object that df refers to.
df2 = df
df2

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [18]:
# Identity of the object currently bound to `df`, shown in hexadecimal
hex(id(df))

'0x10c4a03d0'

In [19]:
# Identity of the object bound to `df2`; comparing it with the value for `df`
# tells whether both names refer to the same object.
# NOTE(review): the saved execution counts suggest the `.copy()` cell further
# down ran before this one - confirm the outputs after Restart & Run All.
hex(id(df2))

'0x10c44e3d0'

In [17]:
# .copy() builds a new DataFrame instead of binding a second name to the
# existing one, so df2 no longer refers to the same object as df.
df2 = df.copy()

# Make the effect of the copy visible in this cell: evaluates to False
df2 is df

#### A partir de Diccionarios

In [None]:
# Column-oriented input: every key is a column label and every list holds
# that column's values, one entry per row.
data = dict(
    Name=["Alice", "Bob", "Charlie"],
    Age=[25, 30, 35],
    City=["New York", "Los Angeles", "Chicago"],
)
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [22]:
# Row-oriented input: one dict per row, with the keys acting as column labels
records = [
    dict(Name="Alice", Age=25, City="New York"),
    dict(Name="Bob", Age=30, City="Los Angeles"),
    dict(Name="Charlie", Age=35, City="Chicago"),
]
df = pd.DataFrame(records)

# Display the resulting table
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


#### A partir de archivos CSV

In [25]:
# Load a CSV file into a DataFrame (relative path, so it is resolved against
# the current working directory)
countries_csv = pd.read_csv(filepath_or_buffer="md_country.csv")

# Preview only the first five rows instead of the whole table
countries_csv.head(n=5)

Unnamed: 0,uuid_country,country_name_es,country_name_en,alpha2,alpha3,continent,calling_code,ofac,created_at,created_by,updated_at,updated_by
0,b26509d7-7a09-56de-ae9d-5b64febfad57,AFGANISTÁN,AFGHANISTAN,AF,AFG,ASIA,93,False,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7
1,960d00d2-38e2-57e7-bb13-b828555cd815,ALBANIA,ALBANIA,AL,ALB,EUROPA,355,False,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7
2,7492b603-b7ec-5c15-b4b8-5c05397b68fd,ALEMANIA,GERMANY,DE,DEU,EUROPA,49,False,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7
3,a351dfdf-7895-5563-8c76-07e769d49227,ANDORRA,ANDORRA,AD,AND,EUROPA,376,False,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7
4,87bcfa97-f33b-5d29-b73b-b5c82be398de,ANGOLA,ANGOLA,AO,AGO,AFRICA,244,False,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2024-03-05 11:12:13.848,bd45e290-c5bc-5a2c-80bf-2372f0d822e7


#### A partir de archivos JSON

In [26]:
# Load a JSON file into a DataFrame.
# NOTE(review): the file name and the columns in the saved output describe
# ports, while the variable is called countries_json - consider renaming.
countries_json = pd.read_json(path_or_buf="md_port_0.json")

# Preview only the first five rows
countries_json.head(n=5)

Unnamed: 0,uuid_port,locode,port_name,type_name,country_uuid,created_at,created_by,updated_at,updated_by
0,c940ebda-58ce-5f06-866e-05e85c856420,NLTHE,'T HORNTJE,MARITIME PORT,905a4460-6733-51d4-9301-925c3fa6d853,2023-01-01 05:00:00+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2023-01-01 05:00:00+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7
1,ed378e00-72c5-5d82-84a0-538a2c5b8419,CAHOS,100 MILE HOUSE (ONE HUNDRED MILE HO,AIR PORT,32894402-6493-5a21-8fed-f27d9e60ad06,2024-06-05 21:22:00.312000+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2024-06-05 21:22:00.312000+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7
2,cafc2f84-26c1-5479-8de2-ef45cc850346,ESAXO,A BAIUCA,MARITIME PORT,182f7f17-dcbf-5151-8ebe-ea841e58830f,2024-06-05 21:22:00.312000+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2024-06-05 21:22:00.312000+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7
3,48c1729f-3b50-5f1f-8c03-cd52a3bd068d,NOAAA,A I LOFOTEN,MARITIME PORT,c3e7bdfd-74a5-5451-a586-2e558c74bc5b,2024-06-05 21:22:00.312000+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2024-06-05 21:22:00.312000+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7
4,b4748cd5-80c4-5222-b501-5aaadae381e1,DEAAH,AACHEN,AIR PORT,7492b603-b7ec-5c15-b4b8-5c05397b68fd,2024-06-05 21:22:00.312000+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7,2024-06-05 21:22:00.312000+00:00,bd45e290-c5bc-5a2c-80bf-2372f0d822e7


In [None]:
# Rebind `df` to the three-person example table, built column by column
data = {}
data["Name"] = ["Alice", "Bob", "Charlie"]
data["Age"] = [25, 30, 35]
data["City"] = ["New York", "Los Angeles", "Chicago"]
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [28]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Using cached et_xmlfile-1.1.0-py3-none-any.whl.metadata (1.8 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Using cached et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.5


In [29]:
# Read the worksheet named "Sheet1" of an Excel workbook into a DataFrame
excel = pd.read_excel(io="fichero.xlsx", sheet_name="Sheet1")

In [30]:
# Display the table read from the Excel file
excel

Unnamed: 0,nombre,apellido,edad
0,Pepito,Perez,12
1,Carlitos,Camargo,11


## Explorando DataFrames
Pandas provee varios métodos para explorar los datos:

- `.head()`: Presenta los primeros registros
- `.describe()`: Nos presenta un resumen estadístico de los datos
- `.info()`: Provee un resumen conciso del Dataframe


In [20]:
# Show the first two rows. The bare expression lets the notebook render the
# DataFrame as a formatted table, which print() would flatten to plain text.
df.head(2)

    Name  Age         City
0  Alice   25     New York
1    Bob   30  Los Angeles


In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns, rendered as a table rather than printed as plain text.
df.describe()

        Age
count   3.0
mean   30.0
std     5.0
min    25.0
25%    27.5
50%    30.0
75%    32.5
max    35.0


In [19]:
# Concise summary of the DataFrame: index range, columns, non-null counts,
# dtypes and memory usage.
# info() writes the report itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   City    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes
None


## Accediendo a las columnas
Pandas nos permite acceder a los datos de una o más columnas, según sea necesario.

### Accediendo a una columna

In [9]:
# Selecting a single column label returns that column as a Series
df["Name"]

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object

In [32]:
# Passing a list of labels returns a DataFrame with those columns, in the
# order requested.
wanted_columns = ["Age", "City", "Name"]
df[wanted_columns]

Unnamed: 0,Age,City,Name
0,25,New York,Alice
1,30,Los Angeles,Bob
2,35,Chicago,Charlie


## Modificando un DataFrame

### Creando una nueva columna

In [33]:
# Add a "Score" column by assigning a list with one value per row.
# Note that this modifies df in place.
df["Score"] = [88, 92, 85]
df

Unnamed: 0,Name,Age,City,Score
0,Alice,25,New York,88
1,Bob,30,Los Angeles,92
2,Charlie,35,Chicago,85


In [34]:
# Derive a new column from an existing one: 2 is added to every score
# (the operation is applied element-wise to the whole column).
df["New_Score"] = df["Score"].add(2)
df

Unnamed: 0,Name,Age,City,Score,New_Score
0,Alice,25,New York,88,90
1,Bob,30,Los Angeles,92,94
2,Charlie,35,Chicago,85,87


## Filtrando los datos

### Filtrando basado en una condición


In [26]:
# Boolean mask: True for the rows whose Age is greater than 28
is_older_than_28 = df["Age"] > 28

# Indexing with the mask keeps only the rows marked True
df[is_older_than_28]

Unnamed: 0,Name,Age,City,Score,New_Score
1,Bob,30,Los Angeles,92,94
2,Charlie,35,Chicago,85,87


In [27]:
# One boolean mask per condition
age_above_28 = df["Age"].gt(28)
lives_in_chicago = df["City"].eq("Chicago")

# & combines the masks element-wise: a row is kept only when both are True
df[age_above_28 & lives_in_chicago]

Unnamed: 0,Name,Age,City,Score,New_Score
2,Charlie,35,Chicago,85,87


## Actividad

Haciendo uso de pandas, resolver los siguientes puntos:
1. Cargar el dataset que se encuentra en el siguiente link https://raw.githubusercontent.com/hrodriguezgi/computacion_fisica/refs/heads/main/Clase_21/motion_data.csv a un dataframe de pandas.
2. Calcular el desplazamiento de cada uno de los objetos
3. Graficar desplazamiento vs tiempo para cada uno de los objetos
4. Calcular la energía cinética de los objetos teniendo en cuenta que sus masas son:
    - Ball: $0.5 \, \mathrm{kg}$
    - Car: $1.5 \, \mathrm{kg}$
5. ¿Cuál es la velocidad máxima de cada objeto?