In [2]:
import pandas as pd
import numpy as np

# Arreglo unidimensional (Lista)
pd.Series([1,2,3,np.nan,5,6])

0    1.0
1    2.0
2    3.0
3    NaN
4    5.0
5    6.0
dtype: float64

In [3]:
import pandas as pd
import numpy as np
dates = pd.date_range("20130101", periods=6)

# Extructura bidimensional (Tabla)
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.55759,0.517302,0.753919,-0.746625
2013-01-02,-0.791205,0.501864,1.684047,-0.305148
2013-01-03,0.512328,0.234572,0.553979,-0.417111
2013-01-04,0.191143,-0.363964,-0.433105,-0.88841
2013-01-05,1.521654,-1.355512,0.786137,0.409917
2013-01-06,0.418564,-1.063125,0.059928,-0.067552


In [4]:
# La tabla se puede armar con diferentes tipos haciendola mas dinamica
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [5]:
# Tambien pidemos argumentar los tipos para analizar de una manera mas rapida
# Y asi encontrar la columna que nos interesa con mas rapidad.
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [15]:
# Esto para ver el incio
df2.head(1)

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo


In [17]:
# Tail para ver el final
df2.tail(2)

Unnamed: 0,A,B,C,D,E,F
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [33]:
# Tambien podemos filtrar unicamente los indices con =>
df2.index

Index([0, 1, 2, 3], dtype='int64')

In [25]:
# Y filtrar unicamente las columnas con
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [35]:
# Transforma nuestro DataFrame en un array numpy
# Este array es mas compatible con otras librerias y ademas es mas potente al ser mal ligero
df2.to_numpy()

array([[1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'test', 'foo'],
       [1.0, Timestamp('2013-01-02 00:00:00'), 1.0, 3, 'train', 'foo']],
      dtype=object)

In [37]:
# Nos da un analisis basico de como esta distribuida nuestra tabla
df2.describe()

Unnamed: 0,A,B,C,D
count,4.0,4,4.0,4.0
mean,1.0,2013-01-02 00:00:00,1.0,3.0
min,1.0,2013-01-02 00:00:00,1.0,3.0
25%,1.0,2013-01-02 00:00:00,1.0,3.0
50%,1.0,2013-01-02 00:00:00,1.0,3.0
75%,1.0,2013-01-02 00:00:00,1.0,3.0
max,1.0,2013-01-02 00:00:00,1.0,3.0
std,0.0,,0.0,0.0


In [39]:
dates = pd.date_range("20130101", periods=6)

# Extructura bidimensional (Tabla)
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list("ABCD"))

df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,0.814051,0.644664,-1.874268,-0.226807,-1.381691,-0.058539
B,-0.476298,-1.277101,-0.186344,-2.105282,0.287866,0.586621
C,-0.23737,-0.129529,-0.65051,-0.420607,0.225346,-1.079269
D,0.22389,-0.591313,-0.318925,-0.07202,-0.983908,0.711651


In [45]:
# Ordenar nuestro Dataframe en funcion al index de manera descendente
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.22389,-0.23737,-0.476298,0.814051
2013-01-02,-0.591313,-0.129529,-1.277101,0.644664
2013-01-03,-0.318925,-0.65051,-0.186344,-1.874268
2013-01-04,-0.07202,-0.420607,-2.105282,-0.226807
2013-01-05,-0.983908,0.225346,0.287866,-1.381691
2013-01-06,0.711651,-1.079269,0.586621,-0.058539


In [47]:
# Ordenar los valores tomando en cuenta la columna "B"
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-04,-0.226807,-2.105282,-0.420607,-0.07202
2013-01-02,0.644664,-1.277101,-0.129529,-0.591313
2013-01-01,0.814051,-0.476298,-0.23737,0.22389
2013-01-03,-1.874268,-0.186344,-0.65051,-0.318925
2013-01-05,-1.381691,0.287866,0.225346,-0.983908
2013-01-06,-0.058539,0.586621,-1.079269,0.711651
