# Jugando con Pandas (contenido opcional)

Pandas es una librería que tiene una cantidad enorme de funciones. Aquí solo veremos algunas de las más utilizadas. Como decimos siempre, no hace falta memorizarlas: cada vez que necesitemos hacer algo específico podemos buscar en internet qué función necesitamos y cómo debemos utilizarla.

Los ejemplos de esta sección están basados en la documentación oficial de [Pandas](https://pandas.pydata.org/docs/user_guide/index.html).

In [1]:
import numpy as np
import pandas as pd

## Crear Series y Dataframes

In [7]:
# np.nan marks a missing value; the resulting Series has dtype float64.
pd.Series([1, 3, 5, np.nan, 6, 8])

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [8]:
# A dict maps each column name to its values; rows are labelled 0, 1, 2.
pd.DataFrame({'A': [1, 2, 3]})

Unnamed: 0,A
0,1
1,2
2,3


In [9]:
# Six consecutive daily timestamps starting on 2013-01-01.
dates = pd.date_range("20130101", periods=6)

In [10]:
# 6x4 frame of random normal samples, indexed by `dates`, with columns A-D.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))

In [11]:
# Display the frame created above.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.454358,0.627664,0.891408,-0.024442
2013-01-02,0.480017,-0.730066,-1.108936,-0.160607
2013-01-03,0.556655,2.128145,-1.685899,1.110209
2013-01-04,-2.744952,0.437,-1.089104,1.414878
2013-01-05,0.107375,0.82371,-0.082173,0.801242
2013-01-06,1.208453,-1.231494,0.663935,-1.936752


In [19]:
# Build a frame whose columns have different dtypes. Scalar entries
# ("A", "B", "F") are repeated to match the length of the array-like entries.
df2 = pd.DataFrame(
    data={
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3, 3, 3, 3], dtype="int32"),
        "E": pd.Categorical(["test", "train"] * 2),
        "F": "foo",
    },
)

In [20]:
# Display the mixed-dtype frame.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


## Inspeccionar elementos

In [12]:
# With no argument, head() returns the first 5 rows.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-0.454358,0.627664,0.891408,-0.024442
2013-01-02,0.480017,-0.730066,-1.108936,-0.160607
2013-01-03,0.556655,2.128145,-1.685899,1.110209
2013-01-04,-2.744952,0.437,-1.089104,1.414878
2013-01-05,0.107375,0.82371,-0.082173,0.801242


In [13]:
# Last 3 rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-2.744952,0.437,-1.089104,1.414878
2013-01-05,0.107375,0.82371,-0.082173,0.801242
2013-01-06,1.208453,-1.231494,0.663935,-1.936752


In [14]:
# Row labels (here a DatetimeIndex).
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [18]:
# Values as a plain NumPy array, without row or column labels.
df.to_numpy()

array([[-0.45435777,  0.62766444,  0.89140812, -0.02444165],
       [ 0.48001709, -0.73006569, -1.10893557, -0.16060704],
       [ 0.55665546,  2.12814513, -1.68589864,  1.11020861],
       [-2.74495235,  0.43700027, -1.08910355,  1.41487767],
       [ 0.10737544,  0.8237098 , -0.08217279,  0.80124239],
       [ 1.20845332, -1.23149361,  0.66393514, -1.93675208]])

In [21]:
# Per-column summary: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.141135,0.342493,-0.401795,0.200755
std,1.388082,1.194838,1.051825,1.21754
min,-2.744952,-1.231494,-1.685899,-1.936752
25%,-0.313924,-0.438299,-1.103978,-0.126566
50%,0.293696,0.532332,-0.585638,0.3884
75%,0.537496,0.774698,0.477408,1.032967
max,1.208453,2.128145,0.891408,1.414878


In [22]:
# Transpose: rows become columns and columns become rows.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-0.454358,0.480017,0.556655,-2.744952,0.107375,1.208453
B,0.627664,-0.730066,2.128145,0.437,0.82371,-1.231494
C,0.891408,-1.108936,-1.685899,-1.089104,-0.082173,0.663935
D,-0.024442,-0.160607,1.110209,1.414878,0.801242,-1.936752


In [31]:
# Sort by column name (axis=1), in descending order.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.024442,0.891408,0.627664,-0.454358
2013-01-02,-0.160607,-1.108936,-0.730066,0.480017
2013-01-03,1.110209,-1.685899,2.128145,0.556655
2013-01-04,1.414878,-1.089104,0.437,-2.744952
2013-01-05,0.801242,-0.082173,0.82371,0.107375
2013-01-06,-1.936752,0.663935,-1.231494,1.208453


In [24]:
# Sort the rows by the values of column "B" (ascending).
df.sort_values(by="B")

Unnamed: 0,A,B,C,D
2013-01-06,1.208453,-1.231494,0.663935,-1.936752
2013-01-02,0.480017,-0.730066,-1.108936,-0.160607
2013-01-04,-2.744952,0.437,-1.089104,1.414878
2013-01-01,-0.454358,0.627664,0.891408,-0.024442
2013-01-05,0.107375,0.82371,-0.082173,0.801242
2013-01-03,0.556655,2.128145,-1.685899,1.110209


## Seleccionar

In [32]:
# Selecting a single column returns a Series.
df["A"]

2013-01-01   -0.454358
2013-01-02    0.480017
2013-01-03    0.556655
2013-01-04   -2.744952
2013-01-05    0.107375
2013-01-06    1.208453
Freq: D, Name: A, dtype: float64

In [33]:
# A slice inside [] selects rows by position; the end position is excluded.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.454358,0.627664,0.891408,-0.024442
2013-01-02,0.480017,-0.730066,-1.108936,-0.160607
2013-01-03,0.556655,2.128145,-1.685899,1.110209


In [34]:
# Label-based selection: both ends of the label slice are included.
df.loc["20130102":"20130104", ["A", "B"]]

Unnamed: 0,A,B
2013-01-02,0.480017,-0.730066
2013-01-03,0.556655,2.128145
2013-01-04,-2.744952,0.437


In [35]:
# Position-based selection: rows 3-4 and columns 0-1 (end positions excluded).
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,-2.744952,0.437
2013-01-05,0.107375,0.82371


In [38]:
# Read a single value by row label and column label.
df.at[dates[0], "A"]

-0.4543577695083785

In [39]:
# Read a single value by integer row and column position.
df.iat[1, 1]

-0.7300656924567785

In [40]:
# Boolean mask: keep only the rows where column A is positive.
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2013-01-02,0.480017,-0.730066,-1.108936,-0.160607
2013-01-03,0.556655,2.128145,-1.685899,1.110209
2013-01-05,0.107375,0.82371,-0.082173,0.801242
2013-01-06,1.208453,-1.231494,0.663935,-1.936752


In [41]:
# Work on a copy so that df itself does not gain the new column.
df2 = df.copy()
# Assigning a list to a new key adds a column (one value per row).
df2["E"] = ["one", "one", "two", "three", "four", "three"]
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,-0.454358,0.627664,0.891408,-0.024442,one
2013-01-02,0.480017,-0.730066,-1.108936,-0.160607,one
2013-01-03,0.556655,2.128145,-1.685899,1.110209,two
2013-01-04,-2.744952,0.437,-1.089104,1.414878,three
2013-01-05,0.107375,0.82371,-0.082173,0.801242,four
2013-01-06,1.208453,-1.231494,0.663935,-1.936752,three


In [42]:
# Keep the rows whose E value is one of the listed options.
df2[df2["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2013-01-03,0.556655,2.128145,-1.685899,1.110209,two
2013-01-05,0.107375,0.82371,-0.082173,0.801242,four


## Modificar valores

In [50]:
# s1 is indexed from 2013-01-02, one day after df's first row.
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range("20130102", periods=6))
# Assignment aligns on the index: 2013-01-01 gets NaN, and the last value of
# s1 (dated 2013-01-07) has no matching row in df.
df["F"] = s1

In [51]:
# Set a single value by row label and column label.
df.at[dates[0], "A"] = 0

In [52]:
# Set a single value by position (row 0, column 1, i.e. column "B").
df.iat[0, 1] = 0

In [53]:
# Overwrite the whole column D with an array of 5s, one per row.
df.loc[:, "D"] = np.array([5] * len(df))

In [54]:
# Display the result of the assignments above.
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.891408,5,
2013-01-02,0.480017,-0.730066,-1.108936,5,1.0
2013-01-03,0.556655,2.128145,-1.685899,5,2.0
2013-01-04,-2.744952,0.437,-1.089104,5,3.0
2013-01-05,0.107375,0.82371,-0.082173,5,4.0
2013-01-06,1.208453,-1.231494,0.663935,5,5.0


In [55]:
# Work on a copy so df keeps its values.
df2 = df.copy()
# Replace every positive entry with its negation; other entries are untouched.
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.891408,-5,
2013-01-02,-0.480017,-0.730066,-1.108936,-5,-1.0
2013-01-03,-0.556655,-2.128145,-1.685899,-5,-2.0
2013-01-04,-2.744952,-0.437,-1.089104,-5,-3.0
2013-01-05,-0.107375,-0.82371,-0.082173,-5,-4.0
2013-01-06,-1.208453,-1.231494,-0.663935,-5,-5.0


## Valores faltantes

In [56]:
# reindex returns a new frame with only the first four dates and an extra
# column "E", which starts out with no values.
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ["E"])
# Fill E for the first two dates only; the other two stay missing.
df1.loc[dates[0] : dates[1], "E"] = 1
df1

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,0.891408,5,,1.0
2013-01-02,0.480017,-0.730066,-1.108936,5,1.0,1.0
2013-01-03,0.556655,2.128145,-1.685899,5,2.0,
2013-01-04,-2.744952,0.437,-1.089104,5,3.0,


In [57]:
# Drop every row that has at least one missing value.
df1.dropna(how="any")

Unnamed: 0,A,B,C,D,F,E
2013-01-02,0.480017,-0.730066,-1.108936,5,1.0,1.0


In [58]:
# Return a copy with the missing values replaced by 5 (df1 is not modified).
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,0.891408,5,5.0,1.0
2013-01-02,0.480017,-0.730066,-1.108936,5,1.0,1.0
2013-01-03,0.556655,2.128145,-1.685899,5,2.0,5.0
2013-01-04,-2.744952,0.437,-1.089104,5,3.0,5.0


In [59]:
# Boolean frame: True where a value is missing.
pd.isna(df1)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,False,False,False,False,True,False
2013-01-02,False,False,False,False,False,False
2013-01-03,False,False,False,False,False,True
2013-01-04,False,False,False,False,False,True


## Operaciones

In [60]:
df.mean()  # default axis 0: one mean per column

A   -0.065409
B    0.237883
C   -0.401795
D    5.000000
F    3.000000
dtype: float64

In [61]:
df.mean(axis=1)  # axis 1: one mean per row

2013-01-01    1.472852
2013-01-02    0.928203
2013-01-03    1.599780
2013-01-04    0.920589
2013-01-05    1.969782
2013-01-06    2.128179
Freq: D, dtype: float64

In [64]:
# Apply np.cumsum to each column: running total from the first row down.
df.apply(np.cumsum, axis=0)

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.891408,5,
2013-01-02,0.480017,-0.730066,-0.217527,10,1.0
2013-01-03,1.036673,1.398079,-1.903426,15,3.0
2013-01-04,-1.70828,1.83508,-2.99253,20,6.0
2013-01-05,-1.600904,2.65879,-3.074702,25,10.0
2013-01-06,-0.392451,1.427296,-2.410767,30,15.0


In [65]:
# Range (max minus min) of each column.
df.apply(lambda x: x.max() - x.min())

A    3.953406
B    3.359639
C    2.577307
D    0.000000
F    4.000000
dtype: float64

In [67]:
# Ten random integers from 0 to 6 (the upper bound 7 is excluded).
s = pd.Series(data=np.random.randint(low=0, high=7, size=10))
print(s)
# Number of occurrences of each distinct value, most frequent first.
s.value_counts()

0    6
1    6
2    2
3    3
4    5
5    3
6    6
7    6
8    1
9    1
dtype: int64


6    4
3    2
1    2
2    1
5    1
dtype: int64

In [68]:
# String methods are reached through .str; the missing entry stays NaN.
s = pd.Series(data=["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"])
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

## Combinar dataframes

In [43]:
# Two small frames whose key columns have different names (lkey / rkey).
left = pd.DataFrame(data={"lkey": ["foo", "bar"], "lval": [1, 2]})
right = pd.DataFrame(data={"rkey": ["foo", "bar"], "rval": [4, 5]})
print(left, right, sep="\n")

  lkey  lval
0  foo     1
1  bar     2
  rkey  rval
0  foo     4
1  bar     5


In [47]:
# join combines the columns of both frames, matching rows by index (0 and 1).
new_df = left.join(right)
new_df

Unnamed: 0,lkey,lval,rkey,rval
0,foo,1,foo,4
1,bar,2,bar,5


In [46]:
# right itself is unchanged: join returned a new frame.
right

Unnamed: 0,rkey,rval
0,foo,4
1,bar,5


In [55]:
top = pd.DataFrame(data={"lkey": ["foo", "bar"], "lval": [1, 2]})
down = pd.DataFrame(data={"rkey": ["foo", "bar"], "rval": [4, 5]})
# axis=0 stacks the rows; a column missing from one frame is filled with NaN.
pd.concat(objs=[top, down], axis=0)

Unnamed: 0,lkey,lval,rkey,rval
0,foo,1.0,,
1,bar,2.0,,
0,,,foo,4.0
1,,,bar,5.0


In [56]:
# axis=1 places the frames side by side, matching rows by index.
pd.concat([top, down], axis=1)

Unnamed: 0,lkey,lval,rkey,rval
0,foo,1,foo,4
1,bar,2,bar,5


In [58]:
top = pd.DataFrame(data={"key": ["foo1", "bar1"], "val": [1, 2]})
down = pd.DataFrame(data={"key": ["foo2", "bar2"], "val": [4, 5]})
# Both frames share the same columns, so stacking rows leaves no gaps.
# Each frame keeps its own index labels (0, 1, 0, 1).
pd.concat(objs=[top, down], axis=0)

Unnamed: 0,key,val
0,foo1,1
1,bar1,2
0,foo2,4
1,bar2,5


In [59]:
top = pd.DataFrame(data={"key": ["foo1", "bar1"], "val": [1, 2]})
down = pd.DataFrame(data={"key": ["foo2", "bar2"], "val": [4, 5]})
# axis=1 places the frames side by side, matching rows by index.
pd.concat(objs=[top, down], axis=1)

Unnamed: 0,key,val,key.1,val.1
0,foo1,1,foo2,4
1,bar1,2,bar2,5


## Combinar dataframes con una columna en comun

In [32]:
# Both frames have a "key" column holding the same values.
left = pd.DataFrame(data={"key": ["foo", "bar"], "lval": [1, 2]})
right = pd.DataFrame(data={"key": ["foo", "bar"], "rval": [4, 5]})
print(left, right, sep="\n")

   key  lval
0  foo     1
1  bar     2
   key  rval
0  foo     4
1  bar     5


In [20]:
# Combine the rows of both frames that have the same value in "key".
pd.merge(left, right, on="key")

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


In [33]:
# Same column name "key", but the two frames have no key value in common.
left = pd.DataFrame(data={"key": ["foo", "foo"], "lval": [1, 2]})
right = pd.DataFrame(data={"key": ["bar", "bar"], "rval": [4, 5]})
print(left, right, sep="\n")

   key  lval
0  foo     1
1  foo     2
   key  rval
0  bar     4
1  bar     5


In [34]:
# No key value appears in both frames, so the merge result has no rows.
pd.merge(left, right, on="key")

Unnamed: 0,key,lval,rval


## Agrupar

In [36]:
# Two label columns (A, B) to group by and two random numeric columns (C, D).
df = pd.DataFrame(
    data={
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    },
)
df

Unnamed: 0,A,B,C,D
0,foo,one,1.205185,-1.19031
1,bar,one,0.612185,-1.596358
2,foo,two,1.112486,0.54859
3,bar,three,-0.654032,-0.806613
4,foo,two,0.174001,0.134662
5,bar,two,-0.899722,0.517005
6,foo,one,1.675746,-0.987536
7,foo,three,0.189015,2.11272


In [38]:
# Group the rows by A and keep only the numeric columns C and D.
df_group = df.groupby("A")[["C", "D"]]
# Aggregating the groups yields a DataFrame with one row per value of A.
df_group.sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,-0.941569,-1.885966
foo,4.356433,0.618125


In [39]:
# The grouped object is a DataFrameGroupBy, not a DataFrame.
type(df_group)

pandas.core.groupby.generic.DataFrameGroupBy

In [40]:
# After aggregating, the result is a regular DataFrame.
type(df_group.sum())

pandas.core.frame.DataFrame

In [26]:
# numeric_only=True averages only the numeric columns (C and D). Without it,
# pandas 2.0 and later raise a TypeError on the string column B.
df.groupby("A").mean(numeric_only=True)

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,0.478881,-0.178509
foo,0.499821,0.063082


In [31]:
# Mean of column C within each group of A (one value per group).
df.groupby("A")["C"].mean()

A
bar    0.478881
foo    0.499821
Name: C, dtype: float64

In [30]:
# Largest of the per-group means of column C.
df.groupby("A")["C"].mean().max()

0.49982056340373926

In [14]:
# Grouping by two columns gives one row per (A, B) combination.
df.groupby(["A", "B"]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,1.030485,-2.073704
bar,three,-0.113282,0.349802
bar,two,-0.750178,0.375785
foo,one,0.198018,3.478038
foo,three,1.260804,1.146542
foo,two,-0.359204,-0.79687


## Reformar dataframes

In [78]:
# One (first, second) label pair per row: each outer label with "one" and "two".
tuples = [
    (outer, inner)
    for outer in ["bar", "baz", "foo", "qux"]
    for inner in ["one", "two"]
]
# Two-level row index built from the pairs, with the levels named.
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=list("AB"))
# Keep only the first four rows (the "bar" and "baz" groups).
df2 = df.iloc[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.480704,1.301521
bar,two,0.732761,-0.919214
baz,one,0.543472,0.778138
baz,two,0.649912,-0.277395


In [79]:
# stack moves the column labels (A, B) into a new innermost index level,
# turning the frame into a Series.
stacked = df2.stack()
stacked

first  second   
bar    one     A   -0.480704
               B    1.301521
       two     A    0.732761
               B   -0.919214
baz    one     A    0.543472
               B    0.778138
       two     A    0.649912
               B   -0.277395
dtype: float64

In [80]:
# With no argument, unstack moves the innermost level (A/B) back to columns.
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.480704,1.301521
bar,two,0.732761,-0.919214
baz,one,0.543472,0.778138
baz,two,0.649912,-0.277395


In [81]:
# Move index level 1 ("second") to the columns.
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.480704,0.732761
bar,B,1.301521,-0.919214
baz,A,0.543472,0.649912
baz,B,0.778138,-0.277395


In [82]:
# Move index level 0 ("first") to the columns.
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.480704,0.543472
one,B,1.301521,0.778138
two,A,0.732761,0.649912
two,B,-0.919214,-0.277395


## Tablas pivot

In [83]:
# Twelve rows: three label columns built by repeating short patterns, plus
# two random numeric columns.
df = pd.DataFrame(
    data={
        "A": ["one", "one", "two", "three"] * 3,
        "B": ["A", "B", "C"] * 4,
        "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 2,
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    },
)
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,0.001263,1.82412
1,one,B,foo,-0.217383,-0.101347
2,two,C,foo,-0.452849,0.789881
3,three,A,bar,0.286599,-0.358537
4,one,B,bar,-0.590526,0.200927
5,one,C,bar,-0.998354,1.834945
6,two,A,foo,0.503333,0.59845
7,three,B,foo,1.198225,1.96185
8,one,C,foo,0.927174,0.018226
9,one,A,bar,-0.25404,-0.912979


In [84]:
# Lay out the D values with (A, B) pairs as rows and the values of C as
# columns; combinations with no data show up as NaN.
pd.pivot_table(df, values="D", index=["A", "B"], columns=["C"])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.25404,0.001263
one,B,-0.590526,-0.217383
one,C,-0.998354,0.927174
three,A,0.286599,
three,B,,1.198225
three,C,0.850915,
two,A,,0.503333
two,B,0.854593,
two,C,,-0.452849


## Atributos categoricos

In [97]:
df = pd.DataFrame(
    data={
        "id": list(range(1, 7)),
        "raw_grade": ["a", "b", "b", "a", "a", "e"],
    }
)
# Converting to "category" stores the distinct values (a, b, e) as categories.
df["grade"] = df["raw_grade"].astype("category")
df["grade"]

0    a
1    b
2    b
3    a
4    a
5    e
Name: grade, dtype: category
Categories (3, object): ['a', 'b', 'e']

In [99]:
# New labels for the existing categories, in the same order (a, b, e).
new_categories = ["very good", "good", "very bad"]
df["grade"] = df["grade"].cat.rename_categories(new_categories)
df

Unnamed: 0,id,raw_grade,grade
0,1,a,very good
1,2,b,good
2,3,b,good
3,4,a,very good
4,5,a,very good
5,6,e,very bad


In [100]:
# Define the full list of categories and their order; "bad" and "medium" are
# added even though no row uses them yet.
df["grade"] = df["grade"].cat.set_categories(
    ["very bad", "bad", "medium", "good", "very good"]
)
df["grade"]

0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (5, object): ['very bad', 'bad', 'medium', 'good', 'very good']

In [101]:
df.sort_values(by="grade")  # sorts by the order the categories were set in, not alphabetically

Unnamed: 0,id,raw_grade,grade
5,6,e,very bad
1,2,b,good
2,3,b,good
0,1,a,very good
3,4,a,very good
4,5,a,very good


In [102]:
# observed=False keeps the categories that have no rows, so they appear with
# a count of 0. It is passed explicitly because pandas 2.1+ warns that the
# default for categorical groupers is changing.
df.groupby("grade", observed=False).size()

grade
very bad     1
bad          0
medium       0
good         2
very good    3
dtype: int64

## Leer y escribir en archivos

In [103]:
# Write the frame to a CSV file; the row index is written as the first column.
df.to_csv("foo.csv")

In [104]:
# Read the file back. The saved index comes back as an ordinary column named
# "Unnamed: 0", because it was written without a header name.
pd.read_csv("foo.csv")

Unnamed: 0.1,Unnamed: 0,id,raw_grade,grade
0,0,1,a,very good
1,1,2,b,good
2,2,3,b,good
3,3,4,a,very good
4,4,5,a,very good
5,5,6,e,very bad


In [None]:
# Write the frame to the sheet "Sheet1" of an Excel file, then read that
# sheet back, treating the text "NA" as a missing value.
df.to_excel("foo.xlsx", sheet_name="Sheet1")
pd.read_excel("foo.xlsx", sheet_name="Sheet1", index_col=None, na_values=["NA"])

# Ejercicios

## Crear un Dataframe
Crear un dataframe con dos columnas "A" y "B"
La columna A contiene los siguientes nombres: "Alicia", "Beto", "Camila"
La columna B contiene las respectivas edades: 31,42,35

In [74]:
# Column A holds the names and column B the matching ages.
df = pd.DataFrame(
    data={
        "A": ["Alicia", "Beto", "Camila"],
        "B": [31, 42, 35],
    }
)
df

Unnamed: 0,A,B
0,Alicia,31
1,Beto,42
2,Camila,35


## Agregar una columna
Agregar la columna "C" con los siguientes datos 1.7,1.72,1.67

In [75]:
# Assigning a list to a new key adds column C (one value per row).
df["C"] = [1.7,1.72,1.67]
df

Unnamed: 0,A,B,C
0,Alicia,31,1.7
1,Beto,42,1.72
2,Camila,35,1.67


## Cambiar el indice
Cambiar el indice de 0,1,2 por "a", "b", "c"

In [76]:
# Replace the row labels 0, 1, 2 with "a", "b", "c".
df.index = ["a","b","c"]
df

Unnamed: 0,A,B,C
a,Alicia,31,1.7
b,Beto,42,1.72
c,Camila,35,1.67


## Cambiar el nombre de las columnas
Cambiar el nombre de las columnas A, B, C por "Nombre", "Edad", "Estatura"

In [77]:
# Replace the column labels A, B, C, in that order.
df.columns = ["Nombre", "Edad", "Estatura"]
df

Unnamed: 0,Nombre,Edad,Estatura
a,Alicia,31,1.7
b,Beto,42,1.72
c,Camila,35,1.67


## Mostrar la columna "Nombre"
Utilizar el indexador `loc`

In [78]:
# All rows (:) of the column labelled "Nombre".
df.loc[:,"Nombre"]

a    Alicia
b      Beto
c    Camila
Name: Nombre, dtype: object

## Mostrar elementos

Mostrar el elemento del indice "b" y columna "Nombre" usando loc

In [79]:
# Single value at row label "b", column label "Nombre".
df.loc["b","Nombre"]

'Beto'

Mostrar el elemento de la fila 1 y la columna 0 usando iloc

In [80]:
# Single value at row position 1, column position 0.
df.iloc[1,0]

'Beto'

Mostrar la fila 1 usando iloc

In [81]:
# Whole row at position 1, returned as a Series indexed by column name.
df.iloc[1,:]

Nombre      Beto
Edad          42
Estatura    1.72
Name: b, dtype: object

## Ordenar dataframe
Ordenar el dataframe por la estatura de menor a mayor

In [82]:
# sort_values returns a new, sorted frame (ascending), so it is reassigned to df.
df = df.sort_values(by="Estatura")
df

Unnamed: 0,Nombre,Edad,Estatura
c,Camila,35,1.67
a,Alicia,31,1.7
b,Beto,42,1.72


## Agregar datos al dataframe

In [83]:
# Three more people; Francisco's height is unknown, so it is stored as NaN.
df2 = pd.DataFrame(
    data={
        "Nombre": ["Diana", "Ernesto", "Francisco"],
        "Edad": [26, 24, 53],
        "Estatura": [1.8, 1.76, np.nan],
    }
)
df2

Unnamed: 0,Nombre,Edad,Estatura
0,Diana,26,1.8
1,Ernesto,24,1.76
2,Francisco,53,


Agregar los datos del dataframe df2 a df

In [84]:
# DataFrame.append was removed in pandas 2.0. pd.concat stacks the rows of
# both frames and keeps each frame's original index labels.
df = pd.concat([df, df2])
df

Unnamed: 0,Nombre,Edad,Estatura
c,Camila,35,1.67
a,Alicia,31,1.7
b,Beto,42,1.72
0,Diana,26,1.8
1,Ernesto,24,1.76
2,Francisco,53,


## Reemplazar valores faltantes
Reemplazar los valores faltantes en la columna Estatura por la media de dicha columna

In [85]:
# Fill only the Estatura column. The dict restricts fillna to the named
# column, so a missing value elsewhere would not receive the height mean.
df = df.fillna({"Estatura": np.round(df["Estatura"].mean(), decimals=2)})
df

Unnamed: 0,Nombre,Edad,Estatura
c,Camila,35,1.67
a,Alicia,31,1.7
b,Beto,42,1.72
0,Diana,26,1.8
1,Ernesto,24,1.76
2,Francisco,53,1.73


## Mostrar usando condicionales en los indices
Mostrar las filas donde la estatura es mayor o igual a la media

In [86]:
# Keep the rows whose Estatura is at least the column mean, rounded to two
# decimals like the value used to fill the missing entry.
df[df.Estatura >= np.round(df.Estatura.mean(),decimals=2)]

Unnamed: 0,Nombre,Edad,Estatura
0,Diana,26,1.8
1,Ernesto,24,1.76
2,Francisco,53,1.73


# Fin: [Volver al contenido del curso](https://www.freecodingtour.com/cursos/espanol/datascience/datascience.html)