# Uaktualnianie indeksu

In [62]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [63]:
# Four float observations keyed by deliberately unsorted string labels.
obj = pd.Series(
    data=[4.5, 7.2, -5.3, 3.6],
    index=list('dbac'),
)
obj


d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

## reindex - uaktualnianie indeksu
zmiana kolejności danych i dopasowanie ich do nowego indeksu; etykiety, których nie było w pierwotnym indeksie, otrzymują wartość NaN

In [64]:
# Re-align obj to the labels a..e; 'e' has no source value, so it shows up as NaN.
obj2 = obj.reindex(index=list('abcde'))
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

Przy szeregach czasowych podczas reindeksowania może być potrzebne uzupełnienie brakujących wartości; opcja `method='ffill'` wypełnia je w przód, czyli ostatnią wcześniejszą wartością.

In [65]:
# Colour names stored under the sparse integer labels 0, 2 and 4.
obj3 = pd.Series(
    data=['blue', 'purple', 'yellow'],
    index=[0, 2, 4],
)
obj3

0      blue
2    purple
4    yellow
dtype: object

In [66]:
# Reindex onto the labels 0..5; method='ffill' fills each new label (1, 3, 5)
# with the value of the closest preceding existing label.
obj3.reindex(range(6),method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

W przypadku DataFrame metoda reindex może zmienić wiersze (indeks), kolumny lub oba te elementy jednocześnie.

In [67]:
# np.arange(9) (from NumPy) yields the integers 0..8; reshape lays them out
# as 3 rows x 3 columns before they are labelled.
frame = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=list('abc'),
    columns=['Ohio', 'Texas', 'California'],
)
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
b,3,4,5
c,6,7,8


In [68]:
# A 4x4 frame holding 0..15, with an extra 'aaa' column next to the state columns.
frame_t = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=list('abcd'),
    columns=['Ohio', 'Texas', 'California', 'aaa'],
)
frame_t

Unnamed: 0,Ohio,Texas,California,aaa
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [69]:
# Reorder the rows and introduce the unknown label 'd'; that row has no
# source data, so it is filled with NaN and the columns are shown as floats.
frame2 = frame.reindex(index=['a', 'd', 'b', 'c'])
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
d,,,
b,3.0,4.0,5.0
c,6.0,7.0,8.0


In [70]:
# reindex the columns
# 'Utah' is not a column of frame, so it comes back filled with NaN;
# 'Ohio' is not listed in states and is therefore left out of the result
states = ['Texas','Utah','California']
frame.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
b,4,,5
c,7,,8


Another way to reindex: pass the new labels as the positional argument and specify which axis they apply to with the `axis` keyword.

In [71]:
# The labels are passed positionally and axis="columns" says they are column labels;
# the displayed result matches frame.reindex(columns=states).
frame.reindex(states, axis="columns")


Unnamed: 0,Texas,Utah,California
a,1,,2
b,4,,5
c,7,,8


In [72]:
# the reindex method inserts NaN values for labels that were not present in the original index

# dropping entries from an axis - this can be done with reindex or loc, but the drop method is simpler: it returns a new object with the given labels removed

`np.arange`: one of NumPy's array-creation routines based on numerical ranges. It creates an `ndarray` of evenly spaced values and returns it.
![image.png](attachment:image.png)

![image-2.png](attachment:image-2.png)

In [73]:
# Floats 0.0..4.0 labelled 'a'..'e'; this Series is the input for the drop examples.
obj = pd.Series(data=np.arange(5.0), index=list('abcde'))
# Disabled variant using a start/stop/step range instead:
#obj2 = pd.Series(np.arange(1,10,2),index=(['a','b','c','d','e']))
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [74]:
# drop returns a new Series without the entry labelled 'c'.
dropObjC = obj.drop(labels='c')
dropObjC

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [75]:
# A list of labels removes several entries in one call; only 'a', 'b' and 'e' remain.
obj.drop(['d','c'])

a    0.0
b    1.0
e    4.0
dtype: float64

We can remove data from a DataFrame in the same way. An additional example:

In [76]:
# Build a 4x4 DataFrame from np.arange and reshape, then attach row labels
# (index) and column names.
data = pd.DataFrame(
    data=np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utha', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utha,8,9,10,11
New York,12,13,14,15


In [77]:
# drop with a list of labels and no axis argument removes the matching rows
data.drop(['Ohio','Colorado'])

Unnamed: 0,one,two,three,four
Utha,8,9,10,11
New York,12,13,14,15


In [78]:
# drop columns: the columns= keyword names the column labels to remove
data.drop(columns=['two'])

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utha,8,10,11
New York,12,14,15


In [79]:
# drop doesn't modify the object it is called on: data still has all four
# rows and all four columns after the calls above
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utha,8,9,10,11
New York,12,13,14,15


In [80]:
# another way to remove a column: pass the label positionally and select the axis by number
data.drop('two',axis=1)  # axis=1 targets the columns

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utha,8,10,11
New York,12,14,15


In [81]:
# The axis can also be given by name; a list of labels removes several columns at once.
data.drop(['two','four'], axis='columns')

Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
Utha,8,10
New York,12,14


## index, select and filter

In [82]:
# Floats 0.0..3.0 labelled 'a'..'d', used by the indexing examples that follow.
obj = pd.Series(data=np.arange(4.0), index=list('abcd'))
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [83]:
# Select a single value by its label.
obj['b']

1.0

In [84]:
# Select a single value by position. iloc makes the positional intent explicit;
# plain obj[1] on a string-labelled Series relies on an integer-as-position
# fallback of [] that pandas has deprecated.
obj.iloc[1]

1.0

In [85]:
# An integer slice is positional and excludes the end: positions 2 and 3 ('c' and 'd').
obj[2:4]

c    2.0
d    3.0
dtype: float64

In [86]:
# Display obj again for reference.
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [87]:
 obj[['a','d','b']] # an error with multiindex, without two square brackets

a    0.0
d    3.0
b    1.0
dtype: float64

In [88]:
# Select the entries at positions 1 and 3 ('b' and 'd'). iloc keeps this
# explicitly positional instead of relying on the deprecated integer fallback of [].
obj.iloc[[1,3]]

b    1.0
d    3.0
dtype: float64

In [89]:
# Boolean mask: keep only the entries whose value is at most 2.
obj[obj<=2]

a    0.0
b    1.0
c    2.0
dtype: float64

In [90]:
# Strict comparison: the entry equal to 2 ('c') is now excluded.
obj[obj<2]

a    0.0
b    1.0
dtype: float64

In [91]:
# square brackets can select data by label as shown above, but the preferred way is the loc operator

In [92]:
# loc selects by label; the result follows the order of the requested labels.
obj.loc[['b','a','d']]

b    1.0
a    0.0
d    3.0
dtype: float64

In [93]:
# why prefer loc: plain square brackets treat integers inconsistently - as labels when the index holds integers, as positions otherwise - whereas loc always works with labels

In [94]:
# Integer labels stored out of order (2, 0, 1), so labels and positions differ.
obj1 = pd.Series(
    data=[1, 2, 3],
    index=[2, 0, 1],
)
obj1


2    1
0    2
1    3
dtype: int64

In [95]:
# The same values as obj1, but under string labels.
obj2 = pd.Series(
    data=[1, 2, 3],
    index=list('abc'),
)
obj2

a    1
b    2
c    3
dtype: int64

In [96]:
# obj1 has an integer index, so [] matches 0, 1, 2 against the labels:
# the values come back in label order (2, 3, 1), not in storage order.
obj1[[0,1,2]]

0    2
1    3
2    1
dtype: int64

In [97]:
# obj2 has string labels, so here the same integers are taken as positions.
# NOTE(review): recent pandas releases deprecate this positional fallback of [] -
# confirm the behaviour against the installed version.
obj2[[0,1,2]]

a    1
b    2
c    3
dtype: int64

In [98]:
# if the index doesn't contain integers, the expression obj.loc[[0,1,2]] fails, because loc looks the integers up as labels

In [99]:
# loc is strictly label-based: the index of obj holds only strings, so looking
# up the integer labels 0, 1, 2 raises a KeyError. The error is caught and
# printed so the notebook still runs from top to bottom.
try:
    obj.loc[[0,1,2]]
except KeyError as err:
    print(f"KeyError: {err}")

In [None]:
# 20230709
# Unlike the loc operator, iloc indexes by integer position only. Thanks to that, it works the same way regardless of whether the index contains integers or not.

In [101]:
# iloc uses positions: the rows come back in storage order even though the labels are 2, 0, 1.
obj1.iloc[[0,1,2]]

2    1
0    2
1    3
dtype: int64

In [102]:
# The same positional selection works unchanged on the string-labelled Series.
obj2.iloc[[0,1,2]]

a    1
b    2
c    3
dtype: int64

In [105]:
# Slicing by label with loc includes the end label: both 'b' and 'c' are returned.
obj2.loc['b':'c']

b    2
c    3
dtype: int64

In [107]:
# Assigning through a loc slice overwrites the selected entries of obj2 itself:
# 'b' and 'c' both become 5, while 'a' keeps its value.
obj2.loc['b':'c'] = 5
obj2

a    1
b    5
c    5
dtype: int64