In [1]:
import numpy as np
import pandas as pd

## Tworzenie ramki danych

In [2]:
# Build a small demo frame: a float column "A", a string column "B" containing
# one missing value (None), and a boolean column "C".
# NOTE(review): np.random.uniform is called without a fixed seed, so the values
# of "A" presumably change on every run and the outputs recorded below will not
# be reproduced — consider seeding before re-executing the notebook.
ramka1 = pd.DataFrame({"A": np.round(np.random.uniform(0,1,5),2),
                      "B": ["k", None, "k", "m", "k"],
                      "C": [False, True, True,False, False]})

In [3]:
ramka1

Unnamed: 0,A,B,C
0,0.07,k,False
1,0.96,,True
2,0.33,k,True
3,0.6,m,False
4,0.08,k,False


In [4]:
pd.set_option("display.notebook_repr_html", True)

In [5]:
ramka1

Unnamed: 0,A,B,C
0,0.07,k,False
1,0.96,,True
2,0.33,k,True
3,0.6,m,False
4,0.08,k,False


In [6]:
pd.set_option("display.notebook_repr_html", False)

In [7]:
ramka1

      A     B      C
0  0.07     k  False
1  0.96  None   True
2  0.33     k   True
3  0.60     m  False
4  0.08     k  False

In [8]:
pd.set_option("display.notebook_repr_html", True)

## Podstawowe własności

In [9]:
ramka1.shape

(5, 3)

In [10]:
ramka1.size

15

In [11]:
len(ramka1)

5

In [12]:
import seaborn as sns

In [13]:
flights = sns.load_dataset("flights")

In [14]:
iris = sns.load_dataset("iris")

In [18]:
flights.head(n=7)

Unnamed: 0,year,month,passengers
0,1949,January,112
1,1949,February,118
2,1949,March,132
3,1949,April,129
4,1949,May,121
5,1949,June,135
6,1949,July,148


In [20]:
flights.tail(3)

Unnamed: 0,year,month,passengers
141,1960,October,461
142,1960,November,390
143,1960,December,432


In [21]:
ramka1.dtypes

A    float64
B     object
C       bool
dtype: object

In [23]:
flights.dtypes

year             int64
month         category
passengers       int64
dtype: object

In [24]:
ramka1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
A    5 non-null float64
B    4 non-null object
C    5 non-null bool
dtypes: bool(1), float64(1), object(1)
memory usage: 165.0+ bytes


## Wybieranie kolumny, typ kolumnowy (Series)

In [25]:
ramka1["A"]

0    0.07
1    0.96
2    0.33
3    0.60
4    0.08
Name: A, dtype: float64

In [26]:
type(ramka1)

pandas.core.frame.DataFrame

In [27]:
type(ramka1["A"])

pandas.core.series.Series

In [28]:
ramka1["A"].values

array([ 0.07,  0.96,  0.33,  0.6 ,  0.08])

In [29]:
type(ramka1["A"].values)

numpy.ndarray

In [30]:
ramka1["A"]

0    0.07
1    0.96
2    0.33
3    0.60
4    0.08
Name: A, dtype: float64

In [31]:
ramka1.get("A")

0    0.07
1    0.96
2    0.33
3    0.60
4    0.08
Name: A, dtype: float64

In [32]:
ramka1.A # attribute-style access to column "A"; the available column names are listed in ramka1.columns

0    0.07
1    0.96
2    0.33
3    0.60
4    0.08
Name: A, dtype: float64

In [33]:
ramka1.columns

Index(['A', 'B', 'C'], dtype='object')

In [34]:
pd.DataFrame(ramka1.A)

Unnamed: 0,A
0,0.07
1,0.96
2,0.33
3,0.6
4,0.08


In [35]:
pd.Series(np.r_[np.nan,0:1:4j])

0         NaN
1    0.000000
2    0.333333
3    0.666667
4    1.000000
dtype: float64

In [38]:
pd.Series(np.r_[np.nan,0:12:4])

0    NaN
1    0.0
2    4.0
3    8.0
dtype: float64

## Zmienne czasowe

In [37]:
pd.Series(pd.date_range("2018-04-01", periods=3))

0   2018-04-01
1   2018-04-02
2   2018-04-03
dtype: datetime64[ns]

In [39]:
# Three timestamps spaced 12 hours apart, starting at midnight on 2018-04-01.
# The lowercase "h" alias is used because the uppercase "H" spelling is
# deprecated in recent pandas releases; the resulting values are the same.
pd.Series(pd.date_range("2018-04-01", periods=3, freq="12h"))

0   2018-04-01 00:00:00
1   2018-04-01 12:00:00
2   2018-04-02 00:00:00
dtype: datetime64[ns]

## Typ kategoryczny

In [40]:
flights.month.dtype

category

In [41]:
ramka1

Unnamed: 0,A,B,C
0,0.07,k,False
1,0.96,,True
2,0.33,k,True
3,0.6,m,False
4,0.08,k,False


In [44]:
ramka1.dtypes

A    float64
B     object
C       bool
dtype: object

In [46]:
ramka1.B

0       k
1    None
2       k
3       m
4       k
Name: B, dtype: object

In [45]:
pd.Series(ramka1.B, dtype="category")

0      k
1    NaN
2      k
3      m
4      k
Name: B, dtype: category
Categories (2, object): [k, m]

In [47]:
ramka1.B.astype("category")

0      k
1    NaN
2      k
3      m
4      k
Name: B, dtype: category
Categories (2, object): [k, m]

In [48]:
# Convert the string column "B" to the categorical dtype and store it back in the frame.
ramka1["B"] = ramka1["B"].astype("category")

In [49]:
ramka1.dtypes

A     float64
B    category
C        bool
dtype: object

In [51]:
pd.Series(pd.Categorical(["a", "b", "c", "a"], categories=["a", "b", "c"]))

0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): [a, b, c]

In [59]:
# Ordered categorical series with values a, b, c, a and the category order a < b < c.
innaKategoryczna = pd.Series(
    pd.Categorical(list("abca"), categories=list("abc"), ordered=True)
)

In [53]:
ramka1.A.values

array([ 0.07,  0.96,  0.33,  0.6 ,  0.08])

In [54]:
pd.cut(ramka1.A, np.r_[0, 0.25, 0.4, 0.7,1])

0      (0, 0.25]
1       (0.7, 1]
2    (0.25, 0.4]
3     (0.4, 0.7]
4      (0, 0.25]
Name: A, dtype: category
Categories (4, object): [(0, 0.25] < (0.25, 0.4] < (0.4, 0.7] < (0.7, 1]]

In [55]:
pd.cut(ramka1.A, np.array([0, 0.25, 0.4, 0.7,1]))

0      (0, 0.25]
1       (0.7, 1]
2    (0.25, 0.4]
3     (0.4, 0.7]
4      (0, 0.25]
Name: A, dtype: category
Categories (4, object): [(0, 0.25] < (0.25, 0.4] < (0.4, 0.7] < (0.7, 1]]

In [56]:
pd.cut(ramka1.A, np.r_[0, 0.25, 0.4, 0.7,1], right=False, labels=["etykieta1", "etykieta2", "etykieta3", "etykieta4"])

0    etykieta1
1    etykieta4
2    etykieta2
3    etykieta3
4    etykieta1
Name: A, dtype: category
Categories (4, object): [etykieta1 < etykieta2 < etykieta3 < etykieta4]

In [57]:
pd.cut(ramka1.A, np.r_[0, 0.25, 0.4, 0.7,1], right=False)

0      [0, 0.25)
1       [0.7, 1)
2    [0.25, 0.4)
3     [0.4, 0.7)
4      [0, 0.25)
Name: A, dtype: category
Categories (4, object): [[0, 0.25) < [0.25, 0.4) < [0.4, 0.7) < [0.7, 1)]

In [61]:
innaKategoryczna.cat.codes

0    0
1    1
2    2
3    0
dtype: int8

In [62]:
innaKategoryczna.cat.categories

Index(['a', 'b', 'c'], dtype='object')

In [63]:
# Rename the categories position by position: a -> maly, b -> sredni, c -> duzy.
# rename_categories returns a new series (the order of the categories is kept);
# assigning directly to .cat.categories is no longer supported by current pandas.
innaKategoryczna = innaKategoryczna.cat.rename_categories(["maly", "sredni", "duzy"])

In [64]:
innaKategoryczna

0      maly
1    sredni
2      duzy
3      maly
dtype: category
Categories (3, object): [maly < sredni < duzy]

In [65]:
innaKategoryczna[0] = "sredni"

In [66]:
innaKategoryczna

0    sredni
1    sredni
2      duzy
3      maly
dtype: category
Categories (3, object): [maly < sredni < duzy]

In [67]:
# "ogromny" is not one of the declared categories yet, so pandas rejects the
# assignment. The failure is what this cell demonstrates, so it is caught and
# printed instead of aborting a "Restart & Run All". Depending on the pandas
# version the error is a ValueError or a TypeError, hence both are handled.
try:
    innaKategoryczna[0] = "ogromny"
except (ValueError, TypeError) as blad:
    print("{}: {}".format(type(blad).__name__, blad))

ValueError: Cannot setitem on a Categorical with a new category, set the categories first

In [68]:
innaKategoryczna = innaKategoryczna.cat.add_categories("ogromny")

In [69]:
innaKategoryczna

0    sredni
1    sredni
2      duzy
3      maly
dtype: category
Categories (4, object): [maly < sredni < duzy < ogromny]

In [70]:
innaKategoryczna[0] = "ogromny"

In [71]:
innaKategoryczna

0    ogromny
1     sredni
2       duzy
3       maly
dtype: category
Categories (4, object): [maly < sredni < duzy < ogromny]

In [73]:
innaKategoryczna[innaKategoryczna == "duzy"] = "ogromny"

In [74]:
innaKategoryczna

0    ogromny
1     sredni
2    ogromny
3       maly
dtype: category
Categories (4, object): [maly < sredni < duzy < ogromny]

In [75]:
innaKategoryczna = innaKategoryczna.cat.reorder_categories(["maly","sredni","ogromny","duzy"], ordered=True)

In [76]:
innaKategoryczna

0    ogromny
1     sredni
2    ogromny
3       maly
dtype: category
Categories (4, object): [maly < sredni < ogromny < duzy]

In [77]:
innaKategoryczna.sort_values()

3       maly
1     sredni
0    ogromny
2    ogromny
dtype: category
Categories (4, object): [maly < sredni < ogromny < duzy]

## Indeksy

### Wybieranie indeksu, tworzenie indeksu

In [78]:
ramka1.index

RangeIndex(start=0, stop=5, step=1)

In [79]:
ramka1.columns

Index(['A', 'B', 'C'], dtype='object')

In [80]:
ramka1.A.index

RangeIndex(start=0, stop=5, step=1)

In [81]:
ramka1.columns = ["a", "b", "c"]

In [82]:
ramka1

Unnamed: 0,a,b,c
0,0.07,k,False
1,0.96,,True
2,0.33,k,True
3,0.6,m,False
4,0.08,k,False


In [83]:
ramka2 = ramka1.set_index(np.linspace(0,1,5))

In [84]:
ramka2

Unnamed: 0,a,b,c
0.0,0.07,k,False
0.25,0.96,,True
0.5,0.33,k,True
0.75,0.6,m,False
1.0,0.08,k,False


In [85]:
ramka2.index

Float64Index([0.0, 0.25, 0.5, 0.75, 1.0], dtype='float64')

In [86]:
ramka2.index = np.r_[7:12]

In [87]:
ramka2

Unnamed: 0,a,b,c
7,0.07,k,False
8,0.96,,True
9,0.33,k,True
10,0.6,m,False
11,0.08,k,False


In [89]:
ramka1.index.name="Wiersze"
ramka1.columns.name="Kolumny"


In [90]:
ramka1

Kolumny,a,b,c
Wiersze,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.07,k,False
1,0.96,,True
2,0.33,k,True
3,0.6,m,False
4,0.08,k,False


In [93]:
ramka2 = pd.DataFrame({"A": np.round(np.random.uniform(0,1,6),2)})
ramka2

Unnamed: 0,A
0,0.68
1,0.9
2,0.94
3,0.2
4,0.88
5,0.15


In [94]:
# Two-level row index (i1, i2) built from one array of labels per level.
# from_arrays derives the levels (["x", "y", "z"] and [1, 2, 3, 4]) and the
# integer codes on its own, giving the same index as the explicit
# levels/labels constructor without its version-specific "labels" keyword,
# which current pandas no longer accepts.
ramka2.index = pd.MultiIndex.from_arrays([["x", "x", "x", "y", "y", "z"],
                                          [1, 2, 4, 1, 2, 3]],
                                         names=["i1", "i2"])

In [95]:
ramka2

Unnamed: 0_level_0,Unnamed: 1_level_0,A
i1,i2,Unnamed: 2_level_1
x,1,0.68
x,2,0.9
x,4,0.94
y,1,0.2
y,2,0.88
z,3,0.15


### Wybór wierszy po indeksie i miejscu porządkowym

In [96]:
# Ten rounded random values whose index is the integers 0..9 in shuffled order,
# so that an integer label and a row position mean different things.
# NOTE(review): neither the values nor the shuffle are seeded, so the numbers and
# label order shown in the outputs below presumably differ on a fresh run.
b = pd.Series(np.round(np.random.uniform(0,1,10),2))
i = np.r_[0:10]
np.random.shuffle(i)  # shuffles the label array in place
b.index = i
b

2    0.55
6    0.34
4    0.62
7    0.16
8    0.12
3    1.00
1    0.06
5    0.32
0    0.49
9    0.77
dtype: float64

In [97]:
c = b.copy()
c.index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
c

a    0.55
b    0.34
c    0.62
d    0.16
e    0.12
f    1.00
g    0.06
h    0.32
i    0.49
j    0.77
dtype: float64

In [98]:
b[0:1]

2    0.55
dtype: float64

In [99]:
b[0:3]

2    0.55
6    0.34
4    0.62
dtype: float64

In [100]:
b[b.values > 0.7]

3    1.00
9    0.77
dtype: float64

In [101]:
b[b>0.7]

3    1.00
9    0.77
dtype: float64

In [102]:
# b has an integer index, so [0] is a label lookup: it returns the value stored
# under label 0 (0.49 in the recorded run), not the first row.
b[0]

0.48999999999999999

In [103]:
# c has a string index, so here [0] is treated as a position and returns the
# first row (0.55 in the recorded run).
# NOTE(review): this positional fallback is presumably version-dependent —
# c.iloc[0] states the intent explicitly; confirm against the installed pandas.
c[0]

0.55000000000000004

In [104]:
b[ [0,1] ]

0    0.49
1    0.06
dtype: float64

In [105]:
c[ [0,1] ]

a    0.55
b    0.34
dtype: float64

In [106]:
b.iloc[0]

0.55000000000000004

In [107]:
c.iloc[0]

0.55000000000000004

In [108]:
b.loc[0]

0.48999999999999999

In [109]:
# c is indexed by strings, so the integer label 0 does not exist and .loc fails.
# The failure is what this cell demonstrates, so it is caught and printed
# instead of aborting a "Restart & Run All". Depending on the pandas version the
# error is a TypeError or a KeyError, hence both are handled.
try:
    c.loc[0]
except (TypeError, KeyError) as blad:
    print("{}: {}".format(type(blad).__name__, blad))

TypeError: cannot do label indexing on <class 'pandas.indexes.base.Index'> with these indexers [0] of <class 'int'>

In [111]:
b

2    0.55
6    0.34
4    0.62
7    0.16
8    0.12
3    1.00
1    0.06
5    0.32
0    0.49
9    0.77
dtype: float64

In [110]:
# .loc slices by label and includes both endpoints: this returns every row from
# label 6 through label 0 in the order in which they are stored in b.
b.loc[6:0]

6    0.34
4    0.62
7    0.16
8    0.12
3    1.00
1    0.06
5    0.32
0    0.49
dtype: float64

In [112]:
# Label 0 is stored after label 6 in b, so the label slice from 0 to 6 is empty.
b.loc[0:6]

Series([], dtype: float64)

In [113]:
c.loc["a":"c"]

a    0.55
b    0.34
c    0.62
dtype: float64

In [114]:
d = c.copy()
d

a    0.55
b    0.34
c    0.62
d    0.16
e    0.12
f    1.00
g    0.06
h    0.32
i    0.49
j    0.77
dtype: float64

In [115]:
d.index = pd.date_range("2018-04-01", periods=10)

In [116]:
d

2018-04-01    0.55
2018-04-02    0.34
2018-04-03    0.62
2018-04-04    0.16
2018-04-05    0.12
2018-04-06    1.00
2018-04-07    0.06
2018-04-08    0.32
2018-04-09    0.49
2018-04-10    0.77
Freq: D, dtype: float64

In [117]:
d.loc["2018-04-05"]

0.12

In [118]:
d.loc["2018-04-05":"2018-04-08"]

2018-04-05    0.12
2018-04-06    1.00
2018-04-07    0.06
2018-04-08    0.32
Freq: D, dtype: float64

In [119]:
ramka2

Unnamed: 0_level_0,Unnamed: 1_level_0,A
i1,i2,Unnamed: 2_level_1
x,1,0.68
x,2,0.9
x,4,0.94
y,1,0.2
y,2,0.88
z,3,0.15


In [125]:
ramka2.A.loc["x"]

i2
1    0.68
2    0.90
4    0.94
Name: A, dtype: float64

In [122]:
ramka2["A"].loc["x",1]

0.68000000000000005

In [131]:
ramka2["A"].loc["x",4]

0.93999999999999995

In [129]:
ramkaX = ramka2.A.loc["x"]
ramkaX.iloc[-1]

0.93999999999999995

In [138]:
ramka2.A.loc["x"].iloc[-1]

0.93999999999999995

In [133]:
ramka2

Unnamed: 0_level_0,Unnamed: 1_level_0,A
i1,i2,Unnamed: 2_level_1
x,1,0.68
x,2,0.9
x,4,0.94
y,1,0.2
y,2,0.88
z,3,0.15


In [132]:
ramka2.A.loc[:,1]

i1
x    0.68
y    0.20
Name: A, dtype: float64

In [136]:
ramka2.A.loc[["x","z"]]

i1  i2
x   1     0.68
    2     0.90
    4     0.94
z   3     0.15
Name: A, dtype: float64

In [137]:
ramka2.A.loc[["x","z"],:]

i1  i2
x   1     0.68
    2     0.90
    4     0.94
z   3     0.15
Name: A, dtype: float64

In [135]:
ramka2.A.loc[["x","z"],[1,3]]

i1  i2
x   1     0.68
z   3     0.15
Name: A, dtype: float64

## Wybór losowych wierszy

In [142]:
ramka2.sample(n=2)

Unnamed: 0_level_0,Unnamed: 1_level_0,A
i1,i2,Unnamed: 2_level_1
x,1,0.68
y,1,0.2


In [144]:
ramka2.sample(frac=0.6)

Unnamed: 0_level_0,Unnamed: 1_level_0,A
i1,i2,Unnamed: 2_level_1
y,2,0.88
x,2,0.9
y,1,0.2
x,1,0.68


## Dodawanie/usuwanie kolumn i wierszy

In [147]:
ramka2["B"] = 2*ramka2.A+1
ramka2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
i1,i2,Unnamed: 2_level_1,Unnamed: 3_level_1
x,1,0.68,2.36
x,2,0.9,2.8
x,4,0.94,2.88
y,1,0.2,1.4
y,2,0.88,2.76
z,3,0.15,1.3


In [152]:
ramka2 = ramka2.drop("B", axis=1)

In [153]:
ramka2

Unnamed: 0_level_0,Unnamed: 1_level_0,A
i1,i2,Unnamed: 2_level_1
x,1,0.68
x,2,0.9
x,4,0.94
y,1,0.2
y,2,0.88
z,3,0.15


In [154]:
ramka1

Kolumny,a,b,c
Wiersze,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.07,k,False
1,0.96,,True
2,0.33,k,True
3,0.6,m,False
4,0.08,k,False


In [155]:
ramka1.loc[8] = [0.77, "m", True]
ramka1

Kolumny,a,b,c
Wiersze,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.07,k,False
1,0.96,,True
2,0.33,k,True
3,0.6,m,False
4,0.08,k,False
8,0.77,m,True


In [160]:
ramka1 = ramka1.drop(8) # same as drop(8, axis=0): with axis omitted the row labelled 8 is removed
ramka1

Kolumny,a,b,c
Wiersze,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.07,k,False
1,0.96,,True
2,0.33,k,True
3,0.6,m,False
4,0.08,k,False


## Zwektoryzowane operacje na kolumnie

In [161]:
2*ramka1.a

Wiersze
0    0.14
1    1.92
2    0.66
3    1.20
4    0.16
Name: a, dtype: float64

In [162]:
np.exp(ramka1.a)

Wiersze
0    1.072508
1    2.611696
2    1.390968
3    1.822119
4    1.083287
Name: a, dtype: float64

## Statystyki, przekształcenia na ramce danych

In [163]:
tips = sns.load_dataset("tips")
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [164]:
tips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
total_bill    244 non-null float64
tip           244 non-null float64
sex           244 non-null category
smoker        244 non-null category
day           244 non-null category
time          244 non-null category
size          244 non-null int64
dtypes: category(4), float64(2), int64(1)
memory usage: 6.8 KB


In [165]:
tips.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [166]:
tips.describe(include="all")

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
count,244.0,244.0,244,244,244,244,244.0
unique,,,2,2,4,2,
top,,,Male,No,Sat,Dinner,
freq,,,157,151,87,176,
mean,19.785943,2.998279,,,,,2.569672
std,8.902412,1.383638,,,,,0.9511
min,3.07,1.0,,,,,1.0
25%,13.3475,2.0,,,,,2.0
50%,17.795,2.9,,,,,2.0
75%,24.1275,3.5625,,,,,3.0


In [167]:
tips.sex.describe()

count      244
unique       2
top       Male
freq       157
Name: sex, dtype: object

In [168]:
# Column means of the numeric columns only (total_bill, tip, size).
# numeric_only=True skips the categorical columns explicitly; current pandas
# raises an error instead of silently dropping them when it is omitted.
tips.mean(numeric_only=True)

total_bill    19.785943
tip            2.998279
size           2.569672
dtype: float64

In [174]:
tips["size"].mean()

2.569672131147541

In [175]:
pd.pivot_table(tips[["smoker", "total_bill", "tip"]], index = ["smoker"], aggfunc="mean")

Unnamed: 0_level_0,tip,total_bill
smoker,Unnamed: 1_level_1,Unnamed: 2_level_1
Yes,3.00871,20.756344
No,2.991854,19.188278


In [179]:
list(flights.groupby("year"))

[(1949,     year      month  passengers
  0   1949    January         112
  1   1949   February         118
  2   1949      March         132
  3   1949      April         129
  4   1949        May         121
  5   1949       June         135
  6   1949       July         148
  7   1949     August         148
  8   1949  September         136
  9   1949    October         119
  10  1949   November         104
  11  1949   December         118), (1950,     year      month  passengers
  12  1950    January         115
  13  1950   February         126
  14  1950      March         141
  15  1950      April         135
  16  1950        May         125
  17  1950       June         149
  18  1950       July         170
  19  1950     August         170
  20  1950  September         158
  21  1950    October         133
  22  1950   November         114
  23  1950   December         140), (1951,     year      month  passengers
  24  1951    January         145
  25  1951   February       

In [181]:
# Mean number of passengers per year (first five years).
# The numeric column is selected explicitly because "month" is categorical and
# cannot be averaged; the double brackets keep the result a DataFrame.
flights.groupby("year")[["passengers"]].mean().head()

Unnamed: 0_level_0,passengers
year,Unnamed: 1_level_1
1949,126.666667
1950,139.666667
1951,170.166667
1952,197.0
1953,225.0


In [182]:
# Mean bill, tip and party size for every (smoker, sex) combination.
# The numeric columns are selected explicitly because the remaining columns
# (day, time) are categorical and cannot be averaged.
tips.groupby(["smoker","sex"])[["total_bill", "tip", "size"]].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip,size
smoker,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Yes,Male,22.2845,3.051167,2.5
Yes,Female,17.977879,2.931515,2.242424
No,Male,19.791237,3.113402,2.71134
No,Female,18.105185,2.773519,2.592593


In [183]:
tips.smoker.value_counts()

No     151
Yes     93
Name: smoker, dtype: int64

In [184]:
pd.crosstab(tips.smoker, tips.sex)

sex,Male,Female
smoker,Unnamed: 1_level_1,Unnamed: 2_level_1
Yes,60,33
No,97,54


In [185]:
pd.crosstab(tips.smoker, [tips.sex, tips.day])

sex,Male,Male,Male,Male,Female,Female,Female,Female
day,Thur,Fri,Sat,Sun,Thur,Fri,Sat,Sun
smoker,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Yes,10,8,27,15,7,7,15,4
No,20,2,32,43,25,2,13,14


In [189]:
pd.crosstab(tips.smoker, [tips.sex, tips.day], values=tips.total_bill, aggfunc="mean")

sex,Male,Male,Male,Male,Female,Female,Female,Female
day,Thur,Fri,Sat,Sun,Thur,Fri,Sat,Sun
smoker,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Yes,19.171,20.4525,21.837778,26.141333,19.218571,12.654286,20.266667,16.54
No,18.4865,17.475,19.929063,20.403256,16.0144,19.365,19.003846,20.824286


In [186]:
tips.sort_values(by="tip")

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
67,3.07,1.00,Female,Yes,Sat,Dinner,1
236,12.60,1.00,Male,Yes,Sat,Dinner,2
92,5.75,1.00,Female,Yes,Fri,Dinner,2
111,7.25,1.00,Female,No,Sat,Dinner,1
0,16.99,1.01,Female,No,Sun,Dinner,2
215,12.90,1.10,Female,Yes,Sat,Dinner,2
237,32.83,1.17,Male,Yes,Sat,Dinner,2
235,10.07,1.25,Male,No,Sat,Dinner,2
75,10.51,1.25,Male,No,Sat,Dinner,2
135,8.51,1.25,Female,No,Thur,Lunch,2


In [192]:
tips.day.head()

0    Sun
1    Sun
2    Sun
3    Sun
4    Sun
Name: day, dtype: category
Categories (4, object): [Thur, Fri, Sat, Sun]

In [190]:
tips.sort_values(by=["size","day"], ascending=[True, False])

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
67,3.07,1.00,Female,Yes,Sat,Dinner,1
111,7.25,1.00,Female,No,Sat,Dinner,1
222,8.58,1.92,Male,Yes,Fri,Lunch,1
82,10.07,1.83,Female,No,Thur,Lunch,1
0,16.99,1.01,Female,No,Sun,Dinner,2
3,23.68,3.31,Male,No,Sun,Dinner,2
6,8.77,2.00,Male,No,Sun,Dinner,2
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2
10,10.27,1.71,Male,No,Sun,Dinner,2


In [193]:
tips.sort_values(by=["day","size"], ascending=[False, True])

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
3,23.68,3.31,Male,No,Sun,Dinner,2
6,8.77,2.00,Male,No,Sun,Dinner,2
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2
10,10.27,1.71,Male,No,Sun,Dinner,2
12,15.42,1.57,Male,No,Sun,Dinner,2
14,14.83,3.02,Female,No,Sun,Dinner,2
15,21.58,3.92,Male,No,Sun,Dinner,2
41,17.46,2.54,Male,No,Sun,Dinner,2


In [194]:
tips.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,234,235,236,237,238,239,240,241,242,243
total_bill,16.99,10.34,21.01,23.68,24.59,25.29,8.77,26.88,15.04,14.78,...,15.53,10.07,12.6,32.83,35.83,29.03,27.18,22.67,17.82,18.78
tip,1.01,1.66,3.5,3.31,3.61,4.71,2,3.12,1.96,3.23,...,3,1.25,1,1.17,4.67,5.92,2,2,1.75,3
sex,Female,Male,Male,Male,Female,Male,Male,Male,Male,Male,...,Male,Male,Male,Male,Female,Male,Female,Male,Male,Female
smoker,No,No,No,No,No,No,No,No,No,No,...,Yes,No,Yes,Yes,No,No,Yes,Yes,No,No
day,Sun,Sun,Sun,Sun,Sun,Sun,Sun,Sun,Sun,Sun,...,Sat,Sat,Sat,Sat,Sat,Sat,Sat,Sat,Sat,Thur
time,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner,...,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner,Dinner
size,2,3,3,2,4,4,2,4,2,2,...,2,2,2,2,3,3,2,2,2,2


In [196]:
tips.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_bill,244.0,19.785943,8.902412,3.07,13.3475,17.795,24.1275,50.81
tip,244.0,2.998279,1.383638,1.0,2.0,2.9,3.5625,10.0
size,244.0,2.569672,0.9511,1.0,2.0,2.0,3.0,6.0


In [197]:
tips.stack()

0    total_bill     16.99
     tip             1.01
     sex           Female
     smoker            No
     day              Sun
     time          Dinner
     size               2
1    total_bill     10.34
     tip             1.66
     sex             Male
     smoker            No
     day              Sun
     time          Dinner
     size               3
2    total_bill     21.01
     tip              3.5
     sex             Male
     smoker            No
     day              Sun
     time          Dinner
     size               3
3    total_bill     23.68
     tip             3.31
     sex             Male
     smoker            No
     day              Sun
     time          Dinner
     size               2
4    total_bill     24.59
     tip             3.61
                    ...  
239  time          Dinner
     size               3
240  total_bill     27.18
     tip                2
     sex           Female
     smoker           Yes
     day              Sat
     time   

In [198]:
tips.stack().unstack()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2,Male,No,Sun,Dinner,2
7,26.88,3.12,Male,No,Sun,Dinner,4
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2


In [200]:
flights.head()

Unnamed: 0,year,month,passengers
0,1949,January,112
1,1949,February,118
2,1949,March,132
3,1949,April,129
4,1949,May,121


In [199]:
# Reshape from long to wide: one row per month, one column per year, cells hold
# the passenger counts. Keyword arguments are used because current pandas no
# longer accepts index/columns/values positionally.
flights.pivot(index="month", columns="year", values="passengers")

year,1949,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,1960
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
January,112,115,145,171,196,204,242,284,315,340,360,417
February,118,126,150,180,196,188,233,277,301,318,342,391
March,132,141,178,193,236,235,267,317,356,362,406,419
April,129,135,163,181,235,227,269,313,348,348,396,461
May,121,125,172,183,229,234,270,318,355,363,420,472
June,135,149,178,218,243,264,315,374,422,435,472,535
July,148,170,199,230,264,302,364,413,465,491,548,622
August,148,170,199,242,272,293,347,405,467,505,559,606
September,136,158,184,209,237,259,312,355,404,404,463,508
October,119,133,162,191,211,229,274,306,347,359,407,461


In [202]:
ramka1.b.isnull()

Wiersze
0    False
1     True
2    False
3    False
4    False
Name: b, dtype: bool