# Pandas

# Importing pandas

```sh
$ pip install pandas
```

In [2]:
import pandas as pd

# Basic objects in pandas

## Series

In [3]:
import numpy as np

In [4]:
# Wrap a NumPy array in a Series; no index is given here, and the output
# below shows the default integer labels 0..4.
data = np.array(
    ["a", "b", "c", "d", "e"],
)
s = pd.Series(data=data)
print(type(s), s, sep="\n")

<class 'pandas.core.series.Series'>
0    a
1    b
2    c
3    d
4    e
dtype: object


In [5]:
# The index labels can be set explicitly instead of relying on the default ones.
data = np.array(["a", "b", "c", "d", "e"])
s = pd.Series(
    data=data,
    index=[100, 101, 102, 103, 104],
)
print(s)

100    a
101    b
102    c
103    d
104    e
dtype: object


In [6]:
# Demonstration: the index must have exactly one label per value.
# Here 5 values are paired with 9 labels, so the Series constructor raises
# ValueError. The error is caught and printed so that the notebook still runs
# from top to bottom (Restart & Run All) while showing the same message.
data = np.array(["a", "b", "c", "d", "e"])
try:
    s = pd.Series(data, index=[100, 101, 102, 103, 104, 105, 106, 107, 108])
    print(s)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Length of values (5) does not match length of index (9)

In [7]:
# Creating a Series from a dictionary: the keys become the index labels
# and the values become the data.
s = pd.Series(
    {
        1: "a",
        2: "b",
        3: "c",
        4: "d",
        5: "e",
    }
)
print(type(s), s, sep="\n")

<class 'pandas.core.series.Series'>
1    a
2    b
3    c
4    d
5    e
dtype: object


In [8]:
# With both a dict and an index, values are looked up by label: labels that
# are not keys of the dict (7 and 9) come out as NaN in the output below.
s = pd.Series(
    {1: "a", 2: "b", 3: "c", 4: "d", 5: "e"},
    index=[1, 3, 5, 7, 9],
)
print(type(s), s, sep="\n")

<class 'pandas.core.series.Series'>
1      a
3      c
5      e
7    NaN
9    NaN
dtype: object


In [9]:
# Series of identical values: a scalar is repeated for every index label.
s = pd.Series(
    "a",
    index=[1, 2, 3, 4, 6],
)
print(s, type(s), sep="\n")

1    a
2    a
3    a
4    a
6    a
dtype: object
<class 'pandas.core.series.Series'>


---

In [10]:
# Integer data with string labels; this Series is inspected in the next cells.
s = pd.Series(
    data=[1, 2, 3, 4, 5],
    index=["a", "b", "c", "d", "e"],
)
print(type(s), s, sep="\n")

<class 'pandas.core.series.Series'>
a    1
b    2
c    3
d    4
e    5
dtype: int64


In [12]:
# A Series has two parts: the index (labels) and the values (data).
print(s.index, s.values, sep="\n")

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
[1 2 3 4 5]


In [13]:
# The values of this Series are exposed as a NumPy array (see the type printed below).
print(type(s.values))

<class 'numpy.ndarray'>


## Dataframe

![dataframe](images/df_exploded_resized.svg)

In [14]:
# Creating a DataFrame from a dict of Series: each key becomes a column name.
data = dict(
    one=pd.Series([10, 20, 30]),
    two=pd.Series([40, 50, 50]),
)
df = pd.DataFrame(data)
print(type(df), df, sep="\n")

<class 'pandas.core.frame.DataFrame'>
   one  two
0   10   40
1   20   50
2   30   50


In [15]:
# The two Series have different labels ("c" only in "one", "d" only in "two").
# The DataFrame aligns them on their labels; where a Series has no value for
# a label, the cell is NaN (see the output below).
data = dict(
    one=pd.Series([10, 20, 30], index=["a", "b", "c"]),
    two=pd.Series(["x", "y", "z"], index=["a", "b", "d"]),
)
df = pd.DataFrame(data)
print(type(df), df, sep="\n")

<class 'pandas.core.frame.DataFrame'>
    one  two
a  10.0    x
b  20.0    y
c  30.0  NaN
d   NaN    z


In [16]:
# Defining a DataFrame from a flat Python list: the result has a single
# column, labelled 0 in the output below.
df = pd.DataFrame(data=[1, 2, 3, 4])
print(type(df), df, sep="\n")

<class 'pandas.core.frame.DataFrame'>
   0
0  1
1  2
2  3
3  4


In [17]:
# Defining a DataFrame from a list of lists: each inner list is one row.
data = [
    ["Alex", 10],
    ["Bob", 12],
    ["Claire", 14],
]
df = pd.DataFrame(data=data)
print(type(df), df, sep="\n")

<class 'pandas.core.frame.DataFrame'>
        0   1
0    Alex  10
1     Bob  12
2  Claire  14


In [18]:
# Specifying column names for row-oriented data via the `columns` argument.
data = [
    ["Alex", 10],
    ["Bob", 12],
    ["Claire", 14],
]
df = pd.DataFrame(
    data=data,
    columns=["Name", "Age"],
)
print(type(df), df, sep="\n")

<class 'pandas.core.frame.DataFrame'>
     Name  Age
0    Alex   10
1     Bob   12
2  Claire   14


---

# Importing real case data

Stolpci, ki jih bomo uporabljali, so:
* `company` - ime podjetja
* `rank` - kje na lestvici top 500 se podjetje nahaja
* `revenues` - koliko prihodkov je imelo podjetje, v milijonih dolarjev
* `revenue_change` - kolikšna je bila sprememba prihodkov med zdajšnjim in prejšnjim letom, izraženo v procentih (%)
* `profits` - kolikšen je bil profit podjetja, izražen v milijonih dolarjev
* `ceo` - kdo je glavni direktor podjetja
* `industry` - v kateri industriji podjetje deluje
* `sector` - v katerem sektorju podjetje deluje
* `previous_rank` - lanskoletni Global 500 rank podjetja
* `country` - država, v kateri se nahajajo glavne pisarne podjetja


In [19]:
import pandas as pd

# Load the company dataset from CSV and check what we got: the object type
# and the (rows, columns) shape.
f500 = pd.read_csv("data/f500.csv")
print(type(f500), f500.shape, sep="\n")

<class 'pandas.core.frame.DataFrame'>
(500, 17)


[DataFrame.head()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html)

In [24]:
# Preview the first rows of the frame (five rows are shown when no count is given).
f500.head()

Unnamed: 0,company,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
0,Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
1,State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
2,Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
3,China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
4,Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


In [29]:
# Show the last 10 rows of the frame.
f500.tail(10)

Unnamed: 0,company,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
490,National Grid,491,22036,-3.2,10150.6,82310,160.2,John Pettigrew,Utilities,Energy,471,Britain,"London, Britain",http://www.nationalgrid.com,12,22132,25463
491,Dollar General,492,21987,7.9,1251.1,11672,7.4,Todd J. Vasos,Specialty Retailers,Retailing,0,USA,"Goodlettsville, TN",http://www.dollargeneral.com,1,121000,5406
492,Telecom Italia,493,21941,-17.4,1999.4,74295,,Flavio Cattaneo,Telecommunications,Telecommunications,404,Italy,"Milan, Italy",http://www.telecomitalia.com,18,61227,22366
493,Xiamen ITG Holding Group,494,21930,34.3,35.6,12161,-25.1,Xu Xiaoxi,Trading,Wholesalers,0,China,"Xiamen, China",http://www.itgholding.com.cn,1,18454,1066
494,Xinjiang Guanghui Industry Investment,495,21919,31.1,251.8,31957,49.9,Shang Jiqiang,Trading,Wholesalers,0,China,"Urumqi, China",http://www.guanghui.com,1,65616,4563
495,Teva Pharmaceutical Industries,496,21903,11.5,329.0,92890,-79.3,Yitzhak Peterburg,Pharmaceuticals,Health Care,0,Israel,"Petach Tikva, Israel",http://www.tevapharm.com,1,56960,33337
496,New China Life Insurance,497,21796,-13.3,743.9,100609,-45.6,Wan Feng,"Insurance: Life, Health (stock)",Financials,427,China,"Beijing, China",http://www.newchinalife.com,2,54378,8507
497,Wm. Morrison Supermarkets,498,21741,-11.3,406.4,11630,20.4,David T. Potts,Food and Drug Stores,Food & Drug Stores,437,Britain,"Bradford, Britain",http://www.morrisons.com,13,77210,5111
498,TUI,499,21655,-5.5,1151.7,16247,195.5,Friedrich Joussen,Travel Services,Business Services,467,Germany,"Hanover, Germany",http://www.tuigroup.com,23,66779,3006
499,AutoNation,500,21609,3.6,430.5,10060,-2.7,Michael J. Jackson,Specialty Retailers,Retailing,0,USA,"Fort Lauderdale, FL",http://www.autonation.com,12,26000,2310


In [27]:
# Evaluating the frame itself shows a truncated view: the first and last rows,
# with a row of "..." standing in for everything in between.
f500

Unnamed: 0,company,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
0,Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
1,State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
2,Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
3,China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
4,Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,Teva Pharmaceutical Industries,496,21903,11.5,329.0,92890,-79.3,Yitzhak Peterburg,Pharmaceuticals,Health Care,0,Israel,"Petach Tikva, Israel",http://www.tevapharm.com,1,56960,33337
496,New China Life Insurance,497,21796,-13.3,743.9,100609,-45.6,Wan Feng,"Insurance: Life, Health (stock)",Financials,427,China,"Beijing, China",http://www.newchinalife.com,2,54378,8507
497,Wm. Morrison Supermarkets,498,21741,-11.3,406.4,11630,20.4,David T. Potts,Food and Drug Stores,Food & Drug Stores,437,Britain,"Bradford, Britain",http://www.morrisons.com,13,77210,5111
498,TUI,499,21655,-5.5,1151.7,16247,195.5,Friedrich Joussen,Travel Services,Business Services,467,Germany,"Hanover, Germany",http://www.tuigroup.com,23,66779,3006


In [30]:
# Column labels of the DataFrame, returned as an Index object.
f500.columns

Index(['company', 'rank', 'revenues', 'revenue_change', 'profits', 'assets',
       'profit_change', 'ceo', 'industry', 'sector', 'previous_rank',
       'country', 'hq_location', 'website', 'years_on_global_500_list',
       'employees', 'total_stockholder_equity'],
      dtype='object')

In [31]:
# Row labels of the DataFrame; the frame was read without index_col, and the
# output shows a RangeIndex 0..499.
f500.index

RangeIndex(start=0, stop=500, step=1)

In [32]:
import pandas as pd

# Re-read the CSV, this time using the "company" column as the row index.
# The shape drops from 17 to 16 columns because "company" is now the index.
f500 = pd.read_csv("data/f500.csv", index_col="company")
print(type(f500), f500.shape, f500.index, sep="\n")
f500.head()

<class 'pandas.core.frame.DataFrame'>
(500, 16)
Index(['Walmart', 'State Grid', 'Sinopec Group', 'China National Petroleum',
       'Toyota Motor', 'Volkswagen', 'Royal Dutch Shell', 'Berkshire Hathaway',
       'Apple', 'Exxon Mobil',
       ...
       'National Grid', 'Dollar General', 'Telecom Italia',
       'Xiamen ITG Holding Group', 'Xinjiang Guanghui Industry Investment',
       'Teva Pharmaceutical Industries', 'New China Life Insurance',
       'Wm. Morrison Supermarkets', 'TUI', 'AutoNation'],
      dtype='object', name='company', length=500)


Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


---

In [33]:
# Print a summary of the frame: index range, per-column non-null counts and dtypes.
f500.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, Walmart to AutoNation
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   rank                      500 non-null    int64  
 1   revenues                  500 non-null    int64  
 2   revenue_change            498 non-null    float64
 3   profits                   499 non-null    float64
 4   assets                    500 non-null    int64  
 5   profit_change             436 non-null    float64
 6   ceo                       500 non-null    object 
 7   industry                  500 non-null    object 
 8   sector                    500 non-null    object 
 9   previous_rank             500 non-null    int64  
 10  country                   500 non-null    object 
 11  hq_location               500 non-null    object 
 12  website                   500 non-null    object 
 13  years_on_global_500_list  500 non-null    int64  
 14  em

In [36]:
import numpy as np
import pandas as pd

# Column dtypes can be chosen at load time: here "rank" is read as a 16-bit
# integer, which the info() summary below reports as int16.
f500 = pd.read_csv(
    "data/f500.csv",
    index_col="company",
    dtype={"rank": np.int16},
)
f500.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, Walmart to AutoNation
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   rank                      500 non-null    int16  
 1   revenues                  500 non-null    int64  
 2   revenue_change            498 non-null    float64
 3   profits                   499 non-null    float64
 4   assets                    500 non-null    int64  
 5   profit_change             436 non-null    float64
 6   ceo                       500 non-null    object 
 7   industry                  500 non-null    object 
 8   sector                    500 non-null    object 
 9   previous_rank             500 non-null    int64  
 10  country                   500 non-null    object 
 11  hq_location               500 non-null    object 
 12  website                   500 non-null    object 
 13  years_on_global_500_list  500 non-null    int64  
 14  em

## Categorical Data

In [41]:
# Create a categorical Series directly by passing dtype="category".
# The categories are inferred from the values; the output lists them as
# ['at', 'de', 'hr', 'sl'].
s = pd.Series(["sl", "hr", "at", "sl", "de"], dtype="category")
s

0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (4, object): ['at', 'de', 'hr', 'sl']

In [43]:
# Start from a plain Series of strings (object dtype in the first output) ...
s = pd.Series(["sl", "hr", "at", "sl", "de"])
print(s)

# ... then convert the existing Series to a categorical with astype.
s = s.astype("category")
s

0    sl
1    hr
2    at
3    sl
4    de
dtype: object


0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (4, object): ['at', 'de', 'hr', 'sl']

In [67]:
# Build the categorical explicitly with pd.Categorical. Passing `categories`
# fixes the category order (sl, hr, at, de) instead of the inferred
# ['at', 'de', 'hr', 'sl'] seen in the previous cells.
s = pd.Series(
    pd.Categorical(["sl", "hr", "at", "sl", "de"], categories=["sl", "hr", "at", "de"])
)
s

0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (4, object): ['sl', 'hr', 'at', 'de']

---

In [50]:
# Baseline for a memory comparison: 50,000 short strings stored with object dtype.
s1 = pd.Series(["sl", "hr", "at", "sl", "de"]*10_000, dtype="object")
print(s1)
# Called without deep=True, so for object dtype the reported figure does not
# include the memory of the string objects themselves.
s1.memory_usage()

0        sl
1        hr
2        at
3        sl
4        de
         ..
49995    sl
49996    hr
49997    at
49998    sl
49999    de
Length: 50000, dtype: object


400128

In [52]:
# The same 50,000 values stored as a categorical with 4 categories.
s2 = pd.Series(["sl", "hr", "at", "sl", "de"]*10_000, dtype="category")
print(s2)
# Compare this figure with the one reported for the object-dtype Series s1.
s2.memory_usage()

0        sl
1        hr
2        at
3        sl
4        de
         ..
49995    sl
49996    hr
49997    at
49998    sl
49999    de
Length: 50000, dtype: category
Categories (4, object): ['at', 'de', 'hr', 'sl']


50332

In [54]:
# The .cat accessor exposes the categories of a categorical Series as an Index.
s.cat.categories

Index(['sl', 'hr', 'at', 'de'], dtype='object')

In [55]:
# Each value as an integer code: the position of its category in
# s.cat.categories (here sl -> 0, hr -> 1, at -> 2, de -> 3).
s.cat.codes

0    0
1    1
2    2
3    0
4    3
dtype: int8

In [64]:
# dtype="category" on the DataFrame makes every column categorical; each
# column gets categories inferred from its own values (A has 'de', B has 'pl').
df = pd.DataFrame(
    {
        "A": ["sl", "hr", "at", "sl", "de"],
        "B": ["sl", "hr", "at", "sl", "pl"],
    },
    dtype="category",
)
print(df["A"], df["B"], sep="\n")
df

0    sl
1    hr
2    at
3    sl
4    de
Name: A, dtype: category
Categories (4, object): ['at', 'de', 'hr', 'sl']
0    sl
1    hr
2    at
3    sl
4    pl
Name: B, dtype: category
Categories (4, object): ['at', 'hr', 'pl', 'sl']


Unnamed: 0,A,B
0,sl,sl
1,hr,hr
2,at,at
3,sl,sl
4,de,pl


In [65]:
# Casting the whole frame to one CategoricalDtype gives both columns the same
# five categories, in the order listed here.
cat_type = pd.CategoricalDtype(categories=["sl", "at", "hr", "de", "pl"])
df = pd.DataFrame(
    {
        "A": ["sl", "hr", "at", "sl", "de"],
        "B": ["sl", "hr", "at", "sl", "pl"],
    }
)
df_cat = df.astype(cat_type)

print(df_cat["A"], df_cat["B"], sep="\n")

0    sl
1    hr
2    at
3    sl
4    de
Name: A, dtype: category
Categories (5, object): ['sl', 'at', 'hr', 'de', 'pl']
0    sl
1    hr
2    at
3    sl
4    pl
Name: B, dtype: category
Categories (5, object): ['sl', 'at', 'hr', 'de', 'pl']


----

In [70]:
# Recap of the categorical Series and its categories before modifying them below.
print(s)
s.cat.categories

0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (4, object): ['sl', 'hr', 'at', 'de']


Index(['sl', 'hr', 'at', 'de'], dtype='object')

In [74]:
# Rename categories with a dict mapping old name -> new name.
# The result is displayed but not assigned, so s itself keeps the short codes.
s.cat.rename_categories({"sl": "slovenija", "hr":"hrvaška", "at":"avstrija", "de":"nemčija"})

0    slovenija
1      hrvaška
2     avstrija
3    slovenija
4      nemčija
dtype: category
Categories (4, object): ['slovenija', 'hrvaška', 'avstrija', 'nemčija']

In [72]:
# Rename categories with a list: new names are matched by position to the
# existing categories (sl, hr, at, de).
s.cat.rename_categories(["slovenija", "hrvaška", "avstrija", "nemčija"])

0    slovenija
1      hrvaška
2     avstrija
3    slovenija
4      nemčija
dtype: category
Categories (4, object): ['slovenija', 'hrvaška', 'avstrija', 'nemčija']

In [75]:
# Add a new category "pl"; the values stay the same and only the category
# list grows (5 categories in the output).
s.cat.add_categories(["pl"])

0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (5, object): ['sl', 'hr', 'at', 'de', 'pl']

In [79]:
# Removing a category that is still in use: the affected value becomes NaN
# and its code becomes -1 (last row of the outputs below).
s2 = s.cat.remove_categories(["de"])
print(s2, s2.cat.categories, s2.cat.codes, sep="\n")

0     sl
1     hr
2     at
3     sl
4    NaN
dtype: category
Categories (3, object): ['sl', 'hr', 'at']
Index(['sl', 'hr', 'at'], dtype='object')
0    0
1    1
2    2
3    0
4   -1
dtype: int8


In [82]:
# Ordering among categories: this first version passes ordered=False and no
# explicit categories, so sort_values() follows the inferred category list
# ['high', 'low', 'med'] shown in the output rather than low < med < high.
s = pd.Series(pd.Categorical(["low", "med", "high", "low"]*3, ordered=False))
s.sort_values()

2     high
6     high
10    high
0      low
3      low
4      low
7      low
8      low
11     low
1      med
5      med
9      med
dtype: category
Categories (3, object): ['high', 'low', 'med']

In [84]:
# With an explicit category list and ordered=True, the categories carry the
# order low < med < high, and sort_values() follows it.
s = pd.Series(
    pd.Categorical(
        ["low", "med", "high", "low"] * 3,
        categories=["low", "med", "high"],
        ordered=True,
    )
)
s.sort_values()

0      low
3      low
4      low
7      low
8      low
11     low
1      med
5      med
9      med
2     high
6     high
10    high
dtype: category
Categories (3, object): ['low' < 'med' < 'high']

In [85]:
# s itself is still in its original row order; sort_values() in the previous
# cell was only displayed, not assigned back.
s

0      low
1      med
2     high
3      low
4      low
5      med
6     high
7      low
8      low
9      med
10    high
11     low
dtype: category
Categories (3, object): ['low' < 'med' < 'high']

In [86]:
# One-hot encode the categorical Series: one indicator column per category
# (low, med, high), one row per original value.
# dtype=int keeps the indicators as 0/1 on every pandas version; from
# pandas 2.0 the default would otherwise be boolean True/False columns.
pd.get_dummies(s, dtype=int)

Unnamed: 0,low,med,high
0,1,0,0
1,0,1,0
2,0,0,1
3,1,0,0
4,1,0,0
5,0,1,0
6,0,0,1
7,1,0,0
8,1,0,0
9,0,1,0


# Data Selection / Data Indexing

## Selecting Columns by Label - .loc

`DataFrame.loc[ row_label , column_label ]`.

[DataFrame.loc[  ]](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html)

<table>
<thead>
<tr>
<th>Select by Label</th>
<th>Explicit Syntax</th>
<th>Common Shorthand</th>
<th>Other Shorthand</th>
</tr>
</thead>
<tbody>
<tr>
<td>Single column</td>
<td><code>df.loc[:,"col1"]</code></td>
<td bgcolor="#00FF00"><code>df["col1"]</code></td>
<td><code>df.col1</code></td>
</tr>
<tr>
<td>List of columns</td>
<td><code>df.loc[:,["col1", "col7"]]</code></td>
<td bgcolor="#00FF00"><code>df[["col1", "col7"]]</code></td>
<td></td>
</tr>
<tr>
<td>Slice of columns</td>
<td bgcolor="#00FF00"><code>df.loc[:,"col1":"col4"]</code></td>
<td></td>
<td></td>
</tr>
</tbody>
</table>

## Select Rows by Label - .loc

![image](images/df_series_s_updated.svg)

![image](images/df_series_df_updated.svg)

## Selecting Items - .loc

<table>
<thead>
<tr>
<th>Select by Label</th>
<th>Explicit Syntax</th>
<th>Shorthand Convention</th>
</tr>
</thead>
<tbody>
<tr>
<td>Single column from dataframe</td>
<td><code>df.loc[:,"col1"]</code></td>
<td bgcolor="#00FF00"><code>df["col1"]</code></td>
</tr>
<tr>
<td>List of columns from dataframe</td>
<td><code>df.loc[:,["col1","col7"]]</code></td>
<td bgcolor="#00FF00"><code>df[["col1","col7"]]</code></td>
</tr>
<tr>
<td>Slice of columns from dataframe</td>
<td bgcolor="#00FF00"><code>df.loc[:,"col1":"col4"]</code></td>
<td></td>
</tr>
<tr>
<td>Single row from dataframe</td>
<td bgcolor="#00FF00"><code>df.loc["row4"]</code></td>
<td></td>
</tr>
<tr>
<td>List of rows from dataframe</td>
<td bgcolor="#00FF00"><code>df.loc[["row1", "row8"]]</code></td>
<td></td>
</tr>
<tr>
<td>Slice of rows from dataframe</td>
<td bgcolor="#00FF00"><code>df.loc["row3":"row5"]</code></td>
<td><code>df["row3":"row5"]</code></td>
</tr>
<tr>
<td>Single item from series</td>
<td><code>s.loc["item8"]</code></td>
<td bgcolor="#00FF00"><code>s["item8"]</code></td>
</tr>
<tr>
<td>List of items from series</td>
<td><code>s.loc[["item1","item7"]]</code></td>
<td bgcolor="#00FF00"><code>s[["item1","item7"]]</code></td>
</tr>
<tr>
<td>Slice of items from series</td>
<td><code>s.loc["item2":"item4"]</code></td>
<td bgcolor="#00FF00"><code>s["item2":"item4"]</code></td>
</tr>
</tbody>
</table>

---

## Selecting using .iloc

![image](images/selection_iloc.svg)

---

# Vectorized Operations

![vectorized](images/vectorized.gif)

# Series Data Exploration Methods

---

## Series Describe

---

# DataFrame Data Exploration Methods

![slika](images/axis_param.svg)

---

## DataFrame Describe

---

# Changing values

# Boolean Indexing pandas objects

---

---

# Adding data

---

---

## Deleting Data

---

## Multiple filters

<table>
<thead>
<tr>
<th>pandas</th>
<th>Python equivalent</th>
<th>Meaning</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>a &amp; b</code></td>
<td><code>a and b</code></td>
<td><code>True</code> if both <code>a</code> and <code>b</code> are <code>True</code>, else <code>False</code></td>
</tr>
<tr>
<td><code>a | b</code></td>
<td><code>a or b</code></td>
<td><code>True</code> if either <code>a</code> or <code>b</code> is <code>True</code></td>
</tr>
<tr>
<td><code>~a</code></td>
<td><code>not a</code></td>
<td><code>True</code> if <code>a</code> is <code>False</code>, else <code>False</code></td>
</tr>
</tbody>
</table>

---

# Sorting