# Pandas

# Importing pandas

```sh
$ pip install pandas
```

In [1]:
import pandas as pd

# Basic objects in pandas

# Series

In [2]:
import numpy as np

In [3]:
data = np.array(["a", "b", "c", "d", "e"])
s = pd.Series(data)

print(type(s))
print(s)

<class 'pandas.core.series.Series'>
0    a
1    b
2    c
3    d
4    e
dtype: object


In [4]:
# Index lahko explicitno doloičimo
data = np.array(["a", "b", "c", "d", "e"])
s = pd.Series(data, index=[100, 101, 102, 103, 104])
print(s)

100    a
101    b
102    c
103    d
104    e
dtype: object


In [5]:
data = np.array(["a", "b", "c", "d", "e"])
s = pd.Series(data, index=[100, 101, 102, 103, 104, 105, 106, 107, 108])
print(s)

ValueError: Length of values (5) does not match length of index (9)

In [6]:
# Creating series with dictionary
s = pd.Series({1:"a", 2:"b", 3:"c", 4:"d", 5:"e"})
print(type(s))
print(s)

<class 'pandas.core.series.Series'>
1    a
2    b
3    c
4    d
5    e
dtype: object


In [12]:
s = pd.Series({1:"a", 2:"b", 3:"c", 4:"d", 5:"e"}, index=[1,3,5,7,9])
print(type(s))
print(s)

<class 'pandas.core.series.Series'>
1      a
3      c
5      e
7    NaN
9    NaN
dtype: object


In [13]:
# Series of same values
s = pd.Series("a", index=[1,2,3,4,6])
print(s)
print(type(s))

1    a
2    a
3    a
4    a
6    a
dtype: object
<class 'pandas.core.series.Series'>


---

In [14]:
s = pd.Series([1,2,3,4,5], index=["a", "b", "c", "d", "e"])
print(type(s))
print(s)

<class 'pandas.core.series.Series'>
a    1
b    2
c    3
d    4
e    5
dtype: int64


In [15]:
print(s.index)
print(s.values)

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
[1 2 3 4 5]


In [16]:
print(type(s.values))

<class 'numpy.ndarray'>


## Dataframe

![dataframe](images/df_exploded_resized.svg)

In [17]:
# Creating a DataFrame from series
data = {
    "one": pd.Series([10, 20, 30]),
    "two": pd.Series([40, 50, 50])
}

df = pd.DataFrame(data)
print(type(df))
print(df)

<class 'pandas.core.frame.DataFrame'>
   one  two
0   10   40
1   20   50
2   30   50


In [18]:
data = {
    "one": pd.Series([10, 20, 30], index=["a", "b", "c"]),
    "two": pd.Series(["x", "y", "z"], index=["a", "b", "d"])
}
df = pd.DataFrame(data)
print(type(df))
print(df)

<class 'pandas.core.frame.DataFrame'>
    one  two
a  10.0    x
b  20.0    y
c  30.0  NaN
d   NaN    z


In [19]:
# Defining DataFrame with python list
df = pd.DataFrame([1,2,3,4])
print(type(df))
print(df)

<class 'pandas.core.frame.DataFrame'>
   0
0  1
1  2
2  3
3  4


In [20]:
# Defining DataFrame with list of lists
data = [["Alex", 10],
       ["Bob", 12],
       ["Claire", 14]]
df = pd.DataFrame(data)
print(type(df))
print(df)

<class 'pandas.core.frame.DataFrame'>
        0   1
0    Alex  10
1     Bob  12
2  Claire  14


In [21]:
# Sapecifying column names
data = [["Alex", 10],
       ["Bob", 12],
       ["Claire", 14]]
df = pd.DataFrame(data, columns=["Name", "Age"])
print(type(df))
print(df)

<class 'pandas.core.frame.DataFrame'>
     Name  Age
0    Alex   10
1     Bob   12
2  Claire   14


---

# Importing real case data

Stolpce katere bomo uporabljali so:
* `company` - ime podjetja
* `rank` - kje na lestvici top 500 se podjetje nahaja
* `revenues` - koliko prihodka je imelo podjetje, v miljon dolarjih
* `revenue_change` - kolikšna je bila sprememba dobička med zdajšnjim in prejšnjim letom, izraženo v procentih (%)
* `profits` - kolikšen je bil profit podjetja, izražen v miljon dolarjih
* `ceo` - kdo je glavni direktor podjetja
* `industry` - v kateri industriji podjetje deluje
* `sector` - v katerem sektorju podjetje deluje
* `previous_rank` - lanskoletni Global 500 rank podjetja
* `country` - država v kateri se nahajajo glavne pisarne podjetja


In [195]:
import pandas as pd

f500 = pd.read_csv("data/f500.csv")
print(type(f500))
print(f500.shape)

<class 'pandas.core.frame.DataFrame'>
(500, 17)


[DataFrame.head()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.head.html)

In [196]:
f500.head()

Unnamed: 0,company,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
0,Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
1,State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
2,Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
3,China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
4,Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


In [197]:
f500.tail(10)

Unnamed: 0,company,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
490,National Grid,491,22036,-3.2,10150.6,82310,160.2,John Pettigrew,Utilities,Energy,471,Britain,"London, Britain",http://www.nationalgrid.com,12,22132,25463
491,Dollar General,492,21987,7.9,1251.1,11672,7.4,Todd J. Vasos,Specialty Retailers,Retailing,0,USA,"Goodlettsville, TN",http://www.dollargeneral.com,1,121000,5406
492,Telecom Italia,493,21941,-17.4,1999.4,74295,,Flavio Cattaneo,Telecommunications,Telecommunications,404,Italy,"Milan, Italy",http://www.telecomitalia.com,18,61227,22366
493,Xiamen ITG Holding Group,494,21930,34.3,35.6,12161,-25.1,Xu Xiaoxi,Trading,Wholesalers,0,China,"Xiamen, China",http://www.itgholding.com.cn,1,18454,1066
494,Xinjiang Guanghui Industry Investment,495,21919,31.1,251.8,31957,49.9,Shang Jiqiang,Trading,Wholesalers,0,China,"Urumqi, China",http://www.guanghui.com,1,65616,4563
495,Teva Pharmaceutical Industries,496,21903,11.5,329.0,92890,-79.3,Yitzhak Peterburg,Pharmaceuticals,Health Care,0,Israel,"Petach Tikva, Israel",http://www.tevapharm.com,1,56960,33337
496,New China Life Insurance,497,21796,-13.3,743.9,100609,-45.6,Wan Feng,"Insurance: Life, Health (stock)",Financials,427,China,"Beijing, China",http://www.newchinalife.com,2,54378,8507
497,Wm. Morrison Supermarkets,498,21741,-11.3,406.4,11630,20.4,David T. Potts,Food and Drug Stores,Food & Drug Stores,437,Britain,"Bradford, Britain",http://www.morrisons.com,13,77210,5111
498,TUI,499,21655,-5.5,1151.7,16247,195.5,Friedrich Joussen,Travel Services,Business Services,467,Germany,"Hanover, Germany",http://www.tuigroup.com,23,66779,3006
499,AutoNation,500,21609,3.6,430.5,10060,-2.7,Michael J. Jackson,Specialty Retailers,Retailing,0,USA,"Fort Lauderdale, FL",http://www.autonation.com,12,26000,2310


In [198]:
f500

Unnamed: 0,company,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
0,Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
1,State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
2,Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
3,China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
4,Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,Teva Pharmaceutical Industries,496,21903,11.5,329.0,92890,-79.3,Yitzhak Peterburg,Pharmaceuticals,Health Care,0,Israel,"Petach Tikva, Israel",http://www.tevapharm.com,1,56960,33337
496,New China Life Insurance,497,21796,-13.3,743.9,100609,-45.6,Wan Feng,"Insurance: Life, Health (stock)",Financials,427,China,"Beijing, China",http://www.newchinalife.com,2,54378,8507
497,Wm. Morrison Supermarkets,498,21741,-11.3,406.4,11630,20.4,David T. Potts,Food and Drug Stores,Food & Drug Stores,437,Britain,"Bradford, Britain",http://www.morrisons.com,13,77210,5111
498,TUI,499,21655,-5.5,1151.7,16247,195.5,Friedrich Joussen,Travel Services,Business Services,467,Germany,"Hanover, Germany",http://www.tuigroup.com,23,66779,3006


In [199]:
f500.columns

Index(['company', 'rank', 'revenues', 'revenue_change', 'profits', 'assets',
       'profit_change', 'ceo', 'industry', 'sector', 'previous_rank',
       'country', 'hq_location', 'website', 'years_on_global_500_list',
       'employees', 'total_stockholder_equity'],
      dtype='object')

In [200]:
f500.index

RangeIndex(start=0, stop=500, step=1)

In [201]:
import pandas as pd
f500 = pd.read_csv("data/f500.csv", index_col = "company")
print(type(f500))
print(f500.shape)
print(f500.index)
f500.head()

<class 'pandas.core.frame.DataFrame'>
(500, 16)
Index(['Walmart', 'State Grid', 'Sinopec Group', 'China National Petroleum',
       'Toyota Motor', 'Volkswagen', 'Royal Dutch Shell', 'Berkshire Hathaway',
       'Apple', 'Exxon Mobil',
       ...
       'National Grid', 'Dollar General', 'Telecom Italia',
       'Xiamen ITG Holding Group', 'Xinjiang Guanghui Industry Investment',
       'Teva Pharmaceutical Industries', 'New China Life Insurance',
       'Wm. Morrison Supermarkets', 'TUI', 'AutoNation'],
      dtype='object', name='company', length=500)


Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


---

In [202]:
f500.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, Walmart to AutoNation
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   rank                      500 non-null    int64  
 1   revenues                  500 non-null    int64  
 2   revenue_change            498 non-null    float64
 3   profits                   499 non-null    float64
 4   assets                    500 non-null    int64  
 5   profit_change             436 non-null    float64
 6   ceo                       500 non-null    object 
 7   industry                  500 non-null    object 
 8   sector                    500 non-null    object 
 9   previous_rank             500 non-null    int64  
 10  country                   500 non-null    object 
 11  hq_location               500 non-null    object 
 12  website                   500 non-null    object 
 13  years_on_global_500_list  500 non-null    int64  
 14  em

In [203]:
import pandas as pd
import numpy as np
f500 = pd.read_csv("data/f500.csv", index_col="company", dtype={"rank": np.int16})
f500.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, Walmart to AutoNation
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   rank                      500 non-null    int16  
 1   revenues                  500 non-null    int64  
 2   revenue_change            498 non-null    float64
 3   profits                   499 non-null    float64
 4   assets                    500 non-null    int64  
 5   profit_change             436 non-null    float64
 6   ceo                       500 non-null    object 
 7   industry                  500 non-null    object 
 8   sector                    500 non-null    object 
 9   previous_rank             500 non-null    int64  
 10  country                   500 non-null    object 
 11  hq_location               500 non-null    object 
 12  website                   500 non-null    object 
 13  years_on_global_500_list  500 non-null    int64  
 14  em

## Categorical Data

In [204]:
s = pd.Series(["sl", "hr", "at", "sl", "de"], dtype="category")
s

0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (4, object): ['at', 'de', 'hr', 'sl']

In [205]:
s = pd.Series(["sl", "hr", "at", "sl", "de"])
print(s)

s = s.astype("category")
s

0    sl
1    hr
2    at
3    sl
4    de
dtype: object


0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (4, object): ['at', 'de', 'hr', 'sl']

In [206]:
s = pd.Series(
    pd.Categorical(["sl", "hr", "at", "sl", "de"], categories=["sl", "hr", "at", "de"])
)
s

0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (4, object): ['sl', 'hr', 'at', 'de']

---

In [207]:
s1 = pd.Series(["sl", "hr", "at", "sl", "de"]*10_000, dtype="object")
print(s1)
s1.memory_usage()

0        sl
1        hr
2        at
3        sl
4        de
         ..
49995    sl
49996    hr
49997    at
49998    sl
49999    de
Length: 50000, dtype: object


400128

In [208]:
s2 = pd.Series(["sl", "hr", "at", "sl", "de"]*10_000, dtype="category")
print(s2)
s2.memory_usage()

0        sl
1        hr
2        at
3        sl
4        de
         ..
49995    sl
49996    hr
49997    at
49998    sl
49999    de
Length: 50000, dtype: category
Categories (4, object): ['at', 'de', 'hr', 'sl']


50332

In [209]:
s.cat.categories

Index(['sl', 'hr', 'at', 'de'], dtype='object')

In [210]:
s.cat.codes

0    0
1    1
2    2
3    0
4    3
dtype: int8

In [211]:
df = pd.DataFrame({"A": ["sl", "hr", "at", "sl", "de"], "B": ["sl", "hr", "at", "sl", "pl"]}, dtype="category")
print(df["A"])
print(df["B"])
df

0    sl
1    hr
2    at
3    sl
4    de
Name: A, dtype: category
Categories (4, object): ['at', 'de', 'hr', 'sl']
0    sl
1    hr
2    at
3    sl
4    pl
Name: B, dtype: category
Categories (4, object): ['at', 'hr', 'pl', 'sl']


Unnamed: 0,A,B
0,sl,sl
1,hr,hr
2,at,at
3,sl,sl
4,de,pl


In [212]:
df = pd.DataFrame({"A": ["sl", "hr", "at", "sl", "de"], "B": ["sl", "hr", "at", "sl", "pl"]})
cat_type = pd.CategoricalDtype(categories=["sl", "at", "hr", "de", "pl"])
df_cat = df.astype(cat_type)

print(df_cat["A"])
print(df_cat["B"])

0    sl
1    hr
2    at
3    sl
4    de
Name: A, dtype: category
Categories (5, object): ['sl', 'at', 'hr', 'de', 'pl']
0    sl
1    hr
2    at
3    sl
4    pl
Name: B, dtype: category
Categories (5, object): ['sl', 'at', 'hr', 'de', 'pl']


----

In [213]:
print(s)
s.cat.categories

0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (4, object): ['sl', 'hr', 'at', 'de']


Index(['sl', 'hr', 'at', 'de'], dtype='object')

In [214]:
s.cat.rename_categories({"sl": "slovenija", "hr":"hrvaška", "at":"avstrija", "de":"nemčija"})

0    slovenija
1      hrvaška
2     avstrija
3    slovenija
4      nemčija
dtype: category
Categories (4, object): ['slovenija', 'hrvaška', 'avstrija', 'nemčija']

In [215]:
s.cat.rename_categories(["slovenija", "hrvaška", "avstrija", "nemčija"])

0    slovenija
1      hrvaška
2     avstrija
3    slovenija
4      nemčija
dtype: category
Categories (4, object): ['slovenija', 'hrvaška', 'avstrija', 'nemčija']

In [216]:
s.cat.add_categories(["pl"])

0    sl
1    hr
2    at
3    sl
4    de
dtype: category
Categories (5, object): ['sl', 'hr', 'at', 'de', 'pl']

In [217]:
s2 = s.cat.remove_categories(["de"])
print(s2)
print(s2.cat.categories)
print(s2.cat.codes)

0     sl
1     hr
2     at
3     sl
4    NaN
dtype: category
Categories (3, object): ['sl', 'hr', 'at']
Index(['sl', 'hr', 'at'], dtype='object')
0    0
1    1
2    2
3    0
4   -1
dtype: int8


In [218]:
# določimo vrstni red med kategorijam
s = pd.Series(pd.Categorical(["low", "med", "high", "low"]*3, ordered=False))
s.sort_values()

2     high
6     high
10    high
0      low
3      low
4      low
7      low
8      low
11     low
1      med
5      med
9      med
dtype: category
Categories (3, object): ['high', 'low', 'med']

In [219]:
s = pd.Series(pd.Categorical(["low", "med", "high", "low"]*3, categories=["low", "med", "high"], ordered=True))
s.sort_values()

0      low
3      low
4      low
7      low
8      low
11     low
1      med
5      med
9      med
2     high
6     high
10    high
dtype: category
Categories (3, object): ['low' < 'med' < 'high']

In [220]:
s

0      low
1      med
2     high
3      low
4      low
5      med
6     high
7      low
8      low
9      med
10    high
11     low
dtype: category
Categories (3, object): ['low' < 'med' < 'high']

In [221]:
pd.get_dummies(s)

Unnamed: 0,low,med,high
0,1,0,0
1,0,1,0
2,0,0,1
3,1,0,0
4,1,0,0
5,0,1,0
6,0,0,1
7,1,0,0
8,1,0,0
9,0,1,0


# Data Selection / Data Indexing

## Selecting Columns by Label - .loc

`DataFrame.loc[ row_label , column_label ]`.

[DataFrame.loc[  ]](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html)

In [222]:
f500.head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


In [223]:
rank_col = f500.loc[:, "rank"]
print(type(rank_col))
rank_col

<class 'pandas.core.series.Series'>


company
Walmart                             1
State Grid                          2
Sinopec Group                       3
China National Petroleum            4
Toyota Motor                        5
                                 ... 
Teva Pharmaceutical Industries    496
New China Life Insurance          497
Wm. Morrison Supermarkets         498
TUI                               499
AutoNation                        500
Name: rank, Length: 500, dtype: int16

In [224]:
rank_col = f500["rank"]
print(type(rank_col))
rank_col

<class 'pandas.core.series.Series'>


company
Walmart                             1
State Grid                          2
Sinopec Group                       3
China National Petroleum            4
Toyota Motor                        5
                                 ... 
Teva Pharmaceutical Industries    496
New China Life Insurance          497
Wm. Morrison Supermarkets         498
TUI                               499
AutoNation                        500
Name: rank, Length: 500, dtype: int16

In [225]:
slice_ = f500.loc[:, ["country", "rank"]]
print(type(slice_))
slice_

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0_level_0,country,rank
company,Unnamed: 1_level_1,Unnamed: 2_level_1
Walmart,USA,1
State Grid,China,2
Sinopec Group,China,3
China National Petroleum,China,4
Toyota Motor,Japan,5
...,...,...
Teva Pharmaceutical Industries,Israel,496
New China Life Insurance,China,497
Wm. Morrison Supermarkets,Britain,498
TUI,Germany,499


In [226]:
slice_ = f500[["country", "rank"]]
print(type(slice_))
slice_

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0_level_0,country,rank
company,Unnamed: 1_level_1,Unnamed: 2_level_1
Walmart,USA,1
State Grid,China,2
Sinopec Group,China,3
China National Petroleum,China,4
Toyota Motor,Japan,5
...,...,...
Teva Pharmaceutical Industries,Israel,496
New China Life Insurance,China,497
Wm. Morrison Supermarkets,Britain,498
TUI,Germany,499


In [227]:
slice_ = f500.loc[:, "rank":"profits"]
print(type(slice_))
slice_

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0_level_0,rank,revenues,revenue_change,profits
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Walmart,1,485873,0.8,13643.0
State Grid,2,315199,-4.4,9571.3
Sinopec Group,3,267518,-9.1,1257.9
China National Petroleum,4,262573,-12.3,1867.5
Toyota Motor,5,254694,7.7,16899.3
...,...,...,...,...
Teva Pharmaceutical Industries,496,21903,11.5,329.0
New China Life Insurance,497,21796,-13.3,743.9
Wm. Morrison Supermarkets,498,21741,-11.3,406.4
TUI,499,21655,-5.5,1151.7


In [228]:
profits_col = f500.profits
print(type(profits_col))
profits_col

<class 'pandas.core.series.Series'>


company
Walmart                           13643.0
State Grid                         9571.3
Sinopec Group                      1257.9
China National Petroleum           1867.5
Toyota Motor                      16899.3
                                   ...   
Teva Pharmaceutical Industries      329.0
New China Life Insurance            743.9
Wm. Morrison Supermarkets           406.4
TUI                                1151.7
AutoNation                          430.5
Name: profits, Length: 500, dtype: float64

<table>
<thead>
<tr>
<th>Select by Label</th>
<th>Explicit Syntax</th>
<th>Common Shorthand</th>
<th>Other Shorthand</th>
</tr>
</thead>
<tbody>
<tr>
<td>Single column</td>
<td><code>df.loc[:,"col1"]</code></td>
<td bgcolor="#00FF00"><code>df["col1"]</code></td>
<td><code>df.col1</code></td>
</tr>
<tr>
<td>List of columns</td>
<td><code>df.loc[:,["col1", "col7"]]</code></td>
<td bgcolor="#00FF00"><code>df[["col1", "col7"]]</code></td>
<td></td>
</tr>
<tr>
<td>Slice of columns</td>
<td bgcolor="#00FF00"><code>df.loc[:,"col1":"col4"]</code></td>
<td></td>
<td></td>
</tr>
</tbody>
</table>

## Select Rows by Label - .loc

In [229]:
f500.head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


In [230]:
single_row = f500.loc["Sinopec Group", :]
print(type(single_row))
single_row

<class 'pandas.core.series.Series'>


rank                                             3
revenues                                    267518
revenue_change                                -9.1
profits                                     1257.9
assets                                      310726
profit_change                                -65.0
ceo                                      Wang Yupu
industry                        Petroleum Refining
sector                                      Energy
previous_rank                                    4
country                                      China
hq_location                         Beijing, China
website                     http://www.sinopec.com
years_on_global_500_list                        19
employees                                   713288
total_stockholder_equity                    106523
Name: Sinopec Group, dtype: object

In [231]:
single_row = f500.loc["Sinopec Group"]
print(type(single_row))
single_row

<class 'pandas.core.series.Series'>


rank                                             3
revenues                                    267518
revenue_change                                -9.1
profits                                     1257.9
assets                                      310726
profit_change                                -65.0
ceo                                      Wang Yupu
industry                        Petroleum Refining
sector                                      Energy
previous_rank                                    4
country                                      China
hq_location                         Beijing, China
website                     http://www.sinopec.com
years_on_global_500_list                        19
employees                                   713288
total_stockholder_equity                    106523
Name: Sinopec Group, dtype: object

In [232]:
slice_ = f500.loc[["Toyota Motor", "Walmart"], :]
print(type(slice_))
slice_

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210
Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798


In [233]:
slice_ = f500.loc["State Grid":"Toyota Motor"]
print(type(slice_))
slice_

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


![image](images/df_series_s_updated.svg)

![image](images/df_series_df_updated.svg)

## Selecting Items - .loc

In [234]:
f500.head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


In [235]:
walmart_profits = f500.loc["Walmart", "profits"]
print(type(walmart_profits))
walmart_profits

<class 'numpy.float64'>


13643.0

In [236]:
walmart_row = f500.loc["Walmart", :]
print(walmart_row)

print()

walmart_profits = walmart_row.loc["profits"]
print(walmart_profits)

rank                                             1
revenues                                    485873
revenue_change                                 0.8
profits                                    13643.0
assets                                      198825
profit_change                                 -7.2
ceo                            C. Douglas McMillon
industry                     General Merchandisers
sector                                   Retailing
previous_rank                                    1
country                                        USA
hq_location                        Bentonville, AR
website                     http://www.walmart.com
years_on_global_500_list                        23
employees                                  2300000
total_stockholder_equity                     77798
Name: Walmart, dtype: object

13643.0


<table>
<thead>
<tr>
<th>Select by Label</th>
<th>Explicit Syntax</th>
<th>Shorthand Convention</th>
</tr>
</thead>
<tbody>
<tr>
<td>Single column from dataframe</td>
<td><code>df.loc[:,"col1"]</code></td>
<td bgcolor="#00FF00"><code>df["col1"]</code></td>
</tr>
<tr>
<td>List of columns from dataframe</td>
<td><code>df.loc[:,["col1","col7"]]</code></td>
<td bgcolor="#00FF00"><code>df[["col1","col7"]]</code></td>
</tr>
<tr>
<td>Slice of columns from dataframe</td>
<td bgcolor="#00FF00"><code>df.loc[:,"col1":"col4"]</code></td>
<td></td>
</tr>
<tr>
<td>Single row from dataframe</td>
<td bgcolor="#00FF00"><code>df.loc["row4"]</code></td>
<td></td>
</tr>
<tr>
<td>List of rows from dataframe</td>
<td bgcolor="#00FF00"><code>df.loc[["row1", "row8"]]</code></td>
<td></td>
</tr>
<tr>
<td>Slice of rows from dataframe</td>
<td bgcolor="#00FF00"><code>df.loc["row3":"row5"]</code></td>
<td><code>df["row3":"row5"]</code></td>
</tr>
<tr>
<td>Single item from series</td>
<td><code>s.loc["item8"]</code></td>
<td bgcolor="#00FF00"><code>s["item8"]</code></td>
</tr>
<tr>
<td>List of items from series</td>
<td><code>s.loc[["item1","item7"]]</code></td>
<td bgcolor="#00FF00"><code>s[["item1","item7"]]</code></td>
</tr>
<tr>
<td>Slice of items from series</td>
<td><code>s.loc["item2":"item4"]</code></td>
<td bgcolor="#00FF00"><code>s["item2":"item4"]</code></td>
</tr>
</tbody>
</table>

---

In [237]:
data = f500.loc[["Aviva", "HP", "JD.com", "BHP Billiton"], ["rank", "previous_rank", "profits"]]
data

Unnamed: 0_level_0,rank,previous_rank,profits
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Aviva,90,279,948.8
HP,194,48,2496.0
JD.com,261,366,-573.0
BHP Billiton,350,168,-6385.0


## Selecting using .iloc

In [238]:
f500.loc["Walmart", "sector"]

'Retailing'

In [239]:
f500.iloc[0, 8]

'Retailing'

![image](images/selection_iloc.svg)

In [240]:
f500.iloc[0]

rank                                             1
revenues                                    485873
revenue_change                                 0.8
profits                                    13643.0
assets                                      198825
profit_change                                 -7.2
ceo                            C. Douglas McMillon
industry                     General Merchandisers
sector                                   Retailing
previous_rank                                    1
country                                        USA
hq_location                        Bentonville, AR
website                     http://www.walmart.com
years_on_global_500_list                        23
employees                                  2300000
total_stockholder_equity                     77798
Name: Walmart, dtype: object

In [241]:
f500.iloc[:, 6]

company
Walmart                           C. Douglas McMillon
State Grid                                    Kou Wei
Sinopec Group                               Wang Yupu
China National Petroleum                Zhang Jianhua
Toyota Motor                              Akio Toyoda
                                         ...         
Teva Pharmaceutical Industries      Yitzhak Peterburg
New China Life Insurance                     Wan Feng
Wm. Morrison Supermarkets              David T. Potts
TUI                                 Friedrich Joussen
AutoNation                         Michael J. Jackson
Name: ceo, Length: 500, dtype: object

In [242]:
f500.iloc[5:12]

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Volkswagen,6,240264,1.5,5937.3,432116,,Matthias Muller,Motor Vehicles and Parts,Motor Vehicles & Parts,7,Germany,"Wolfsburg, Germany",http://www.volkswagen.com,23,626715,97753
Royal Dutch Shell,7,240033,-11.8,4575.0,411275,135.9,Ben van Beurden,Petroleum Refining,Energy,5,Netherlands,"The Hague, Netherlands",http://www.shell.com,23,89000,186646
Berkshire Hathaway,8,223604,6.1,24074.0,620854,,Warren E. Buffett,Insurance: Property and Casualty (Stock),Financials,11,USA,"Omaha, NE",http://www.berkshirehathaway.com,21,367700,283001
Apple,9,215639,-7.7,45687.0,321686,-14.4,Timothy D. Cook,"Computers, Office Equipment",Technology,9,USA,"Cupertino, CA",http://www.apple.com,15,116000,128249
Exxon Mobil,10,205004,-16.7,7840.0,330314,-51.5,Darren W. Woods,Petroleum Refining,Energy,6,USA,"Irving, TX",http://www.exxonmobil.com,23,72700,167325
McKesson,11,198533,3.1,5070.0,60969,124.5,John H. Hammergren,Wholesalers: Health Care,Wholesalers,12,USA,"San Francisco, CA",http://www.mckesson.com,23,64500,11095
BP,12,186606,-17.4,115.0,263316,,Robert W. Dudley,Petroleum Refining,Energy,10,Britain,"London, Britain",http://www.bp.com,23,74500,95286


In [243]:
f500.iloc[:5, 5:10]

Unnamed: 0_level_0,profit_change,ceo,industry,sector,previous_rank
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Walmart,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1
State Grid,-6.2,Kou Wei,Utilities,Energy,2
Sinopec Group,-65.0,Wang Yupu,Petroleum Refining,Energy,4
China National Petroleum,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3
Toyota Motor,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8


In [244]:
f500.iloc[0, "sector"]

ValueError: Location based indexing can only have [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types

---

# Vectorized Operations

![vectorized](images/vectorized.gif)

In [245]:
my_series = pd.Series([1,2,3,4,5])
my_series

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [246]:
my_series + 10

0    11
1    12
2    13
3    14
4    15
dtype: int64

In [247]:
my_series - 10

0   -9
1   -8
2   -7
3   -6
4   -5
dtype: int64

In [248]:
my_series * 10

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [249]:
my_series / 10

0    0.1
1    0.2
2    0.3
3    0.4
4    0.5
dtype: float64

In [250]:
rank_change = f500["previous_rank"] - f500["rank"]
print(type(rank_change))
rank_change

<class 'pandas.core.series.Series'>


company
Walmart                             0
State Grid                          0
Sinopec Group                       1
China National Petroleum           -1
Toyota Motor                        3
                                 ... 
Teva Pharmaceutical Industries   -496
New China Life Insurance          -70
Wm. Morrison Supermarkets         -61
TUI                               -32
AutoNation                       -500
Length: 500, dtype: int64

# Series Data Exploration Methods

In [251]:
rank_change.head()

company
Walmart                     0
State Grid                  0
Sinopec Group               1
China National Petroleum   -1
Toyota Motor                3
dtype: int64

In [252]:
my_series = pd.Series([0,1,2,3,4,5])
my_series.max()

5

In [253]:
rank_change_max = rank_change.max()
print(rank_change_max)

226


In [254]:
rank_change_min = rank_change.min()
rank_change_min

-500

---

## Series Describe

In [255]:
rank_change.describe()

count    500.000000
mean     -28.366000
std      108.602823
min     -500.000000
25%      -28.250000
50%       -4.000000
75%        8.250000
max      226.000000
dtype: float64

In [256]:
country = f500["country"]
country.describe()

count     500
unique     34
top       USA
freq      132
Name: country, dtype: object

In [257]:
f500.head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


In [258]:
f500.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, Walmart to AutoNation
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   rank                      500 non-null    int16  
 1   revenues                  500 non-null    int64  
 2   revenue_change            498 non-null    float64
 3   profits                   499 non-null    float64
 4   assets                    500 non-null    int64  
 5   profit_change             436 non-null    float64
 6   ceo                       500 non-null    object 
 7   industry                  500 non-null    object 
 8   sector                    500 non-null    object 
 9   previous_rank             500 non-null    int64  
 10  country                   500 non-null    object 
 11  hq_location               500 non-null    object 
 12  website                   500 non-null    object 
 13  years_on_global_500_list  500 non-null    int64  
 14  em

---

In [259]:
rank_change.value_counts()

-1      14
-4      13
 8      12
-2      12
-6      11
        ..
 85      1
 105     1
-30      1
 144     1
-500     1
Length: 185, dtype: int64

In [260]:
country.value_counts()

USA             132
China           109
Japan            51
Germany          29
France           29
Britain          24
South Korea      15
Netherlands      14
Switzerland      14
Canada           11
Spain             9
Australia         7
Brazil            7
India             7
Italy             7
Taiwan            6
Russia            4
Ireland           4
Singapore         3
Sweden            3
Mexico            2
Malaysia          1
Thailand          1
Belgium           1
Norway            1
Luxembourg        1
Indonesia         1
Denmark           1
Saudi Arabia      1
Finland           1
Venezuela         1
Turkey            1
U.A.E             1
Israel            1
Name: country, dtype: int64

In [261]:
import numpy as np


1 + np.nan

nan

In [262]:
f500["industry"].value_counts()

Banks: Commercial and Savings                     51
Motor Vehicles and Parts                          34
Petroleum Refining                                28
Insurance: Life, Health (stock)                   24
Food and Drug Stores                              20
Mining, Crude-Oil Production                      18
Telecommunications                                18
Utilities                                         18
Insurance: Property and Casualty (Stock)          18
Trading                                           15
Pharmaceuticals                                   15
Aerospace and Defense                             14
Electronics, Electrical Equip.                    13
Engineering, Construction                         13
Energy                                            12
Metals                                            12
Industrial Machinery                              10
Specialty Retailers                               10
Insurance: Life, Health (Mutual)              

# DataFrame Data Exploration Methods

![slika](images/axis_param.svg)

In [263]:
f500.max()

rank                                             500
revenues                                      485873
revenue_change                                 442.3
profits                                      45687.0
assets                                       3473238
profit_change                                 8909.5
ceo                                       Zhou Qiang
industry                    Wholesalers: Health Care
sector                                   Wholesalers
previous_rank                                    500
country                                    Venezuela
hq_location                      Zurich, Switzerland
website                        http://www.zurich.com
years_on_global_500_list                          23
employees                                    2300000
total_stockholder_equity                      301893
dtype: object

In [264]:
f500.max(axis="index")

rank                                             500
revenues                                      485873
revenue_change                                 442.3
profits                                      45687.0
assets                                       3473238
profit_change                                 8909.5
ceo                                       Zhou Qiang
industry                    Wholesalers: Health Care
sector                                   Wholesalers
previous_rank                                    500
country                                    Venezuela
hq_location                      Zurich, Switzerland
website                        http://www.zurich.com
years_on_global_500_list                          23
employees                                    2300000
total_stockholder_equity                      301893
dtype: object

In [265]:
f500.max(axis=0)

rank                                             500
revenues                                      485873
revenue_change                                 442.3
profits                                      45687.0
assets                                       3473238
profit_change                                 8909.5
ceo                                       Zhou Qiang
industry                    Wholesalers: Health Care
sector                                   Wholesalers
previous_rank                                    500
country                                    Venezuela
hq_location                      Zurich, Switzerland
website                        http://www.zurich.com
years_on_global_500_list                          23
employees                                    2300000
total_stockholder_equity                      301893
dtype: object

In [266]:
f500.max(axis=1)

  f500.max(axis=1)


company
Walmart                           2300000.0
State Grid                         926067.0
Sinopec Group                      713288.0
China National Petroleum          1512048.0
Toyota Motor                       437575.0
                                    ...    
Teva Pharmaceutical Industries      92890.0
New China Life Insurance           100609.0
Wm. Morrison Supermarkets           77210.0
TUI                                 66779.0
AutoNation                          26000.0
Length: 500, dtype: float64

In [267]:
f500.max(axis="columns")

  f500.max(axis="columns")


company
Walmart                           2300000.0
State Grid                         926067.0
Sinopec Group                      713288.0
China National Petroleum          1512048.0
Toyota Motor                       437575.0
                                    ...    
Teva Pharmaceutical Industries      92890.0
New China Life Insurance           100609.0
Wm. Morrison Supermarkets           77210.0
TUI                                 66779.0
AutoNation                          26000.0
Length: 500, dtype: float64

In [268]:
f500.max(axis="index", numeric_only=True)

rank                            500.0
revenues                     485873.0
revenue_change                  442.3
profits                       45687.0
assets                      3473238.0
profit_change                  8909.5
previous_rank                   500.0
years_on_global_500_list         23.0
employees                   2300000.0
total_stockholder_equity     301893.0
dtype: float64

---

## DataFrame Describe

In [269]:
f500.describe()

Unnamed: 0,rank,revenues,revenue_change,profits,assets,profit_change,previous_rank,years_on_global_500_list,employees,total_stockholder_equity
count,500.0,500.0,498.0,499.0,500.0,436.0,500.0,500.0,500.0,500.0
mean,250.5,55416.358,4.538353,3055.203206,243632.3,24.152752,222.134,15.036,133998.3,30628.076
std,144.481833,45725.478963,28.549067,5171.981071,485193.7,437.509566,146.941961,7.932752,170087.8,43642.576833
min,1.0,21609.0,-67.3,-13038.0,3717.0,-793.7,0.0,1.0,328.0,-59909.0
25%,125.75,29003.0,-5.9,556.95,36588.5,-22.775,92.75,7.0,42932.5,7553.75
50%,250.5,40236.0,0.55,1761.6,73261.5,-0.35,219.5,17.0,92910.5,15809.5
75%,375.25,63926.75,6.975,3954.0,180564.0,17.7,347.25,23.0,168917.2,37828.5
max,500.0,485873.0,442.3,45687.0,3473238.0,8909.5,500.0,23.0,2300000.0,301893.0


In [270]:
f500.describe(include="all")

Unnamed: 0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
count,500.0,500.0,498.0,499.0,500.0,436.0,500,500,500,500.0,500,500,500,500.0,500.0,500.0
unique,,,,,,,500,58,21,,34,235,500,,,
top,,,,,,,C. Douglas McMillon,Banks: Commercial and Savings,Financials,,USA,"Beijing, China",http://www.walmart.com,,,
freq,,,,,,,1,51,118,,132,56,1,,,
mean,250.5,55416.358,4.538353,3055.203206,243632.3,24.152752,,,,222.134,,,,15.036,133998.3,30628.076
std,144.481833,45725.478963,28.549067,5171.981071,485193.7,437.509566,,,,146.941961,,,,7.932752,170087.8,43642.576833
min,1.0,21609.0,-67.3,-13038.0,3717.0,-793.7,,,,0.0,,,,1.0,328.0,-59909.0
25%,125.75,29003.0,-5.9,556.95,36588.5,-22.775,,,,92.75,,,,7.0,42932.5,7553.75
50%,250.5,40236.0,0.55,1761.6,73261.5,-0.35,,,,219.5,,,,17.0,92910.5,15809.5
75%,375.25,63926.75,6.975,3954.0,180564.0,17.7,,,,347.25,,,,23.0,168917.2,37828.5


In [271]:
f500.describe(include=["O"])

Unnamed: 0,ceo,industry,sector,country,hq_location,website
count,500,500,500,500,500,500
unique,500,58,21,34,235,500
top,C. Douglas McMillon,Banks: Commercial and Savings,Financials,USA,"Beijing, China",http://www.walmart.com
freq,1,51,118,132,56,1


---

# Changing values

In [272]:
data = f500[["rank", "profits"]].copy()
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, Walmart to AutoNation
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   rank     500 non-null    int16  
 1   profits  499 non-null    float64
dtypes: float64(1), int16(1)
memory usage: 25.0+ KB


In [273]:
data["profits"] = 0.0
data

Unnamed: 0_level_0,rank,profits
company,Unnamed: 1_level_1,Unnamed: 2_level_1
Walmart,1,0.0
State Grid,2,0.0
Sinopec Group,3,0.0
China National Petroleum,4,0.0
Toyota Motor,5,0.0
...,...,...
Teva Pharmaceutical Industries,496,0.0
New China Life Insurance,497,0.0
Wm. Morrison Supermarkets,498,0.0
TUI,499,0.0


# Boolean Indexing pandas objects

In [274]:
d = {"name": ["Jaka", "Eva", "Sara", "Miha"], 
    "age": [12, 8, 5, 8]}
df = pd.DataFrame(d, index=["w", "x", "y", "z"])
df

Unnamed: 0,name,age
w,Jaka,12
x,Eva,8
y,Sara,5
z,Miha,8


In [275]:
bool_array = df["age"] == 8
print(type(bool_array))
bool_array

<class 'pandas.core.series.Series'>


w    False
x     True
y    False
z     True
Name: age, dtype: bool

In [276]:
result = df[bool_array]
print(type(result))
result

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,name,age
x,Eva,8
z,Miha,8


In [277]:
df.loc[bool_array, "name"]

x     Eva
z    Miha
Name: name, dtype: object

---

In [278]:
data = f500.copy()
data.head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798
State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456
Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4,China,"Beijing, China",http://www.sinopec.com,19,713288,106523
China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210


In [279]:
filter_ = data["previous_rank"] == 0
data[filter_].head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Legal & General Group,49,105235,442.3,1697.9,577954,3.4,Nigel Wilson,"Insurance: Life, Health (stock)",Financials,0,Britain,"London, Britain",http://www.legalandgeneralgroup.com,17,8939,8579
Uniper,91,74407,,-3557.5,51541,,Klaus Schafer,Energy,Energy,0,Germany,"Dusseldorf, Germany",http://www.uniper.energy,1,12890,12889
Dell Technologies,124,64806,18.1,-1672.0,118206,,Michael S. Dell,"Computers, Office Equipment",Technology,0,USA,"Round Rock, TX",http://www.delltechnologies.com,17,138000,13243
Anbang Insurance Group,139,60800,124.0,3883.9,430040,0.9,Wu Xiaohui,"Insurance: Life, Health (Mutual)",Financials,0,China,"Beijing, China",http://www.anbanggroup.com,1,40707,20372
Albertsons Cos.,141,59678,1.6,-373.3,23755,,Robert G. Miller,Food and Drug Stores,Food & Drug Stores,0,USA,"Boise, ID",http://www.albertsons.com,13,273000,1371


In [280]:
data.loc[filter_, "previous_rank"] = np.nan
data[filter_].head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Legal & General Group,49,105235,442.3,1697.9,577954,3.4,Nigel Wilson,"Insurance: Life, Health (stock)",Financials,,Britain,"London, Britain",http://www.legalandgeneralgroup.com,17,8939,8579
Uniper,91,74407,,-3557.5,51541,,Klaus Schafer,Energy,Energy,,Germany,"Dusseldorf, Germany",http://www.uniper.energy,1,12890,12889
Dell Technologies,124,64806,18.1,-1672.0,118206,,Michael S. Dell,"Computers, Office Equipment",Technology,,USA,"Round Rock, TX",http://www.delltechnologies.com,17,138000,13243
Anbang Insurance Group,139,60800,124.0,3883.9,430040,0.9,Wu Xiaohui,"Insurance: Life, Health (Mutual)",Financials,,China,"Beijing, China",http://www.anbanggroup.com,1,40707,20372
Albertsons Cos.,141,59678,1.6,-373.3,23755,,Robert G. Miller,Food and Drug Stores,Food & Drug Stores,,USA,"Boise, ID",http://www.albertsons.com,13,273000,1371


In [281]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, Walmart to AutoNation
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   rank                      500 non-null    int16  
 1   revenues                  500 non-null    int64  
 2   revenue_change            498 non-null    float64
 3   profits                   499 non-null    float64
 4   assets                    500 non-null    int64  
 5   profit_change             436 non-null    float64
 6   ceo                       500 non-null    object 
 7   industry                  500 non-null    object 
 8   sector                    500 non-null    object 
 9   previous_rank             467 non-null    float64
 10  country                   500 non-null    object 
 11  hq_location               500 non-null    object 
 12  website                   500 non-null    object 
 13  years_on_global_500_list  500 non-null    int64  
 14  em

In [282]:
f500[ f500["previous_rank"] == 0 ] = np.nan
f500["previous_rank"]

company
Walmart                             1.0
State Grid                          2.0
Sinopec Group                       4.0
China National Petroleum            3.0
Toyota Motor                        8.0
                                  ...  
Teva Pharmaceutical Industries      NaN
New China Life Insurance          427.0
Wm. Morrison Supermarkets         437.0
TUI                               467.0
AutoNation                          NaN
Name: previous_rank, Length: 500, dtype: float64

---

# Adding data

In [283]:
df = pd.DataFrame({"a": [1,2,3], "b": [4,5,6]})
df

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [284]:
df["c"] = 5
df

Unnamed: 0,a,b,c
0,1,4,5
1,2,5,5
2,3,6,5


In [285]:
df["d"] = [9,8,7]
df

Unnamed: 0,a,b,c,d
0,1,4,5,9
1,2,5,5,8
2,3,6,5,7


In [286]:
df["e"] = [1,2]
df

ValueError: Length of values (2) does not match length of index (3)

In [287]:
f500["rank_change"] = f500["previous_rank"] - f500["rank"]
f500.head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity,rank_change
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Walmart,1.0,485873.0,0.8,13643.0,198825.0,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1.0,USA,"Bentonville, AR",http://www.walmart.com,23.0,2300000.0,77798.0,0.0
State Grid,2.0,315199.0,-4.4,9571.3,489838.0,-6.2,Kou Wei,Utilities,Energy,2.0,China,"Beijing, China",http://www.sgcc.com.cn,17.0,926067.0,209456.0,0.0
Sinopec Group,3.0,267518.0,-9.1,1257.9,310726.0,-65.0,Wang Yupu,Petroleum Refining,Energy,4.0,China,"Beijing, China",http://www.sinopec.com,19.0,713288.0,106523.0,1.0
China National Petroleum,4.0,262573.0,-12.3,1867.5,585619.0,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3.0,China,"Beijing, China",http://www.cnpc.com.cn,17.0,1512048.0,301893.0,-1.0
Toyota Motor,5.0,254694.0,7.7,16899.3,437575.0,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8.0,Japan,"Toyota, Japan",http://www.toyota-global.com,23.0,364445.0,157210.0,3.0


In [288]:
f500["rank_change"].describe()

count    467.000000
mean      -3.533191
std       44.293603
min     -199.000000
25%      -21.000000
50%       -2.000000
75%       10.000000
max      226.000000
Name: rank_change, dtype: float64

---

In [289]:
f500["country"].value_counts().head(2)

USA      122
China     99
Name: country, dtype: int64

In [290]:
usa_filter = f500["country"] == "USA"
usa =f500[usa_filter]
usa["sector"].value_counts().head(3)

Financials     25
Health Care    15
Energy         13
Name: sector, dtype: int64

In [291]:
china_filter = f500["country"] == "China"
china = f500[china_filter]
china["sector"].value_counts().head(3)

Energy                        22
Financials                    22
Engineering & Construction     8
Name: sector, dtype: int64

In [292]:
f500[f500["country"] == "Slovenia"]

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity,rank_change
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1


---

## Deleting Data

In [293]:
data = {
    "a": [1,2,3,4],
    "b": [4,3,2,1],
    "c": [5,6,7,8]
}
df = pd.DataFrame(data, index=["w", "x", "y", "z"])
df

Unnamed: 0,a,b,c
w,1,4,5
x,2,3,6
y,3,2,7
z,4,1,8


In [294]:
df.drop("x")

Unnamed: 0,a,b,c
w,1,4,5
y,3,2,7
z,4,1,8


In [295]:
df.drop(labels=["a"], axis=1)

Unnamed: 0,b,c
w,4,5
x,3,6
y,2,7
z,1,8


In [296]:
df.drop(labels=["a", "c"], axis=1)

Unnamed: 0,b
w,4
x,3
y,2
z,1


In [297]:
df.drop(labels=["w", "y"])

Unnamed: 0,a,b,c
x,2,3,6
z,4,1,8


In [298]:
df.drop(index=["x", "y"])

Unnamed: 0,a,b,c
w,1,4,5
z,4,1,8


In [299]:
df.drop(columns=["a", "c"])

Unnamed: 0,b
w,4
x,3
y,2
z,1


In [300]:
df

Unnamed: 0,a,b,c
w,1,4,5
x,2,3,6
y,3,2,7
z,4,1,8


In [301]:
new_df = df.drop(columns=["a", "c"])
new_df

Unnamed: 0,b
w,4
x,3
y,2
z,1


In [302]:
print(df)

df.drop(columns=["a", "c"], inplace=True)
df

   a  b  c
w  1  4  5
x  2  3  6
y  3  2  7
z  4  1  8


Unnamed: 0,b
w,4
x,3
y,2
z,1


---

## Multiple filters

In [303]:
filter_1 = f500["sector"] == "Technology"
f500[filter_1].head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity,rank_change
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Apple,9.0,215639.0,-7.7,45687.0,321686.0,-14.4,Timothy D. Cook,"Computers, Office Equipment",Technology,9.0,USA,"Cupertino, CA",http://www.apple.com,15.0,116000.0,128249.0,0.0
Samsung Electronics,15.0,173957.0,-2.0,19316.5,217104.0,16.8,Oh-Hyun Kwon,"Electronics, Electrical Equip.",Technology,13.0,South Korea,"Suwon, South Korea",http://www.samsung.com,23.0,325000.0,154376.0,-2.0
Amazon.com,26.0,135987.0,27.1,2371.0,83402.0,297.8,Jeffrey P. Bezos,Internet Services and Retailing,Technology,44.0,USA,"Seattle, WA",http://www.amazon.com,9.0,341400.0,19285.0,18.0
Hon Hai Precision Industry,27.0,135129.0,-4.3,4608.8,80436.0,-0.4,Terry Gou,"Electronics, Electrical Equip.",Technology,25.0,Taiwan,"New Taipei City, Taiwan",http://www.foxconn.com,13.0,726772.0,33476.0,-2.0
Alphabet,65.0,90272.0,20.4,19478.0,167497.0,19.1,Larry Page,Internet Services and Retailing,Technology,94.0,USA,"Mountain View, CA",http://www.abc.xyz,9.0,72053.0,139036.0,29.0


In [304]:
filter_2 = f500["profits"] > 10_000
f500[filter_2].head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity,rank_change
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Walmart,1.0,485873.0,0.8,13643.0,198825.0,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1.0,USA,"Bentonville, AR",http://www.walmart.com,23.0,2300000.0,77798.0,0.0
Toyota Motor,5.0,254694.0,7.7,16899.3,437575.0,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8.0,Japan,"Toyota, Japan",http://www.toyota-global.com,23.0,364445.0,157210.0,3.0
Berkshire Hathaway,8.0,223604.0,6.1,24074.0,620854.0,,Warren E. Buffett,Insurance: Property and Casualty (Stock),Financials,11.0,USA,"Omaha, NE",http://www.berkshirehathaway.com,21.0,367700.0,283001.0,3.0
Apple,9.0,215639.0,-7.7,45687.0,321686.0,-14.4,Timothy D. Cook,"Computers, Office Equipment",Technology,9.0,USA,"Cupertino, CA",http://www.apple.com,15.0,116000.0,128249.0,0.0
Samsung Electronics,15.0,173957.0,-2.0,19316.5,217104.0,16.8,Oh-Hyun Kwon,"Electronics, Electrical Equip.",Technology,13.0,South Korea,"Suwon, South Korea",http://www.samsung.com,23.0,325000.0,154376.0,-2.0


In [305]:
filter_ = filter_1 & filter_2
f500[filter_]

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity,rank_change
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Apple,9.0,215639.0,-7.7,45687.0,321686.0,-14.4,Timothy D. Cook,"Computers, Office Equipment",Technology,9.0,USA,"Cupertino, CA",http://www.apple.com,15.0,116000.0,128249.0,0.0
Samsung Electronics,15.0,173957.0,-2.0,19316.5,217104.0,16.8,Oh-Hyun Kwon,"Electronics, Electrical Equip.",Technology,13.0,South Korea,"Suwon, South Korea",http://www.samsung.com,23.0,325000.0,154376.0,-2.0
Alphabet,65.0,90272.0,20.4,19478.0,167497.0,19.1,Larry Page,Internet Services and Retailing,Technology,94.0,USA,"Mountain View, CA",http://www.abc.xyz,9.0,72053.0,139036.0,29.0
Microsoft,69.0,85320.0,-8.8,16798.0,193694.0,37.8,Satya Nadella,Computer Software,Technology,63.0,USA,"Redmond, WA",http://www.microsoft.com,20.0,114000.0,71997.0,-6.0
IBM,81.0,79919.0,-3.1,11872.0,117470.0,-10.0,Virginia M. Rometty,Information Technology Services,Technology,82.0,USA,"Armonk, NY",http://www.ibm.com,23.0,414400.0,18246.0,1.0
Intel,144.0,59387.0,7.3,10316.0,113327.0,-9.7,Brian M. Krzanich,Semiconductors and Other Electronic Components,Technology,158.0,USA,"Santa Clara, CA",http://www.intel.com,23.0,106000.0,66226.0,14.0
Cisco Systems,187.0,49247.0,0.2,10739.0,121652.0,19.6,Charles H. Robbins,Network and Other Communications Equipment,Technology,183.0,USA,"San Jose, CA",http://www.cisco.com,18.0,73700.0,63586.0,-4.0
Taiwan Semiconductor Manufacturing,369.0,29388.0,10.6,10283.7,58535.0,7.8,Mark Liu,Semiconductors and Other Electronic Components,Technology,403.0,Taiwan,"Hsinchu, Taiwan",http://www.tsmc.com,3.0,46968.0,42174.0,34.0


<table>
<thead>
<tr>
<th>pandas</th>
<th>Python equivalent</th>
<th>Meaning</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>a &amp; b</code></td>
<td><code>a and b</code></td>
<td><code>True</code> if both <code>a</code> and <code>b</code> are <code>True</code>, else <code>False</code></td>
</tr>
<tr>
<td><code>a | b</code></td>
<td><code>a or b</code></td>
<td><code>True</code> if either <code>a</code> or <code>b</code> is <code>True</code></td>
</tr>
<tr>
<td><code>~a</code></td>
<td><code>not a</code></td>
<td><code>True</code> if <code>a</code> is <code>False</code>, else <code>False</code></td>
</tr>
</tbody>
</table>

In [306]:
filter_ = (f500["sector"] == "Technology") & (f500["profits"] > 10_000)
f500[filter_]

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity,rank_change
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Apple,9.0,215639.0,-7.7,45687.0,321686.0,-14.4,Timothy D. Cook,"Computers, Office Equipment",Technology,9.0,USA,"Cupertino, CA",http://www.apple.com,15.0,116000.0,128249.0,0.0
Samsung Electronics,15.0,173957.0,-2.0,19316.5,217104.0,16.8,Oh-Hyun Kwon,"Electronics, Electrical Equip.",Technology,13.0,South Korea,"Suwon, South Korea",http://www.samsung.com,23.0,325000.0,154376.0,-2.0
Alphabet,65.0,90272.0,20.4,19478.0,167497.0,19.1,Larry Page,Internet Services and Retailing,Technology,94.0,USA,"Mountain View, CA",http://www.abc.xyz,9.0,72053.0,139036.0,29.0
Microsoft,69.0,85320.0,-8.8,16798.0,193694.0,37.8,Satya Nadella,Computer Software,Technology,63.0,USA,"Redmond, WA",http://www.microsoft.com,20.0,114000.0,71997.0,-6.0
IBM,81.0,79919.0,-3.1,11872.0,117470.0,-10.0,Virginia M. Rometty,Information Technology Services,Technology,82.0,USA,"Armonk, NY",http://www.ibm.com,23.0,414400.0,18246.0,1.0
Intel,144.0,59387.0,7.3,10316.0,113327.0,-9.7,Brian M. Krzanich,Semiconductors and Other Electronic Components,Technology,158.0,USA,"Santa Clara, CA",http://www.intel.com,23.0,106000.0,66226.0,14.0
Cisco Systems,187.0,49247.0,0.2,10739.0,121652.0,19.6,Charles H. Robbins,Network and Other Communications Equipment,Technology,183.0,USA,"San Jose, CA",http://www.cisco.com,18.0,73700.0,63586.0,-4.0
Taiwan Semiconductor Manufacturing,369.0,29388.0,10.6,10283.7,58535.0,7.8,Mark Liu,Semiconductors and Other Electronic Components,Technology,403.0,Taiwan,"Hsinchu, Taiwan",http://www.tsmc.com,3.0,46968.0,42174.0,34.0


In [307]:
filter_ = (f500["sector"] == "Technology") & (f500["country"] != "USA")
f500[filter_].head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity,rank_change
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Samsung Electronics,15.0,173957.0,-2.0,19316.5,217104.0,16.8,Oh-Hyun Kwon,"Electronics, Electrical Equip.",Technology,13.0,South Korea,"Suwon, South Korea",http://www.samsung.com,23.0,325000.0,154376.0,-2.0
Hon Hai Precision Industry,27.0,135129.0,-4.3,4608.8,80436.0,-0.4,Terry Gou,"Electronics, Electrical Equip.",Technology,25.0,Taiwan,"New Taipei City, Taiwan",http://www.foxconn.com,13.0,726772.0,33476.0,-2.0
Hitachi,71.0,84558.0,1.2,2134.3,86742.0,48.8,Toshiaki Higashihara,"Electronics, Electrical Equip.",Technology,79.0,Japan,"Tokyo, Japan",http://www.hitachi.com,23.0,303887.0,26632.0,8.0
Huawei Investment & Holding,83.0,78511.0,24.9,5579.4,63837.0,-5.0,Ren Zhengfei,Network and Other Communications Equipment,Technology,129.0,China,"Shenzhen, China",http://www.huawei.com,8.0,180000.0,20159.0,46.0
Sony,105.0,70170.0,3.9,676.4,158519.0,-45.1,Kazuo Hirai,"Electronics, Electrical Equip.",Technology,113.0,Japan,"Tokyo, Japan",http://www.sony.net,23.0,128400.0,22415.0,8.0


---

# Sorting

In [308]:
filter_ = f500["country"] == "China"
china = f500[filter_]
china.head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity,rank_change
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
State Grid,2.0,315199.0,-4.4,9571.3,489838.0,-6.2,Kou Wei,Utilities,Energy,2.0,China,"Beijing, China",http://www.sgcc.com.cn,17.0,926067.0,209456.0,0.0
Sinopec Group,3.0,267518.0,-9.1,1257.9,310726.0,-65.0,Wang Yupu,Petroleum Refining,Energy,4.0,China,"Beijing, China",http://www.sinopec.com,19.0,713288.0,106523.0,1.0
China National Petroleum,4.0,262573.0,-12.3,1867.5,585619.0,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3.0,China,"Beijing, China",http://www.cnpc.com.cn,17.0,1512048.0,301893.0,-1.0
Industrial & Commercial Bank of China,22.0,147675.0,-11.7,41883.9,3473238.0,-5.0,Gu Shu,Banks: Commercial and Savings,Financials,15.0,China,"Beijing, China",http://www.icbc-ltd.com,19.0,461749.0,283438.0,-7.0
China State Construction Engineering,24.0,144505.0,3.1,2492.9,201269.0,10.7,Guan Qing,"Engineering, Construction",Engineering & Construction,27.0,China,"Beijing, China",http://www.cscec.com,6.0,263915.0,15344.0,3.0


In [313]:
sorted_china = china.sort_values("employees", ascending=False)
sorted_china.iloc[0, 14]

1512048.0

---

<div class="alert alert-block alert-info not_clean">
<b>Vaja:</b> 
    

V datasetu ustvarite nov stolpec `roa`. V stolpcu naj bod vrednost **ROA - Return on Assets** za vsako podjetje. 

    roa = profits / assets

ROA je metrika, ki nam pove koliko dobička lahko podjetje ustvari s svojimi sredstvi.

Nato za vsako državo izračunajte koliko je povprečni ROA. Vse možne države, ki se pojavijo v stolpcu `country` lahko pridobite s pomočjo `Series.unique()` metode, ki vrne vse unikatne vrednosti v stolpcu.
</div>


```python
OUTPUT:
USA           	 ROA mean: 0.048
China         	 ROA mean: 0.017
Japan         	 ROA mean: 0.028
Germany       	 ROA mean: 0.020
Netherlands   	 ROA mean: 0.025
Britain       	 ROA mean: 0.033
South Korea   	 ROA mean: 0.031
Switzerland   	 ROA mean: 0.035
France        	 ROA mean: 0.025
Taiwan        	 ROA mean: 0.055
Singapore     	 ROA mean: 0.023
Italy         	 ROA mean: 0.004
Russia        	 ROA mean: 0.031
Spain         	 ROA mean: 0.035
Brazil        	 ROA mean: 0.017
Mexico        	 ROA mean: -0.042
Luxembourg    	 ROA mean: 0.024
India         	 ROA mean: 0.052
Malaysia      	 ROA mean: 0.030
Thailand      	 ROA mean: 0.043
Australia     	 ROA mean: -0.010
Belgium       	 ROA mean: 0.005
Norway        	 ROA mean: -0.028
Canada        	 ROA mean: 0.024
Ireland       	 ROA mean: 0.066
Indonesia     	 ROA mean: 0.067
Denmark       	 ROA mean: -0.032
Saudi Arabia  	 ROA mean: 0.056
Sweden        	 ROA mean: 0.082
Finland       	 ROA mean: -0.018
Venezuela     	 ROA mean: 0.013
Turkey        	 ROA mean: 0.046
U.A.E         	 ROA mean: 0.010
Israel        	 ROA mean: 0.004
```

In [318]:
f500["country"].dropna().unique()

array(['USA', 'China', 'Japan', 'Germany', 'Netherlands', 'Britain',
       'South Korea', 'Switzerland', 'France', 'Taiwan', nan, 'Singapore',
       'Italy', 'Russia', 'Spain', 'Brazil', 'Mexico', 'Luxembourg',
       'India', 'Malaysia', 'Thailand', 'Australia', 'Belgium', 'Norway',
       'Canada', 'Indonesia', 'Denmark', 'Saudi Arabia', 'Sweden',
       'Ireland', 'Turkey', 'U.A.E'], dtype=object)

In [319]:
# Rešitev
f500["roa"] = f500["profits"] / f500["assets"]
countries = f500["country"].dropna().unique()

for country in countries:
    filter_ = f500["country"] == country
    country_data = f500[filter_]
    roa_mean = country_data["roa"].mean()
    print(f"{country:13} \t ROA mean: {roa_mean:.3f}")

USA           	 ROA mean: 0.048
China         	 ROA mean: 0.015
Japan         	 ROA mean: 0.028
Germany       	 ROA mean: 0.024
Netherlands   	 ROA mean: 0.028
Britain       	 ROA mean: 0.036
South Korea   	 ROA mean: 0.033
Switzerland   	 ROA mean: 0.036
France        	 ROA mean: 0.025
Taiwan        	 ROA mean: 0.055
Singapore     	 ROA mean: 0.023
Italy         	 ROA mean: 0.004
Russia        	 ROA mean: 0.031
Spain         	 ROA mean: 0.035
Brazil        	 ROA mean: 0.017
Mexico        	 ROA mean: -0.042
Luxembourg    	 ROA mean: 0.024
India         	 ROA mean: 0.052
Malaysia      	 ROA mean: 0.030
Thailand      	 ROA mean: 0.043
Australia     	 ROA mean: -0.010
Belgium       	 ROA mean: 0.005
Norway        	 ROA mean: -0.028
Canada        	 ROA mean: 0.025
Indonesia     	 ROA mean: 0.067
Denmark       	 ROA mean: -0.032
Saudi Arabia  	 ROA mean: 0.056
Sweden        	 ROA mean: 0.082
Ireland       	 ROA mean: 0.120
Turkey        	 ROA mean: 0.046
U.A.E         	 ROA mean: 0.010
