In [1]:
# Introduction to the Data
# Read the Fortune Global 500 CSV, using its first column as the row index,
# and keep the first five rows around for a quick look.

import pandas as pd

f500 = pd.read_csv('../dataset/f500.csv', index_col=0)
f500_head = f500.head(5)

In [2]:
# Print a structural summary of the frame: index range, column names,
# non-null counts per column and each column's dtype.
f500.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, Walmart to AutoNation
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   rank                      500 non-null    int64  
 1   revenues                  500 non-null    int64  
 2   revenue_change            498 non-null    float64
 3   profits                   499 non-null    float64
 4   assets                    500 non-null    int64  
 5   profit_change             436 non-null    float64
 6   ceo                       500 non-null    object 
 7   industry                  500 non-null    object 
 8   sector                    500 non-null    object 
 9   previous_rank             500 non-null    int64  
 10  country                   500 non-null    object 
 11  hq_location               500 non-null    object 
 12  website                   500 non-null    object 
 13  years_on_global_500_list  500 non-null    int64  
 14  em

In [3]:
# Vectorized Operations
# Element-wise difference of the two rank columns, aligned on the company index:
# a positive value means the company moved up the list, a negative one that it fell.
rank_change = f500['previous_rank'].sub(f500['rank'])
print(rank_change.head(5))

company
Walmart                     0
State Grid                  0
Sinopec Group               1
China National Petroleum   -1
Toyota Motor                3
dtype: int64


In [4]:
# Series Data Exploration Methods
# Largest and smallest year-over-year rank movement.
# NOTE(review): an extreme negative value here is presumably caused by
# previous_rank == 0 placeholder rows rather than a real drop - confirm.

rank_change_max, rank_change_min = rank_change.max(), rank_change.min()
print(rank_change_max, rank_change_min)

226 -500


In [5]:
# Series Describe Method
# Summary statistics (count, mean, std, min, quartiles, max) for the current
# and the previous rank columns, printed one after the other.

rank, prev_rank = f500['rank'], f500['previous_rank']
rank_desc, prev_rank_desc = rank.describe(), prev_rank.describe()

print(rank_desc, prev_rank_desc, sep='\n')


count    500.000000
mean     250.500000
std      144.481833
min        1.000000
25%      125.750000
50%      250.500000
75%      375.250000
max      500.000000
Name: rank, dtype: float64
count    500.000000
mean     222.134000
std      146.941961
min        0.000000
25%       92.750000
50%      219.500000
75%      347.250000
max      500.000000
Name: previous_rank, dtype: float64


In [6]:
# Method Chaining
# Count, in a single chained expression, how many companies have a
# previous_rank of 0.
#
# .get(0, 0) is used instead of .loc[0] so the cell reports 0 rather than
# raising KeyError when no zero is present (for example when the cell is
# re-run after the zeros have been replaced with NaN).
zero_previous_rank = f500['previous_rank'].value_counts().get(0, 0)
print(zero_previous_rank)

33


In [7]:
# Dataframe Exploration Methods
# Column-wise maximum, restricted to the numeric columns of the frame.

max_f500 = f500.max(numeric_only=True)

In [8]:
# Dataframe describe Method
# Compute summary statistics (count, mean, std, min, quartiles, max) for the
# frame's numeric columns and keep them in f500_desc.
f500_desc = f500.describe()

# print() emits the plain-text rendering of the summary table.
print(f500_desc)


             rank       revenues  revenue_change       profits        assets  \
count  500.000000     500.000000      498.000000    499.000000  5.000000e+02   
mean   250.500000   55416.358000        4.538353   3055.203206  2.436323e+05   
std    144.481833   45725.478963       28.549067   5171.981071  4.851937e+05   
min      1.000000   21609.000000      -67.300000 -13038.000000  3.717000e+03   
25%    125.750000   29003.000000       -5.900000    556.950000  3.658850e+04   
50%    250.500000   40236.000000        0.550000   1761.600000  7.326150e+04   
75%    375.250000   63926.750000        6.975000   3954.000000  1.805640e+05   
max    500.000000  485873.000000      442.300000  45687.000000  3.473238e+06   

       profit_change  previous_rank  years_on_global_500_list     employees  \
count     436.000000     500.000000                500.000000  5.000000e+02   
mean       24.152752     222.134000                 15.036000  1.339983e+05   
std       437.509566     146.941961       

In [9]:
# Assignment with pandas
# .loc is used so that both axes can be addressed in a single assignment:
#   'Dow Chemical' -> row label
#   'ceo'          -> column label

f500.loc['Dow Chemical', 'ceo'] = 'Jim Fitterling'

In [10]:
# Using Boolean Indexing with pandas Objects
# Build a boolean mask for one industry, then use it as the row selector
# to pull out the country of every matching company.

motor_bool = f500['industry'].eq('Motor Vehicles and Parts')
motor_countries = f500.loc[motor_bool, 'country']
print(motor_countries)

company
Toyota Motor                                 Japan
Volkswagen                                 Germany
Daimler                                    Germany
General Motors                                 USA
Ford Motor                                     USA
Honda Motor                                  Japan
SAIC Motor                                   China
Nissan Motor                                 Japan
BMW Group                                  Germany
Dongfeng Motor                               China
Robert Bosch                               Germany
Hyundai Motor                          South Korea
China FAW Group                              China
Beijing Automotive Group                     China
Peugeot                                     France
Renault                                     France
Kia Motors                             South Korea
Continental                                Germany
Denso                                        Japan
Guangzhou Automobile In

In [11]:
#  Using Boolean Arrays to Assign Values
# Replace every previous_rank of 0 with NaN and compare the most frequent
# values before and after the replacement.

import numpy as np

prev_rank_before = f500["previous_rank"].value_counts(dropna=False).head()

# Series.mask returns a new column with NaN wherever the condition holds
# (upcasting to float when needed). Assigning the whole column avoids writing
# NaN into int64 storage through .loc, which pandas >= 2.1 deprecates as a
# silent upcast (FutureWarning, planned to become an error).
f500["previous_rank"] = f500["previous_rank"].mask(f500["previous_rank"] == 0, np.nan)

prev_rank_after = f500["previous_rank"].value_counts(dropna=False).head()
print(prev_rank_after)

previous_rank
NaN      33
1.0       1
302.0     1
334.0     1
325.0     1
Name: count, dtype: int64


In [19]:
# Creating New Columns
# Store the rank movement as a column of the frame. Rows whose previous_rank
# is NaN yield NaN here and are therefore ignored by min/max/describe.
import numpy as np

f500['rank_change'] = f500['previous_rank'] - f500['rank']
rank_change_desc = f500['rank_change'].describe()

# Read the extremes from the freshly created column, not from the standalone
# `rank_change` Series built in an earlier cell: that Series was computed
# before the zero placeholders were replaced and would report stale values
# on a clean top-to-bottom run.
print(f500['rank_change'].min())
print(f500['rank_change'].max())
print(rank_change_desc)

-199.0
226.0
count    467.000000
mean      -3.533191
std       44.293603
min     -199.000000
25%      -21.000000
50%       -2.000000
75%       10.000000
max      226.000000
Name: rank_change, dtype: float64


In [35]:
# Challenge: Top Performers by Country
# Two most common industries among USA companies and three most common
# sectors among China companies, by number of companies on the list.

usa_rows = f500['country'].eq('USA')
china_rows = f500['country'].eq('China')

industry_usa = f500.loc[usa_rows, 'industry'].value_counts().head(2)
sector_china = f500.loc[china_rows, 'sector'].value_counts().head(3)

print(industry_usa, sector_china, sep='\n')

industry
Banks: Commercial and Savings               8
Insurance: Property and Casualty (Stock)    7
Name: count, dtype: int64
sector
Financials    25
Energy        22
Name: count, dtype: int64


In [24]:
# Display the first five rows of the frame as the cell's rich output.
f500.head()

Unnamed: 0_level_0,rank,revenues,revenue_change,profits,assets,profit_change,ceo,industry,sector,previous_rank,country,hq_location,website,years_on_global_500_list,employees,total_stockholder_equity,rank_change
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Walmart,1,485873,0.8,13643.0,198825,-7.2,C. Douglas McMillon,General Merchandisers,Retailing,1.0,USA,"Bentonville, AR",http://www.walmart.com,23,2300000,77798,0.0
State Grid,2,315199,-4.4,9571.3,489838,-6.2,Kou Wei,Utilities,Energy,2.0,China,"Beijing, China",http://www.sgcc.com.cn,17,926067,209456,0.0
Sinopec Group,3,267518,-9.1,1257.9,310726,-65.0,Wang Yupu,Petroleum Refining,Energy,4.0,China,"Beijing, China",http://www.sinopec.com,19,713288,106523,1.0
China National Petroleum,4,262573,-12.3,1867.5,585619,-73.7,Zhang Jianhua,Petroleum Refining,Energy,3.0,China,"Beijing, China",http://www.cnpc.com.cn,17,1512048,301893,-1.0
Toyota Motor,5,254694,7.7,16899.3,437575,-12.3,Akio Toyoda,Motor Vehicles and Parts,Motor Vehicles & Parts,8.0,Japan,"Toyota, Japan",http://www.toyota-global.com,23,364445,157210,3.0
