In [55]:
# loc  = label-based selection (uses index labels / column names)
# iloc = position-based selection (uses integer positions)

# label-based slicing includes the upper limit, while position-based slicing does not
# e.g. djson.loc['Luke':'John'] will have length = 4, while djson.iloc[0:3] will have length = 3 (position 3 not included)

# 3 operators for filtering data and boolean indexing: | (or), & (and), ~ (not)

In [56]:
import pandas

In [57]:
djson = pandas.read_json('./src/Employees.json') # load the employee records from the JSON file into a DataFrame
djson

Unnamed: 0,ID,FirstName,LastName,Department,Phone,Address,Salary,Skills
0,1,Luke,Phillip,Sales,1234567890,"1st Address, Miami",52000,Online Sales
1,2,Jack,Darren,IT,1234567891,"2nd Address, Miami",52200,Networking
2,3,Ken,Wood,IT,1234567892,"3rd Address, Miami",58000,"Python, Java"
3,4,John,Wilson,Marketing,1234567893,"4th Address, Miami",58700,Facebook Ads
4,5,Emily,Larson,Marketing,1234567894,"5th Address, Miami",60000,Instagram Ads
5,6,Anna,Sullivan,Sales,1234567895,"6th Address, Miami",54000,In-Person Sales
6,7,Richard,Smith,Logistics,1234567896,"7th Address, Miami",56000,Warehouse Mgmt.
7,8,Ronnie,Moore,Sales,1234567897,"8th Address, Miami",49000,Online Sales
8,9,Ron,Drake,IT,1234567898,"9th Address, Miami",53000,Linux Servers
9,10,Wayne,Barker,Logistics,1234567899,"10th Address, Miami",59500,Product Loading


In [58]:
# use the FirstName column as the row index so rows can be selected by name with .loc
# note: inplace = True mutates djson itself, so re-running this cell raises KeyError
# (FirstName is no longer a column after the first run)
djson.set_index('FirstName', inplace = True)
djson

Unnamed: 0_level_0,ID,LastName,Department,Phone,Address,Salary,Skills
FirstName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Luke,1,Phillip,Sales,1234567890,"1st Address, Miami",52000,Online Sales
Jack,2,Darren,IT,1234567891,"2nd Address, Miami",52200,Networking
Ken,3,Wood,IT,1234567892,"3rd Address, Miami",58000,"Python, Java"
John,4,Wilson,Marketing,1234567893,"4th Address, Miami",58700,Facebook Ads
Emily,5,Larson,Marketing,1234567894,"5th Address, Miami",60000,Instagram Ads
Anna,6,Sullivan,Sales,1234567895,"6th Address, Miami",54000,In-Person Sales
Richard,7,Smith,Logistics,1234567896,"7th Address, Miami",56000,Warehouse Mgmt.
Ronnie,8,Moore,Sales,1234567897,"8th Address, Miami",49000,Online Sales
Ron,9,Drake,IT,1234567898,"9th Address, Miami",53000,Linux Servers
Wayne,10,Barker,Logistics,1234567899,"10th Address, Miami",59500,Product Loading


In [59]:
djson.loc['John'] # select the single row labelled 'John'; a single label returns a Series

ID                             4
LastName                  Wilson
Department             Marketing
Phone                 1234567893
Address       4th Address, Miami
Salary                     58700
Skills              Facebook Ads
Name: John, dtype: object

In [60]:
djson.loc[['John','Anna','Ron']] # select several rows by passing a list of index labels

Unnamed: 0_level_0,ID,LastName,Department,Phone,Address,Salary,Skills
FirstName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
John,4,Wilson,Marketing,1234567893,"4th Address, Miami",58700,Facebook Ads
Anna,6,Sullivan,Sales,1234567895,"6th Address, Miami",54000,In-Person Sales
Ron,9,Drake,IT,1234567898,"9th Address, Miami",53000,Linux Servers


In [61]:
djson.loc['Jack':'Emily'] # slice rows by label; both endpoints ('Jack' and 'Emily') are included

Unnamed: 0_level_0,ID,LastName,Department,Phone,Address,Salary,Skills
FirstName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Jack,2,Darren,IT,1234567891,"2nd Address, Miami",52200,Networking
Ken,3,Wood,IT,1234567892,"3rd Address, Miami",58000,"Python, Java"
John,4,Wilson,Marketing,1234567893,"4th Address, Miami",58700,Facebook Ads
Emily,5,Larson,Marketing,1234567894,"5th Address, Miami",60000,Instagram Ads


In [62]:
djson.loc['Luke':'John', "Phone":"Skills"] # slice rows and columns by label; both ends of each slice are included

Unnamed: 0_level_0,Phone,Address,Salary,Skills
FirstName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Luke,1234567890,"1st Address, Miami",52000,Online Sales
Jack,1234567891,"2nd Address, Miami",52200,Networking
Ken,1234567892,"3rd Address, Miami",58000,"Python, Java"
John,1234567893,"4th Address, Miami",58700,Facebook Ads


In [63]:
djson.loc['Ken':'Anna', ["Phone", "Skills", "Address"]] # slice rows by label and pick columns by list; columns come back in the listed order

Unnamed: 0_level_0,Phone,Skills,Address
FirstName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ken,1234567892,"Python, Java","3rd Address, Miami"
John,1234567893,Facebook Ads,"4th Address, Miami"
Emily,1234567894,Instagram Ads,"5th Address, Miami"
Anna,1234567895,In-Person Sales,"6th Address, Miami"


In [64]:
djson.loc['Ken', "Phone"] # select a single cell: row label 'Ken', column 'Phone'

1234567892

In [65]:
djson.loc[:, "Phone"] # select the Phone column for every row (':' means all rows)

FirstName
Luke       1234567890
Jack       1234567891
Ken        1234567892
John       1234567893
Emily      1234567894
Anna       1234567895
Richard    1234567896
Ronnie     1234567897
Ron        1234567898
Wayne      1234567899
Name: Phone, dtype: int64

In [66]:
set(djson.loc[:, 'Department']) # collect the distinct departments by converting the column to a Python set (sets are unordered)

{'IT', 'Logistics', 'Marketing', 'Sales'}

In [67]:
djson.iloc[0:3] # position-based slice: returns positions 0, 1 and 2; the stop position 3 is excluded

Unnamed: 0_level_0,ID,LastName,Department,Phone,Address,Salary,Skills
FirstName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Luke,1,Phillip,Sales,1234567890,"1st Address, Miami",52000,Online Sales
Jack,2,Darren,IT,1234567891,"2nd Address, Miami",52200,Networking
Ken,3,Wood,IT,1234567892,"3rd Address, Miami",58000,"Python, Java"


In [68]:
djson.iloc[2, 2:5] # row at position 2, columns at positions 2-4 (Department, Phone, Address); position 5 (Salary) is the excluded upper limit

Department                    IT
Phone                 1234567892
Address       3rd Address, Miami
Name: Ken, dtype: object

In [69]:
djson.sample() # return one random row; the row differs between runs unless random_state is passed

Unnamed: 0_level_0,ID,LastName,Department,Phone,Address,Salary,Skills
FirstName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Emily,5,Larson,Marketing,1234567894,"5th Address, Miami",60000,Instagram Ads


In [70]:
djson.sample(n = 4) # return 4 random rows; the selection differs between runs unless random_state is passed

Unnamed: 0_level_0,ID,LastName,Department,Phone,Address,Salary,Skills
FirstName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
John,4,Wilson,Marketing,1234567893,"4th Address, Miami",58700,Facebook Ads
Anna,6,Sullivan,Sales,1234567895,"6th Address, Miami",54000,In-Person Sales
Luke,1,Phillip,Sales,1234567890,"1st Address, Miami",52000,Online Sales
Jack,2,Darren,IT,1234567891,"2nd Address, Miami",52200,Networking


In [71]:
djson.sample(frac = 0.5) # return a random 50% of the rows (5 of the 10 here); differs between runs unless random_state is passed

Unnamed: 0_level_0,ID,LastName,Department,Phone,Address,Salary,Skills
FirstName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Wayne,10,Barker,Logistics,1234567899,"10th Address, Miami",59500,Product Loading
Emily,5,Larson,Marketing,1234567894,"5th Address, Miami",60000,Instagram Ads
Jack,2,Darren,IT,1234567891,"2nd Address, Miami",52200,Networking
Ken,3,Wood,IT,1234567892,"3rd Address, Miami",58000,"Python, Java"
Ron,9,Drake,IT,1234567898,"9th Address, Miami",53000,Linux Servers


In [72]:
# reload the employee records and index them by ID in one step;
# building the frame from the file each time keeps this cell safe to re-run
djson = pandas.read_json('./src/Employees.json').set_index('ID')

In [73]:
djson.loc[djson['Salary'] < 50000] # boolean mask: keep only employees whose salary is below 50000

Unnamed: 0_level_0,FirstName,LastName,Department,Phone,Address,Salary,Skills
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
8,Ronnie,Moore,Sales,1234567897,"8th Address, Miami",49000,Online Sales


In [74]:
djson.loc[(djson['Salary'] < 50000) | (djson['Salary'] > 56000)] # OR (|): keep employees whose salary is below 50000 or above 56000

Unnamed: 0_level_0,FirstName,LastName,Department,Phone,Address,Salary,Skills
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3,Ken,Wood,IT,1234567892,"3rd Address, Miami",58000,"Python, Java"
4,John,Wilson,Marketing,1234567893,"4th Address, Miami",58700,Facebook Ads
5,Emily,Larson,Marketing,1234567894,"5th Address, Miami",60000,Instagram Ads
8,Ronnie,Moore,Sales,1234567897,"8th Address, Miami",49000,Online Sales
10,Wayne,Barker,Logistics,1234567899,"10th Address, Miami",59500,Product Loading


In [75]:
djson.loc[(djson['Salary'] > 50000) & (djson['Department'] == 'Sales')] # AND (&): keep Sales employees whose salary is above 50000

Unnamed: 0_level_0,FirstName,LastName,Department,Phone,Address,Salary,Skills
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,Luke,Phillip,Sales,1234567890,"1st Address, Miami",52000,Online Sales
6,Anna,Sullivan,Sales,1234567895,"6th Address, Miami",54000,In-Person Sales


In [76]:
djson[ ~ (djson.loc[:,'Department'] != 'IT')] # NOT (~) inverts the boolean mask: ~(Department != 'IT') keeps only the IT rows, equivalent to Department == 'IT'

Unnamed: 0_level_0,FirstName,LastName,Department,Phone,Address,Salary,Skills
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,Jack,Darren,IT,1234567891,"2nd Address, Miami",52200,Networking
3,Ken,Wood,IT,1234567892,"3rd Address, Miami",58000,"Python, Java"
9,Ron,Drake,IT,1234567898,"9th Address, Miami",53000,Linux Servers
