In [95]:
import pandas as pd

## Basic Employees Analysis

In [96]:
# 1. Display all the records of REGIONS file
regions = pd.read_csv("data/regions.csv", index_col = "REGION_ID")
regions

Unnamed: 0_level_0,REGION_NAME
REGION_ID,Unnamed: 1_level_1
1,Europe
2,Americas
3,Asia
4,Middle East and Africa


In [97]:
# 2. Display all the location id from LOCATIONS file
locations = pd.read_csv("data/locations.csv", index_col = "location_id")
locations.index

Int64Index([1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000,
            2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, 3000, 3100,
            3200],
           dtype='int64', name='location_id')

In [130]:
# 3. Extract first 7 records from EMPLOYEES file
employees = pd.read_csv("data/employees.csv", index_col = "employ_id")
employees.head(7)

Unnamed: 0_level_0,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
employ_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
100,Steven,King,SKING,515.123.4567,1987-06-17,AD_PRES,24000,,,90.0
101,Neena,Kochhar,NKOCHHAR,515.123.4568,1987-06-18,AD_VP,17000,,100.0,90.0
102,Lex,De Haan,LDEHAAN,515.123.4569,1987-06-19,AD_VP,17000,,100.0,90.0
103,Alexander,Hunold,AHUNOLD,590.423.4567,1987-06-20,IT_PROG,9000,,102.0,60.0
104,Bruce,Ernst,BERNST,590.423.4568,1987-06-21,IT_PROG,6000,,103.0,60.0
105,David,Austin,DAUSTIN,590.423.4569,1987-06-22,IT_PROG,4800,,103.0,60.0
106,Valli,Pataballa,VPATABAL,590.423.4560,1987-06-23,IT_PROG,4800,,103.0,60.0


In [99]:
# 4. Select distinct department id from EMPLOYEES file
employees["department_id"].unique()

array([ 90.,  60., 100.,  30.,  50.,  80.,  nan,  10.,  20.,  40.,  70.,
       110.])

In [100]:
employees["department_id"].drop_duplicates()

employ_id
100     90.0
103     60.0
108    100.0
114     30.0
120     50.0
145     80.0
178      NaN
200     10.0
201     20.0
203     40.0
204     70.0
205    110.0
Name: department_id, dtype: float64

In [101]:
employees.groupby("department_id").email.nunique()

department_id
10.0      1
20.0      2
30.0      6
40.0      1
50.0     45
60.0      5
70.0      1
80.0     34
90.0      3
100.0     6
110.0     2
Name: email, dtype: int64

In [114]:
# 5. Display the first name, last name and department_id of employees who last name is McEwen

employees[employees["last_name"] == "McEwen"][["first_name", "last_name", "department_id"]]

Unnamed: 0_level_0,first_name,last_name,department_id
employ_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
158,Allan,McEwen,80.0


In [117]:
# 6. Display first name, last name, salary and department number for those employees whose first name starts with letter "S"

criteria = employees["first_name"].map(lambda first_name: first_name.startswith("S"))
employees[criteria][["first_name", "last_name", "salary", "department_id"]]

Unnamed: 0_level_0,first_name,last_name,salary,department_id
employ_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,Steven,King,24000,90.0
116,Shelli,Baida,2900,30.0
117,Sigal,Tobias,2800,30.0
123,Shanta,Vollman,6500,50.0
128,Steven,Markle,2200,50.0
138,Stephen,Stiles,3200,50.0
161,Sarath,Sewall,7000,80.0
166,Sundar,Ande,6400,80.0
173,Sundita,Kumar,6100,80.0
192,Sarah,Bell,4000,50.0


In [120]:
# 7. Display first name, last name, salary and department number for those employees whose first name does not contain letter "M"

criteria = employees["first_name"].map(lambda first_name: "M" not in first_name)
employees[criteria][["first_name", "last_name", "salary", "department_id"]]

Unnamed: 0_level_0,first_name,last_name,salary,department_id
employ_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,Steven,King,24000,90.0
101,Neena,Kochhar,17000,90.0
102,Lex,De Haan,17000,90.0
103,Alexander,Hunold,9000,60.0
104,Bruce,Ernst,6000,60.0
...,...,...,...,...
202,Pat,Fay,6000,20.0
203,Susan,Mavris,6500,40.0
204,Hermann,Baer,10000,70.0
205,Shelley,Higgins,12000,110.0


In [122]:
# 8. Display the first name, last name, salary and department number in ascending order by department number.

sorted_employees_asc = employees.sort_values(by="department_id")
sorted_employees_asc[["first_name", "last_name", "salary", "department_id"]]

Unnamed: 0_level_0,first_name,last_name,salary,department_id
employ_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
200,Jennifer,Whalen,4400,10.0
201,Michael,Hartstein,13000,20.0
202,Pat,Fay,6000,20.0
119,Karen,Colmenares,2500,30.0
118,Guy,Himuro,2600,30.0
...,...,...,...,...
109,Daniel,Faviet,9000,100.0
108,Nancy,Greenberg,12000,100.0
205,Shelley,Higgins,12000,110.0
206,William,Gietz,8300,110.0


In [124]:
# 9. Display the first name, last name, salary and department number in descending order by first name.

sorted_employees_desc = employees.sort_values(by="first_name", ascending=False)
sorted_employees_desc


Unnamed: 0_level_0,first_name,last_name,email,phone_number,hire_date,job_id,salary,commission_pct,manager_id,department_id
employ_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
180,Winston,Taylor,WTAYLOR,650.507.9876,1987-09-05,SH_CLERK,3200,,120.0,50.0
206,William,Gietz,WGIETZ,515.123.8181,1987-10-01,AC_ACCOUNT,8300,,205.0,110.0
171,William,Smith,WSMITH,011.44.1343.629268,1987-08-27,SA_REP,7400,0.15,148.0,80.0
195,Vance,Jones,VJONES,650.501.4876,1987-09-20,SH_CLERK,2800,,123.0,50.0
106,Valli,Pataballa,VPATABAL,590.423.4560,1987-06-23,IT_PROG,4800,,103.0,60.0
...,...,...,...,...,...,...,...,...,...,...
115,Alexander,Khoo,AKHOO,515.127.4562,1987-07-02,PU_CLERK,3100,,114.0,30.0
103,Alexander,Hunold,AHUNOLD,590.423.4567,1987-06-20,IT_PROG,9000,,102.0,60.0
147,Alberto,Errazuriz,AERRAZUR,011.44.1344.429278,1987-08-03,SA_MAN,12000,0.30,100.0,80.0
196,Alana,Walsh,AWALSH,650.507.9811,1987-09-21,SH_CLERK,3100,,124.0,50.0


In [127]:
# 10. Display the first name, last name, salary and manager id where manager ids are null. 
employees_no_managers = employees[employees["manager_id"].isnull()]
employees_no_managers[["first_name", "last_name", "salary", "manager_id"]]

Unnamed: 0_level_0,first_name,last_name,salary,manager_id
employ_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,Steven,King,24000,


In [128]:
# 11. Display the first name, last name, salary and manager id where manager ids are not null. 
employees_with_managers = employees[employees["manager_id"].notnull()]
employees_with_managers[["first_name", "last_name", "salary", "manager_id"]]


Unnamed: 0_level_0,first_name,last_name,salary,manager_id
employ_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
101,Neena,Kochhar,17000,100.0
102,Lex,De Haan,17000,100.0
103,Alexander,Hunold,9000,102.0
104,Bruce,Ernst,6000,103.0
105,David,Austin,4800,103.0
...,...,...,...,...
202,Pat,Fay,6000,201.0
203,Susan,Mavris,6500,101.0
204,Hermann,Baer,10000,101.0
205,Shelley,Higgins,12000,101.0


## Location Analysis