## Python Review (Part 2)

### List Comprehensions

A list comprehension offers a shorter syntax for creating a new list based on the values of an existing list.

#### Example #1: Applying Transformations/Mapping

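syntax: `[dosomething(var) for var in arr]`. Writing it with parentheses instead, `(dosomething(var) for var in arr)`, creates a generator expression, which produces the same values lazily rather than building a list.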

In [84]:
import numpy as np

# Fix the seed of NumPy's global random generator so the sample data below
# is the same on every run.
np.random.seed(42)

# Two arrays of 100 random integers each, drawn from 0 (inclusive) to
# 100 (exclusive).
arr1 = np.random.randint(low=0, high=100, size=100)
arr2 = np.random.randint(low=0, high=100, size=100)

# Show both arrays as the cell output.
arr1, arr2

(array([51, 92, 14, 71, 60, 20, 82, 86, 74, 74, 87, 99, 23,  2, 21, 52,  1,
        87, 29, 37,  1, 63, 59, 20, 32, 75, 57, 21, 88, 48, 90, 58, 41, 91,
        59, 79, 14, 61, 61, 46, 61, 50, 54, 63,  2, 50,  6, 20, 72, 38, 17,
         3, 88, 59, 13,  8, 89, 52,  1, 83, 91, 59, 70, 43,  7, 46, 34, 77,
        80, 35, 49,  3,  1,  5, 53,  3, 53, 92, 62, 17, 89, 43, 33, 73, 61,
        99, 13, 94, 47, 14, 71, 77, 86, 61, 39, 84, 79, 81, 52, 23]),
 array([25, 88, 59, 40, 28, 14, 44, 64, 88, 70,  8, 87,  0,  7, 87, 62, 10,
        80,  7, 34, 34, 32,  4, 40, 27,  6, 72, 71, 11, 33, 32, 47, 22, 61,
        87, 36, 98, 43, 85, 90, 34, 64, 98, 46, 77,  2,  0,  4, 89, 13, 26,
         8, 78, 14, 89, 41, 76, 50, 62, 95, 51, 95,  3, 93, 22, 14, 42, 28,
        35, 12, 31, 70, 58, 85, 27, 65, 41, 44, 61, 56,  5, 27, 27, 43, 83,
        29, 61, 74, 91, 88, 61, 96,  0, 26, 61, 76,  2, 69, 71, 26]))

In [85]:
# Map to an operation (e.g., multiply each number by 2).
# The explicit loop below does the same arithmetic, but it overwrites the
# array in place, whereas the comprehension builds a new list and leaves
# arr1 unchanged:
# for i in range(len(arr)):
#     arr[i] = arr[i] * 2

[i * 2 for i in arr1]

[102,
 184,
 28,
 142,
 120,
 40,
 164,
 172,
 148,
 148,
 174,
 198,
 46,
 4,
 42,
 104,
 2,
 174,
 58,
 74,
 2,
 126,
 118,
 40,
 64,
 150,
 114,
 42,
 176,
 96,
 180,
 116,
 82,
 182,
 118,
 158,
 28,
 122,
 122,
 92,
 122,
 100,
 108,
 126,
 4,
 100,
 12,
 40,
 144,
 76,
 34,
 6,
 176,
 118,
 26,
 16,
 178,
 104,
 2,
 166,
 182,
 118,
 140,
 86,
 14,
 92,
 68,
 154,
 160,
 70,
 98,
 6,
 2,
 10,
 106,
 6,
 106,
 184,
 124,
 34,
 178,
 86,
 66,
 146,
 122,
 198,
 26,
 188,
 94,
 28,
 142,
 154,
 172,
 122,
 78,
 168,
 158,
 162,
 104,
 46]

In [86]:
# Floor-divide each element by 2 using the `//` operator
# (divide, then round down): 5 / 2 = 2.5, floor(2.5) = 2, so 5 // 2 == 2
[i // 2 for i in arr1]

[25,
 46,
 7,
 35,
 30,
 10,
 41,
 43,
 37,
 37,
 43,
 49,
 11,
 1,
 10,
 26,
 0,
 43,
 14,
 18,
 0,
 31,
 29,
 10,
 16,
 37,
 28,
 10,
 44,
 24,
 45,
 29,
 20,
 45,
 29,
 39,
 7,
 30,
 30,
 23,
 30,
 25,
 27,
 31,
 1,
 25,
 3,
 10,
 36,
 19,
 8,
 1,
 44,
 29,
 6,
 4,
 44,
 26,
 0,
 41,
 45,
 29,
 35,
 21,
 3,
 23,
 17,
 38,
 40,
 17,
 24,
 1,
 0,
 2,
 26,
 1,
 26,
 46,
 31,
 8,
 44,
 21,
 16,
 36,
 30,
 49,
 6,
 47,
 23,
 7,
 35,
 38,
 43,
 30,
 19,
 42,
 39,
 40,
 26,
 11]

In [87]:
# Another way: iterate over the index positions and look each element up by
# its index. This gives the same result as [i * 2 for i in arr1].
[arr1[i] * 2 for i in range(len(arr1))]

[102,
 184,
 28,
 142,
 120,
 40,
 164,
 172,
 148,
 148,
 174,
 198,
 46,
 4,
 42,
 104,
 2,
 174,
 58,
 74,
 2,
 126,
 118,
 40,
 64,
 150,
 114,
 42,
 176,
 96,
 180,
 116,
 82,
 182,
 118,
 158,
 28,
 122,
 122,
 92,
 122,
 100,
 108,
 126,
 4,
 100,
 12,
 40,
 144,
 76,
 34,
 6,
 176,
 118,
 26,
 16,
 178,
 104,
 2,
 166,
 182,
 118,
 140,
 86,
 14,
 92,
 68,
 154,
 160,
 70,
 98,
 6,
 2,
 10,
 106,
 6,
 106,
 184,
 124,
 34,
 178,
 86,
 66,
 146,
 122,
 198,
 26,
 188,
 94,
 28,
 142,
 154,
 172,
 122,
 78,
 168,
 158,
 162,
 104,
 46]

In [88]:
# Add arr1 to arr2 element by element using a list comprehension.
# The shared index i pairs up the values at the same position; the indices
# come from arr1, so arr2 must be at least as long as arr1.
[arr1[i] + arr2[i] for i in range(len(arr1))]

[76,
 180,
 73,
 111,
 88,
 34,
 126,
 150,
 162,
 144,
 95,
 186,
 23,
 9,
 108,
 114,
 11,
 167,
 36,
 71,
 35,
 95,
 63,
 60,
 59,
 81,
 129,
 92,
 99,
 81,
 122,
 105,
 63,
 152,
 146,
 115,
 112,
 104,
 146,
 136,
 95,
 114,
 152,
 109,
 79,
 52,
 6,
 24,
 161,
 51,
 43,
 11,
 166,
 73,
 102,
 49,
 165,
 102,
 63,
 178,
 142,
 154,
 73,
 136,
 29,
 60,
 76,
 105,
 115,
 47,
 80,
 73,
 59,
 90,
 80,
 68,
 94,
 136,
 123,
 73,
 94,
 70,
 60,
 116,
 144,
 128,
 74,
 168,
 138,
 102,
 132,
 173,
 86,
 87,
 100,
 160,
 81,
 150,
 123,
 49]

In [89]:
# Map to function
# 96 - 100: A
# 90 - 95: B
# 85 - 89: C
# 80 - 84: D
# 75 - 79: E
# Lower: F

def to_letter_scale(grade):
    """Convert a numeric grade into a letter on the A-F scale.

    Bands (each lower bound is inclusive and the band runs up to the next
    band's lower bound):

        96 up to and including 100 -> 'A'
        90 up to 96                -> 'B'
        85 up to 90                -> 'C'
        80 up to 85                -> 'D'
        75 up to 80                -> 'E'
        anything else              -> 'F'

    The bands are contiguous, so a fractional grade such as 95.5 or 89.9 is
    graded by the band it falls in ('B' and 'C') instead of slipping through
    a gap between two whole-number ranges and ending up as 'F'. Whole-number
    grades map to the same letters as before.

    Args:
        grade: The numeric score (int or float).

    Returns:
        A single-character string: 'A', 'B', 'C', 'D', 'E' or 'F'.
    """
    # Scores above the top of the scale are not valid grades; they keep
    # mapping to 'F'.
    if grade > 100:
        return 'F'

    # Cutoffs are checked from highest to lowest, so the first one the grade
    # reaches decides the letter.
    if grade >= 96:
        return 'A'
    elif grade >= 90:
        return 'B'
    elif grade >= 85:
        return 'C'
    elif grade >= 80:
        return 'D'
    elif grade >= 75:
        return 'E'
    else:
        return 'F'

to_letter_scale(90)

'B'

In [90]:
# Re-seed the global generator so the sample grades are reproducible.
np.random.seed(42)

# 50 random integer grades drawn from 70 (inclusive) to 100 (exclusive).
arr3 = np.random.randint(low=70, high=100, size=50)

# Show the array as the cell output.
arr3

array([76, 89, 98, 84, 80, 77, 98, 90, 76, 95, 88, 92, 80, 80, 93, 90, 73,
       77, 93, 72, 91, 90, 71, 93, 81, 99, 75, 71, 97, 90, 70, 81, 95, 91,
       98, 81, 94, 86, 96, 96, 79, 97, 97, 85, 84, 99, 99, 84, 99, 88])

In [91]:
# [expression for var in arr]
# Here the expression is a function call: every grade in arr3 is passed
# through to_letter_scale.
[to_letter_scale(i) for i in arr3]

['E',
 'C',
 'A',
 'D',
 'D',
 'E',
 'A',
 'B',
 'E',
 'B',
 'C',
 'B',
 'D',
 'D',
 'B',
 'B',
 'F',
 'E',
 'B',
 'F',
 'B',
 'B',
 'F',
 'B',
 'D',
 'A',
 'E',
 'F',
 'A',
 'B',
 'F',
 'D',
 'B',
 'B',
 'A',
 'D',
 'B',
 'C',
 'A',
 'A',
 'E',
 'A',
 'A',
 'C',
 'D',
 'A',
 'A',
 'D',
 'A',
 'C']

#### Example #2: Filtering/Conditions

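syntax: `[var for var in arr if expr]`. Writing it with parentheses instead, `(var for var in arr if expr)`, creates a generator expression, which yields the matching values lazily rather than building a list.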

In [92]:
# Keep only the even items of the array, i.e. those that leave no remainder
# when divided by 2.
[i for i in arr1 if i % 2 == 0]

[92,
 14,
 60,
 20,
 82,
 86,
 74,
 74,
 2,
 52,
 20,
 32,
 88,
 48,
 90,
 58,
 14,
 46,
 50,
 54,
 2,
 50,
 6,
 20,
 72,
 38,
 88,
 8,
 52,
 70,
 46,
 34,
 80,
 92,
 62,
 94,
 14,
 86,
 84,
 52]

In [101]:
# Combining map (transformation) and filter.
# The `if` condition is checked first; 1 is then added only to the items that
# passed it (the even ones), so every value in the result is odd.
[i + 1 for i in arr1 if i % 2 == 0]

[93,
 15,
 61,
 21,
 83,
 87,
 75,
 75,
 3,
 53,
 21,
 33,
 89,
 49,
 91,
 59,
 15,
 47,
 51,
 55,
 3,
 51,
 7,
 21,
 73,
 39,
 89,
 9,
 53,
 71,
 47,
 35,
 81,
 93,
 63,
 95,
 15,
 87,
 85,
 53]

### Lambda Functions

A lambda function is a small anonymous function.

#### Example #1: Apply lambda function to a column using .assign().

Returns a new `DataFrame` object.


In [93]:
import pandas as pd

# Sample data: one row per student with the student's name and total marks.
# The table is described column by column; the row order is the same as in
# the lists.
df = pd.DataFrame({
    'Name': ['Rohan', 'Elvish', 'Deepak', 'Soni', 'Radhika', 'Vansh'],
    'Total_Marks': [455, 250, 495, 400, 350, 450],
})

# Show the table as the cell output.
df

Unnamed: 0,Name,Total_Marks
0,Rohan,455
1,Elvish,250
2,Deepak,495
3,Soni,400
4,Radhika,350
5,Vansh,450


In [107]:
# Create a new column percentage = score / 500 * 100.
# The lambda receives the DataFrame (x) and returns the values for the new
# column. .assign() returns a new DataFrame, stored here in df_new; df itself
# does not gain the Percentage column.
df_new = df.assign(Percentage = lambda x:x.Total_Marks / 500 * 100)
df_new

Unnamed: 0,Name,Total_Marks,Percentage
0,Rohan,455,91.0
1,Elvish,250,50.0
2,Deepak,495,99.0
3,Soni,400,80.0
4,Radhika,350,70.0
5,Vansh,450,90.0


#### Example #2: Apply lambda to a column using .apply().

Returns a Series when applied to a Series (a single column). The result can be added to the original DataFrame as a new column or used to overwrite an existing one.

In [108]:
# Create a new column percentage = score / 500 * 100, this time by assigning
# the result straight to df (df itself is modified).
# Note: this step uses plain column arithmetic, with no lambda or .apply();
# it only prepares the Percentage column that .apply() is used on next.
df['Percentage'] = (df['Total_Marks'] / 500) * 100
df

Unnamed: 0,Name,Total_Marks,Percentage
0,Rohan,455,91.0
1,Elvish,250,50.0
2,Deepak,495,99.0
3,Soni,400,80.0
4,Radhika,350,70.0
5,Vansh,450,90.0


In [110]:
# .apply() calls to_letter_scale once for every value in the Percentage
# column; the resulting Series of letters is stored as a new "Grade" column.
df["Grade"] = df["Percentage"].apply(to_letter_scale)
df

Unnamed: 0,Name,Total_Marks,Percentage,Grade
0,Rohan,455,91.0,B
1,Elvish,250,50.0,F
2,Deepak,495,99.0,A
3,Soni,400,80.0,D
4,Radhika,350,70.0,F
5,Vansh,450,90.0,B


In [111]:
# Another way: pass a lambda to .apply(). int(x) drops the fractional part of
# the float percentage (e.g. 91.0 -> 91) before it is converted to a letter.
# This overwrites the existing "Grade" column.
df["Grade"] = df["Percentage"].apply(lambda x:to_letter_scale(int(x)))
df

Unnamed: 0,Name,Total_Marks,Percentage,Grade
0,Rohan,455,91.0,B
1,Elvish,250,50.0,F
2,Deepak,495,99.0,A
3,Soni,400,80.0,D
4,Radhika,350,70.0,F
5,Vansh,450,90.0,B


### Group By

We can create a grouping of categories and apply a function to the categories.

#### Example #1: Group the unique values from a column.

syntax: `df.groupby(column)`


In [113]:
# Load the employee records from a CSV file into a DataFrame. The path is
# relative, so the datasets/ folder must sit in the current working directory.
employees_df = pd.read_csv('datasets/employees.csv')
employees_df

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
0,483,Lenord,Kihn,M,1994-07-01,Full Time,64598
1,478,Palma,Beahan,F,1972-05-06,Full Time,124103
2,348,Hebert,Muller,M,1990-09-04,Full Time,100324
3,757,Virginia,Ullrich,M,1991-09-23,Full Time,144957
4,937,Roby,Hudson,M,1997-01-15,Full Time,137659
...,...,...,...,...,...,...,...
994,712,Dann,Crooks,M,1983-12-25,Part Time,66592
995,132,Maxim,Orn,M,1971-03-01,Part Time,127966
996,126,Kimberli,Walter,M,1976-03-07,Part Time,60712
997,277,Talmage,Bartell,F,1989-02-19,Full Time,103606


In [127]:
# Group by employment status: rows that share the same employment_status
# value are placed in the same group.
groups = employees_df.groupby("employment_status")

# Iterating over the grouped object yields (group key, sub-DataFrame) pairs.
# NOTE: display is not imported in this notebook -- presumably the
# IPython/Jupyter built-in for rich output.
for name, group in groups:
    display(name)
    display(group)

'Full Time'

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
0,483,Lenord,Kihn,M,1994-07-01,Full Time,64598
1,478,Palma,Beahan,F,1972-05-06,Full Time,124103
2,348,Hebert,Muller,M,1990-09-04,Full Time,100324
3,757,Virginia,Ullrich,M,1991-09-23,Full Time,144957
4,937,Roby,Hudson,M,1997-01-15,Full Time,137659
...,...,...,...,...,...,...,...
989,916,Jerrad,Orn,M,1976-12-15,Full Time,142640
991,308,Elberta,Harvey,F,1988-12-29,Full Time,146691
992,799,Valentino,Hudson,M,1988-09-16,Full Time,98463
997,277,Talmage,Bartell,F,1989-02-19,Full Time,103606


'Part Time'

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
9,940,Marlana,Moen,F,1979-05-05,Part Time,140611
12,950,Adrienne,Stokes,F,1982-07-28,Part Time,97453
17,901,Nikki,D'Amore,M,1985-08-09,Part Time,108287
28,232,Urijah,McClure,F,1971-04-29,Part Time,105555
30,458,Desmond,Pfeffer,F,1970-04-07,Part Time,72361
...,...,...,...,...,...,...,...
990,762,Fitzgerald,Welch,M,1991-04-11,Part Time,88134
993,606,Patty,Swift,F,1987-12-26,Part Time,126589
994,712,Dann,Crooks,M,1983-12-25,Part Time,66592
995,132,Maxim,Orn,M,1971-03-01,Part Time,127966


'Probationary'

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
962,56,Seth,Rippin,M,1976-04-11,Probationary,148561
963,406,Vida,Hintz,F,1971-10-28,Probationary,121373
964,91,Glennis,Abshire,M,1992-01-19,Probationary,104223
965,743,Dessa,Crona,F,1975-04-06,Probationary,60560
966,767,Joshua,Renner,M,1979-02-26,Probationary,109423
967,88,Tempie,Heller,F,1983-07-10,Probationary,141141
968,419,Tari,Waelchi,F,1998-10-08,Probationary,84696
969,724,Gunnar,Prosacco,M,1979-01-26,Probationary,126024
970,607,Alto,Wisozk,F,1975-01-11,Probationary,136728
971,792,Nanci,Schuppe,F,1989-12-30,Probationary,56949


#### Example #2: Group data with multiple keys.

syntax: `df.groupby([column, column, ...])`

In [128]:
# Group by gender and employment status: one group per combination of the
# two column values that occurs in the data.
groups = employees_df.groupby(["gender", "employment_status"])

# With several grouping columns, name is a tuple with one entry per column,
# e.g. ('F', 'Full Time'); group holds the matching rows.
for name, group in groups:
    display(name)
    display(group)

('F', 'Full Time')

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
1,478,Palma,Beahan,F,1972-05-06,Full Time,124103
6,165,Jalissa,Bogisich,F,1974-01-07,Full Time,101133
18,712,Yahir,Kertzmann,F,1970-10-27,Full Time,95140
19,936,Marsha,Ledner,F,1982-12-23,Full Time,143806
22,542,Mat,Nienow,F,1995-06-14,Full Time,141463
...,...,...,...,...,...,...,...
960,668,Karrie,Prosacco,F,1970-12-23,Full Time,119203
982,301,Jimmie,Wisoky,F,1982-03-15,Full Time,55945
986,562,Alpheus,Mayer,F,1977-01-25,Full Time,83631
991,308,Elberta,Harvey,F,1988-12-29,Full Time,146691


('F', 'Part Time')

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
9,940,Marlana,Moen,F,1979-05-05,Part Time,140611
12,950,Adrienne,Stokes,F,1982-07-28,Part Time,97453
28,232,Urijah,McClure,F,1971-04-29,Part Time,105555
30,458,Desmond,Pfeffer,F,1970-04-07,Part Time,72361
51,900,Kate,Blick,F,1975-05-10,Part Time,90927
...,...,...,...,...,...,...,...
921,43,Montel,Kunde,F,1996-07-29,Part Time,105292
923,749,Drake,Conroy,F,1988-11-28,Part Time,146475
947,75,Kayson,Towne,F,1980-11-08,Part Time,68757
951,590,Santana,Jones,F,1999-03-09,Part Time,85004


('F', 'Probationary')

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
963,406,Vida,Hintz,F,1971-10-28,Probationary,121373
965,743,Dessa,Crona,F,1975-04-06,Probationary,60560
967,88,Tempie,Heller,F,1983-07-10,Probationary,141141
968,419,Tari,Waelchi,F,1998-10-08,Probationary,84696
970,607,Alto,Wisozk,F,1975-01-11,Probationary,136728
971,792,Nanci,Schuppe,F,1989-12-30,Probationary,56949
975,772,Linsey,Grant,F,1974-11-11,Probationary,73168
977,699,Gloria,Treutel,F,1995-12-16,Probationary,100276
979,931,Sommer,Johnston,F,1972-11-24,Probationary,95217


('M', 'Full Time')

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
0,483,Lenord,Kihn,M,1994-07-01,Full Time,64598
2,348,Hebert,Muller,M,1990-09-04,Full Time,100324
3,757,Virginia,Ullrich,M,1991-09-23,Full Time,144957
4,937,Roby,Hudson,M,1997-01-15,Full Time,137659
5,264,Jaeda,Effertz,M,1994-03-21,Full Time,131924
...,...,...,...,...,...,...,...
983,95,Jase,Dibbert,M,1972-12-03,Full Time,146698
985,137,Rosco,Lynch,M,1989-06-20,Full Time,103499
989,916,Jerrad,Orn,M,1976-12-15,Full Time,142640
992,799,Valentino,Hudson,M,1988-09-16,Full Time,98463


('M', 'Part Time')

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
17,901,Nikki,D'Amore,M,1985-08-09,Part Time,108287
41,700,Jaiden,Hirthe,M,1970-09-26,Part Time,62770
46,238,Addison,Sanford,M,1977-09-20,Part Time,111234
53,482,Anibal,Dare,M,1986-05-23,Part Time,149219
58,357,Darby,Herzog,M,1980-05-24,Part Time,72575
...,...,...,...,...,...,...,...
988,97,Jonnie,McDermott,M,1980-07-13,Part Time,100740
990,762,Fitzgerald,Welch,M,1991-04-11,Part Time,88134
994,712,Dann,Crooks,M,1983-12-25,Part Time,66592
995,132,Maxim,Orn,M,1971-03-01,Part Time,127966


('M', 'Probationary')

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
962,56,Seth,Rippin,M,1976-04-11,Probationary,148561
964,91,Glennis,Abshire,M,1992-01-19,Probationary,104223
966,767,Joshua,Renner,M,1979-02-26,Probationary,109423
969,724,Gunnar,Prosacco,M,1979-01-26,Probationary,126024
972,367,Kerwin,Purdy,M,1979-01-03,Probationary,55215
973,835,Olar,Pagac,M,1997-02-15,Probationary,93548
974,260,Karley,Hahn,M,1984-08-24,Probationary,134543
976,36,Callie,Lesch,M,1992-10-04,Probationary,66278
978,544,Henrietta,Greenholt,M,1977-04-06,Probationary,93206
980,995,Diamond,Corkery,M,1974-05-25,Probationary,132008


#### Example #3: Get a single group.

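syntax: `.get_group(key)`, where `key` is one of the group values (e.g. `"Full Time"`), or `.get_group((key, key, ...))` with one value per grouping column when the data was grouped by multiple columns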

In [131]:
# Fetch a single group by its key (no loop needed). groups was built from two
# columns, so the key is a (gender, employment_status) tuple.
df_male_prob = groups.get_group(("M", "Probationary"))
df_male_prob

Unnamed: 0,number,first_name,last_name,gender,birth_date,employment_status,annual_salary
962,56,Seth,Rippin,M,1976-04-11,Probationary,148561
964,91,Glennis,Abshire,M,1992-01-19,Probationary,104223
966,767,Joshua,Renner,M,1979-02-26,Probationary,109423
969,724,Gunnar,Prosacco,M,1979-01-26,Probationary,126024
972,367,Kerwin,Purdy,M,1979-01-03,Probationary,55215
973,835,Olar,Pagac,M,1997-02-15,Probationary,93548
974,260,Karley,Hahn,M,1984-08-24,Probationary,134543
976,36,Callie,Lesch,M,1992-10-04,Probationary,66278
978,544,Henrietta,Greenholt,M,1977-04-06,Probationary,93206
980,995,Diamond,Corkery,M,1974-05-25,Probationary,132008


In [132]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the selected group.
df_male_prob.describe()

Unnamed: 0,number,annual_salary
count,11.0,11.0
mean,506.181818,108098.181818
std,359.121377,29236.452729
min,36.0,55215.0
25%,175.5,93377.0
50%,544.0,109423.0
75%,801.0,129029.5
max,995.0,148561.0
