In [19]:
# create a list of 1,000,000 salaries ranging from 50,000 and 150,000
import random

salary_list = [random.randint(50_000, 150_000) for _ in range(10_000_000)]

In [20]:
import statistics

In [21]:
%%timeit

# Look at the mean or average of these salaries.
statistics.mean(salary_list)

2.51 s ± 173 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [22]:
# Now we import numpy
import numpy as np

In [23]:
%%timeit

# Look at the mean or average of these salaries.
np.mean(salary_list)

411 ms ± 4.46 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [25]:
# Creating an aray in NumPy
my_array = np.array([1, 2, 3, 4, 5])

In [26]:
my_array.mean()

3.0

In [27]:
years_of_experience = np.array([1, 2, 3, 4, 5])

In [28]:
years_of_experience_plus_one = years_of_experience + 1

years_of_experience_plus_one

array([2, 3, 4, 5, 6])

In [29]:
years_of_experience_minus_one = years_of_experience - 1

years_of_experience_minus_one

array([0, 1, 2, 3, 4])

In [30]:
years_of_experience_half = years_of_experience / 2

years_of_experience_half

array([0.5, 1. , 1.5, 2. , 2.5])

In [31]:
years_of_experience_double = years_of_experience * 2

years_of_experience_double

array([ 2,  4,  6,  8, 10])

In [32]:
# Example: Selecting the experience requirement for the second and third job listings.
second_and_third_jobs_experience = years_of_experience[1:3]

second_and_third_jobs_experience

array([2, 3])

In [34]:
# Example: Selecting only those job listings that require more than 1 year of experience.
jobs_with_more_than_one_year_exp = years_of_experience[years_of_experience > 1]

jobs_with_more_than_one_year_exp

array([2, 3, 4, 5])

#### Examples
First lets create a list with 10 yearly salaries for a Senior Data Analyst job. We're just using a combination of the `random` library to get random integers between 100000 and 150000. Then using a for `loop` to get 10 (random) values.

In [36]:
import random
salary = [random.randint(100_000, 150_000) for a in range(10)]

salary

[113896,
 130132,
 149550,
 112701,
 119482,
 111398,
 116927,
 112262,
 122008,
 146736]

In [37]:
# Sum all salaries
total_sum_salaries = np.sum(salary)

total_sum_salaries

1235092

In [38]:
product_salaries = np.prod(salary)

product_salaries

-372178944

In [39]:
cumulative_sum_salaries = np.cumsum(salary)

cumulative_sum_salaries

array([ 113896,  244028,  393578,  506279,  625761,  737159,  854086,
        966348, 1088356, 1235092])

In [40]:
cumulative_prod_salaries = np.cumprod(salary)

cumulative_prod_salaries

array([     113896,  1936612384,  2147323328,  1259128512,  -921573504,
        1158077696, -1178148096, -2038640128,  -258691072,  -372178944])

In [41]:
average_salary = np.mean(salary)

average_salary

123509.2

In [42]:
median_salary = np.median(salary)

median_salary

118204.5

In [43]:
salary_variance = np.var(salary, ddof=1)

salary_variance

200630861.73333332

In [44]:
salary_std_dev = np.std(salary, ddof=1)

salary_std_dev

14164.422393212273

In [45]:
min_salary = np.min(salary)

min_salary

111398

In [46]:
max_salary = np.max(salary)

max_salary

149550

In [47]:
salary_with_nan = np.array([123124, np.nan, 145000, 128000, 110000, 149999, np.nan, 135000, 115000, 140000], dtype=float)
salary_with_nan

array([123124.,     nan, 145000., 128000., 110000., 149999.,     nan,
       135000., 115000., 140000.])

In [49]:
salary_with_nan[salary_with_nan > 135000] = np.nan

salary_with_nan

array([123124.,     nan,     nan, 128000., 110000.,     nan,     nan,
       135000., 115000.,     nan])

In [52]:
salary_array = np.array(salary)

salary_array

array([113896, 130132, 149550, 112701, 119482, 111398, 116927, 112262,
       122008, 146736])

In [53]:
salary_array = np.where(salary_array > 120_000, 120_000, salary_array)

salary_array

array([113896, 120000, 120000, 112701, 119482, 111398, 116927, 112262,
       120000, 120000])

In [54]:
noise = np.random.normal(0, 5000, salary_array.size)

salary_array_with_noise = salary_array + noise

salary_array_with_noise

array([119916.08830604, 119750.99238071, 118340.68322065, 116108.14562993,
       121779.57264647, 111503.00575392, 125607.48795751, 109180.51673055,
       119375.93853933, 114953.70885784])

In [55]:
import numpy as np

In [61]:
# Job Titles
job_titles = np.array(['Data Analyst', 'Data Engineer', 'Data Scientist', 'Machine Learning Engineer', 'AI Engineer'])

# Base Salaries
base_salaries = np.array([60000, 80000, 75000, 90000, np.nan])

# Bonus Rate
bonus_rates = np.array([0.05, 0.1, 0.08, 0.12, np.nan])

In [62]:
total_salaries = base_salaries * (1 + bonus_rates)

total_salaries

array([ 63000.,  88000.,  81000., 100800.,     nan])

In [68]:
np.nanmean(total_salaries)

83200.0

In [67]:
np.nanmedian(total_salaries)

84500.0

In [65]:
np.nan

nan

In [66]:
type(np.nan)

float

# Practise Problems

In [69]:
# Create NumPy Array
applications_list = [10, 15, 7, 20, 25, 30, 5]

In [71]:
application_array = np.array(applications_list)

application_array

array([10, 15,  7, 20, 25, 30,  5])

In [72]:
# Slicing Array (1.22.2)

# Code Needed
postings_list = [10, 15, 7, 20, 25, 30, 5]

In [79]:
postings_list[3:5]

[20, 25]

In [80]:
# Highest and Lowest Salary (1.22.3)

# Code Needed
salaries_list = [70000, 85000, 60000, 95000, 80000]

In [81]:
salaries_array = np.array(salaries_list)

salaries_array

array([70000, 85000, 60000, 95000, 80000])

In [82]:
# Minimum salary
min_salary = np.min(salaries_array)

min_salary

60000

In [84]:
# Maximum Salary
max_salary = np.max(salary_array)

max_salary

120000

# AI Generated Questions

In [85]:
# Create a NumPy array with the following numbers: 10, 20, 30, 40, 50.

arr = np.array([10, 20, 30, 40, 50])

arr

array([10, 20, 30, 40, 50])

In [86]:
# Add 5 to each element of the array you created in Question 1.

arr_plus_5 = arr + 5

arr_plus_5

array([15, 25, 35, 45, 55])

In [87]:
# Extract the first three elements of the array.

first_three_elements = arr[:3]

first_three_elements

array([10, 20, 30])

In [89]:
# Create an array of the salaries of five employees: 50000, 60000, 70000, 80000, 90000. 
# Then, find the average salary.

salaries = np.array([50_000, 60_000, 70_000, 80_000, 90_000])
average_salaries = np.mean(salaries)

average_salaries

70000.0

In [91]:
# Generate an array of 5 random integers between 1 and 100.

random_integers = np.random.randint(1, 101, 5)

random_integers

array([36,  4, 41, 66, 93])

In [92]:
# Multiply each element in the array from Question 1 by 3.
arr_multiply_3 = arr * 3

arr_multiply_3

array([ 30,  60,  90, 120, 150])

In [95]:
# Create an array with the numbers 1 to 10, then find the sum of all the elements.
arr_1_to_10 = np.arange(1, 11)

arr_sum = np.sum(arr_1_to_10)

arr_sum

55

In [97]:
# Replace all elements greater than 30 in the array from Question 1 with the value 30.
arr_clipped = np.where(arr > 30, 30, arr)

arr_clipped

array([10, 20, 30, 30, 30])

In [99]:
# Find the standard deviation of the array you created in Question 4.
arr_std_dev = np.std(salaries)

arr_std_dev

14142.13562373095

In [100]:
# Create an array of 10 random numbers drawn from a normal distribution with a mean of 50 and a standard deviation of 5.
random_normals = np.random.normal(loc=50, scale=5, size=10)

random_normals

array([55.60984757, 49.64888582, 39.22113859, 57.99497293, 62.93304266,
       50.70226585, 52.00614079, 42.13099658, 50.72692493, 45.47830625])

In [101]:
# Question: Create a NumPy array representing the number of years of experience required for five different data science job listings. 
# Add 1 year to each experience requirement and display the result.
years_of_experience = np.array([1, 2, 3, 4, 5])

years_of_experience_plus_one = years_of_experience + 1

years_of_experience_plus_one


array([2, 3, 4, 5, 6])

In [102]:
# Question: Create a NumPy array representing the number of years of experience required for five different data science job listings.
# Retrieve the experience requirements for the second and third job listings.
import numpy as np

years_of_experience = np.array([1, 2, 3, 4, 5])

second_and_third_elements = years_of_experience[1:3]

second_and_third_elements

array([2, 3])

In [103]:
# Question: Create a NumPy array representing the number of years of experience required for five different data science job listings.
# Retrieve only the job postings that require more than 2 years of experience.
import numpy as np

years_of_experience = np.array([1, 2, 3, 4, 5])

years_of_experience_more_than_2_years = years_of_experience[years_of_experience > 2]

years_of_experience_more_than_2_years

array([3, 4, 5])

In [109]:
# Question: Create a list with 10 random yearly salaries for a Senior Data Analyst job. 
# Convert the list to a NumPy array and calculate the total sum of the elements.
import numpy as np
import random

salary = [random.randint(100_000, 150_000) for num in range(10)]

salary_array = np.array(salary)

total_salary_array = np.sum(salary_array)
total_salary_array

1188385

In [113]:
# Question: Create a list with 10 random yearly salaries for a Senior Data Analyst job. 
# Convert the list to a NumPy array and calculate the mean, median, variance, and standard deviation of the salaries.
import numpy as np
import random

# Create a list with random salaries
salaries = [random.randint(100_000, 150_000) for num in range(10)]

# Convert list to a NumPy array
salaries_array = np.array(salaries)

# Calculate mean, median, variance, and standard deviation
mean_salary = np.mean(salaries_array)
median_salary = np.median(salaries_array)
salary_variance = np.var(salaries_array, ddof=1)
salary_std_dev = np.std(salaries_array, ddof=1)

mean_salary, median_salary, salary_variance, salary_std_dev

(125134.7, 122533.0, 168267439.34444445, 12971.793990980756)

In [115]:
# Question: Create a NumPy array representing salaries, with some values intentionally set to np.nan. 
# Replace all salaries less than 130000 with np.nan.

import numpy as np

# Create an array with some NaN values
salary_with_nan = np.array([123124, np.nan, 145000, 128000, 110000, 149999, np.nan, 135000, 115000, 140000], dtype=float)

# Replace values less than 130000 with np.nan
salary_with_nan[salary_with_nan < 130_000] = np.nan

salary_with_nan

array([    nan,     nan, 145000.,     nan,     nan, 149999.,     nan,
       135000.,     nan, 140000.])

In [116]:
# Question: Generate random values from a normal distribution with a mean of 0 and a standard deviation of 5000.
# Add these values to an array of salaries to simulate salary variations.
import numpy as np

# Example salary array
salary_array = np.array([120000, 130000, 140000, 150000, 160000])

noise = np.random.normal(0, 5_000, salary_array.size)

salary_array_with_noise = salary_array + noise

salary_array_with_noise

array([117055.52700759, 126716.05843948, 136589.86138431, 146419.26452158,
       162027.10695694])