## Introduction to Sets in Python
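A set is an unordered collection of unique elements. Sets are used to store multiple items in a single variable and are written with curly brackets, e.g. `{'apple', 'banana'}`. Note that empty braces `{}` create a dictionary; use `set()` to create an empty set.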

In [None]:
# Build a set from a literal; any duplicate entries would collapse into one
fruits = {
    'apple',
    'banana',
    'cherry',
}
print(f'Fruits set: {fruits}')

# add() inserts a single element (nothing happens if it is already present)
fruits.add('orange')
print(f'After adding an orange: {fruits}')

# remove() deletes an element and raises KeyError if it is not in the set
fruits.remove('banana')
print(f'After removing banana: {fruits}')

## Set Operations in Python
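Python sets provide built-in methods for the standard set operations: `union`, `intersection`, `difference`, and `symmetric_difference`.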

In [7]:
# Two overlapping sets: 3 and 4 are the elements they share
A = {1, 2, 3, 4}
B = {3, 4, 5, 6}

# Each method returns a new set and leaves A and B unchanged
union_set = A.union(B)  # elements in A or in B
intersection_set = A.intersection(B)  # elements in both A and B
difference_set = A.difference(B)  # elements in A that are not in B
symmetric_difference_set = A.symmetric_difference(B)  # elements in exactly one of the sets

# Report the results
print(f'Union of A and B: {union_set}')
print(f'Intersection of A and B: {intersection_set}')
print(f'Difference of A and B: {difference_set}')
print(f'Symmetric difference of A and B: {symmetric_difference_set}')

Union of A and B: {1, 2, 3, 4, 5, 6}
Intersection of A and B: {3, 4}
Difference of A and B: {1, 2}
Symmetric difference of A and B: {1, 2, 5, 6}


## Set Operations in NumPy
NumPy provides functions to perform set operations on arrays. These functions return sorted, unique values. 

In [8]:
import numpy as np
# Two overlapping integer arrays: 3 and 4 appear in both
A_np = np.array([1, 2, 3, 4])
B_np = np.array([3, 4, 5, 6])

# Each NumPy set routine returns a new array of sorted, unique values
union_np = np.union1d(A_np, B_np)  # values found in either array
intersection_np = np.intersect1d(A_np, B_np)  # values found in both arrays
difference_np = np.setdiff1d(A_np, B_np)  # values in A_np that are not in B_np
symmetric_difference_np = np.setxor1d(A_np, B_np)  # values in exactly one array

# Report the results
print(f'Union of A_np and B_np: {union_np}')
print(f'Intersection of A_np and B_np: {intersection_np}')
print(f'Difference of A_np and B_np: {difference_np}')
print(f'Symmetric difference of A_np and B_np: {symmetric_difference_np}')

Union of A_np and B_np: [1 2 3 4 5 6]
Intersection of A_np and B_np: [3 4]
Difference of A_np and B_np: [1 2]
Symmetric difference of A_np and B_np: [1 2 5 6]


## Sorting in Python and NumPy

### Sorting in Python
Python provides built-in functions and methods to sort lists and other iterable objects. Let's explore some of these methods:

In [9]:
# Unsorted input list
numbers = [34, 12, 89, 5, 73]

# sorted() builds a new list and leaves `numbers` in its original order
sorted_numbers = sorted(numbers)
print(f'Sorted list using sorted(): {sorted_numbers}')

# list.sort() reorders `numbers` itself and returns None
numbers.sort()
print(f'Sorted list using sort(): {numbers}')

# reverse=True orders from largest to smallest
desc_sorted_numbers = sorted(numbers, reverse=True)
print(f'Sorted list in descending order: {desc_sorted_numbers}')

Sorted list using sorted(): [5, 12, 34, 73, 89]
Sorted list using sort(): [5, 12, 34, 73, 89]
Sorted list in descending order: [89, 73, 34, 12, 5]


### Sorting in NumPy
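NumPy's array sorting functions are efficient and can handle large datasets. `np.sort` returns a sorted copy of the array and accepts an `axis` argument for multi-dimensional arrays.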

In [10]:
# Creating a NumPy array
numbers_np = np.array([34, 12, 89, 5, 73])
# np.sort() returns a sorted copy; numbers_np itself keeps its original order
sorted_numbers_np = np.sort(numbers_np)
print(f'Sorted array using np.sort(): {sorted_numbers_np}')
# Sorting in descending order: sort ascending, then reverse.
# Negating the values instead (-np.sort(-x)) only works for signed numeric
# dtypes; it wraps around for unsigned integers and fails for booleans/strings.
desc_sorted_numbers_np = np.sort(numbers_np)[::-1]
print(f'Sorted array in descending order: {desc_sorted_numbers_np}')
# Sorting along different axes (for multi-dimensional arrays):
# axis=0 sorts down each column independently, so rows are not kept together
matrix = np.array([[34, 12], [89, 5], [73, 56]])
sorted_matrix = np.sort(matrix, axis=0)
print(f'Sorted matrix along columns:\n{sorted_matrix}')

Sorted array using np.sort(): [ 5 12 34 73 89]
Sorted array in descending order: [89 73 34 12  5]
Sorted matrix along columns:
[[34  5]
 [73 12]
 [89 56]]


In [13]:
# Five evenly spaced values from 1 to 10, with both endpoints included
arr99 = np.linspace(start=1, stop=10, num=5)
arr99

array([ 1.  ,  3.25,  5.5 ,  7.75, 10.  ])

## Common Data Science Use Cases for NumPy
The following sections walk through common ways NumPy is used in data science work.

### 1. Data Manipulation and Cleaning
NumPy provides a consistent interface for manipulating and cleaning data: filling missing values, filtering data based on conditions, and transforming data.

In [14]:
# Sample array in which np.nan marks the missing entries
data = np.array([1, 2, np.nan, 4, 5, np.nan])

# Boolean mask that is True wherever a value is missing
is_missing = np.isnan(data)
# Swap each missing value for the placeholder 0 and keep the rest unchanged
filled_data = np.where(is_missing, 0, data)
print(f'Original data: {data}')
print(f'Filled data: {filled_data}')

# Boolean-mask filtering; comparisons against NaN are False, so NaNs drop out
filtered_data = data[data > 2]
print(f'Filtered data (values greater than 2): {filtered_data}')

Original data: [ 1.  2. nan  4.  5. nan]
Filled data: [1. 2. 0. 4. 5. 0.]
Filtered data (values greater than 2): [4. 5.]


### 2. Statistical Analysis
NumPy provides a comprehensive set of functions to perform statistical analysis on datasets. Whether it's computing the mean, median, standard deviation, or other statistical measures, NumPy can handle it efficiently. Let's explore some of these functions:

In [15]:
# Ten integer observations used for every statistic below
dataset = np.array([23, 45, 67, 89, 12, 34, 56, 78, 90, 45])

# Arithmetic mean
mean_value = dataset.mean()
print(f'Mean of the dataset: {mean_value}')

# Median (ndarray has no median method, so use the module-level function)
median_value = np.median(dataset)
print(f'Median of the dataset: {median_value}')

# Standard deviation with NumPy's default ddof=0 (population formula)
std_dev = dataset.std()
print(f'Standard Deviation of the dataset: {std_dev}')

# Variance, likewise with the default ddof=0
variance = dataset.var()
print(f'Variance of the dataset: {variance}')

Mean of the dataset: 53.9
Median of the dataset: 50.5
Standard Deviation of the dataset: 25.60644450133599
Variance of the dataset: 655.69


### 3. Linear Algebra Operations
Linear algebra is a branch of mathematics that deals with vectors, matrices, and the linear transformations between them. NumPy provides a comprehensive set of functions to perform linear algebra operations. Let's explore some of these functions:

In [None]:
# Two 2x2 matrices; B is twice the identity, so A @ B simply doubles A
A = np.array([[1, 2], [3, 4]])
B = np.array([[2, 0], [0, 2]])

# Matrix product via the @ operator
product = A @ B
print(f'Matrix product of A and B:\n{product}')

# Transpose via the .T attribute
transpose_A = A.T
print(f'Transpose of matrix A:\n{transpose_A}')

# Matrix inverse
inverse_A = np.linalg.inv(A)
print(f'Inverse of matrix A:\n{inverse_A}')

# Determinant (returned as a float, so expect round-off in the last digits)
determinant_A = np.linalg.det(A)
print(f'Determinant of matrix A: {determinant_A}')

### 4. Signal Processing
Signal processing involves analyzing, modifying, and synthesizing signals such as sound, images, and scientific measurements. NumPy provides a suite of functions for signal processing tasks. Let's explore some basic operations:

In [None]:
# Creating a simple sine wave signal
t = np.linspace(0, 1, 500, endpoint=False)  # 500 time values on [0, 1)
freq = 5  # Frequency of the sine wave
signal = np.sin(2 * np.pi * freq * t)
# Adding Gaussian noise from a locally seeded generator, so re-running the
# cell reproduces the same noisy signal without touching NumPy's global RNG
rng = np.random.default_rng(42)
noisy_signal = signal + 0.5 * rng.standard_normal(signal.shape[0])
# Fourier Transform to analyze the frequency components
frequencies = np.fft.fftfreq(t.shape[0], d=t[1] - t[0])
fft_values = np.fft.fft(noisy_signal)
# Low-pass filter: zero every component whose |frequency| exceeds 10
filtered_fft_values = np.where(np.abs(frequencies) > 10, 0, fft_values)
# ifft always returns a complex array. The mask above is symmetric in
# +/- frequency, so the imaginary part is only round-off; keep the real part
# so the result can be plotted without a ComplexWarning.
filtered_signal = np.fft.ifft(filtered_fft_values).real
# Preview the first samples instead of dumping all 500 values
filtered_signal[:10]

In [None]:
# matplotlib must be imported before first use; without this line the cell
# raises NameError on a fresh kernel because no earlier cell imports it.
import matplotlib.pyplot as plt

# NOTE(review): the next cell draws the same three-panel figure; consider
# keeping only one of the two.

# Plotting the signals
plt.figure(figsize=(14, 8))
# Original Signal
plt.subplot(3, 1, 1)
plt.plot(t, signal, label='Original Signal', color='blue')
plt.title('Original Sine Wave Signal')
plt.legend()
# Noisy Signal
plt.subplot(3, 1, 2)
plt.plot(t, noisy_signal, label='Noisy Signal', color='red')
plt.title('Noisy Signal')
plt.legend()
# Filtered (Denoised) Signal
plt.subplot(3, 1, 3)
# The inverse FFT result may be complex; plot its real part explicitly so
# matplotlib does not discard the imaginary part with a ComplexWarning.
plt.plot(t, np.real(filtered_signal), label='Filtered Signal', color='green')
plt.title('Filtered Signal After Fourier Transform and Filtering')
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# One entry per stacked panel: (series, legend label, line color, panel title).
# The inverse FFT result may be complex, so its real part is plotted explicitly
# to avoid a ComplexWarning when matplotlib casts the values to float.
panels = [
    (signal, 'Original Signal', 'blue', 'Original Sine Wave Signal'),
    (noisy_signal, 'Noisy Signal', 'red', 'Noisy Signal'),
    (
        np.real(filtered_signal),
        'Filtered Signal',
        'green',
        'Filtered Signal After Fourier Transform and Filtering',
    ),
]

# Plotting the signals: three rows, one column, all against the same time axis
fig, axes = plt.subplots(3, 1, figsize=(14, 8))
for ax, (series, label, color, title) in zip(axes, panels):
    ax.plot(t, series, label=label, color=color)
    ax.set_title(title)
    ax.legend()
fig.tight_layout()
plt.show()

### 5. Image Processing with NumPy
Image processing is a method to perform operations on an image to enhance it or extract useful information. NumPy, with its powerful array operations, can be used to process images. Let's explore some basic image processing tasks using NumPy:

In [None]:
import urllib.request

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image

# Loading an example image.
# Passing a URL to matplotlib's imread() has been deprecated since
# Matplotlib 3.4; its documentation recommends opening the URL and handing
# the response to Pillow, which is what is done here.
with urllib.request.urlopen('https://via.placeholder.com/150') as response:
    # Convert while the response is still open so the pixel data is fully read
    image = np.array(Image.open(response))
# Displaying the original image
plt.imshow(image)
plt.title('Original Image')
plt.axis('off')
plt.show()

In [None]:
import urllib.request
from PIL import Image

# Fetch the image over HTTP, decode it with Pillow, and copy the pixels into
# a NumPy array while the response is still open
with urllib.request.urlopen('https://via.placeholder.com/150') as url:
    image = Image.open(url)
    image_np = np.array(image)

# Show the image on its own axes with the ticks and frame hidden
fig, ax = plt.subplots()
ax.imshow(image_np)
ax.set_title('Original Image')
ax.axis('off')
plt.show()

In [None]:
# Grayscale conversion
def rgb2gray(rgb):
    """Convert an image array to grayscale with a weighted sum of its channels.

    Parameters
    ----------
    rgb : array_like
        Image data whose last axis holds the color channels, e.g. shape
        (height, width, 3) or (height, width, 4). Only the first three
        channels are used, so a fourth (alpha) channel is ignored.
        A 2-D array is treated as already being grayscale.

    Returns
    -------
    numpy.ndarray
        Float array with the channel axis removed. For 2-D input, a float
        copy of the input with the same shape.
    """
    pixels = np.asarray(rgb)
    if pixels.ndim == 2:
        # No channel axis to reduce: slicing [..., :3] here would take the
        # first three *columns* and silently return a wrong 1-D result.
        return pixels.astype(float)
    # Weights applied to the first three channels, in order
    return np.dot(pixels[..., :3], [0.2989, 0.5870, 0.1140])
grayscale_image = rgb2gray(image_np)

# Render the single-channel result with a gray colormap, ticks and frame hidden
fig, ax = plt.subplots()
ax.imshow(grayscale_image, cmap='gray')
ax.set_title('Grayscale Image')
ax.axis('off')
plt.show()

### 6. Statistical Analysis with NumPy


In [None]:
# Fix the global NumPy seed so both samples are identical on every run
np.random.seed(42)

# Two independent normal samples of 1000 values each
data_1 = np.random.normal(loc=50, scale=10, size=1000)  # mean=50, std_dev=10
data_2 = np.random.normal(loc=60, scale=15, size=1000)  # mean=60, std_dev=15

# Preview the first ten values of each sample
data_1[:10], data_2[:10]

In [None]:
from scipy.stats import ttest_ind

# Descriptive Statistics
# (np.std and np.var use their default ddof=0, i.e. the population formulas)
mean_data_1 = np.mean(data_1)
median_data_1 = np.median(data_1)
std_dev_data_1 = np.std(data_1)
variance_data_1 = np.var(data_1)
mean_data_2 = np.mean(data_2)
median_data_2 = np.median(data_2)
std_dev_data_2 = np.std(data_2)
variance_data_2 = np.var(data_2)
# Correlation: np.corrcoef returns a 2x2 matrix; [0, 1] is the coefficient
# between data_1 and data_2
correlation_coefficient = np.corrcoef(data_1, data_2)[0, 1]
# Data Distribution Visualization: overlaid, semi-transparent histograms
plt.figure(figsize=(12, 6))
plt.hist(data_1, bins=50, alpha=0.5, label='data_1')
plt.hist(data_2, bins=50, alpha=0.5, label='data_2')
plt.title('Histogram of data_1 and data_2')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.legend()
plt.show()
# Hypothesis Testing
# The two samples are generated with different standard deviations (10 vs 15),
# so use Welch's t-test (equal_var=False), which does not assume equal
# population variances, instead of the default pooled-variance test.
t_stat, p_value = ttest_ind(data_1, data_2, equal_var=False)
# Display every computed quantity, in the order the names are listed
mean_data_1, median_data_1, std_dev_data_1, variance_data_1, mean_data_2, median_data_2, std_dev_data_2, variance_data_2, correlation_coefficient, t_stat, p_value