# Broadcasting Example

In [19]:
import numpy as np

## Broadcasting 

In [20]:
# 3x4 matrix of floats used throughout the broadcasting walkthrough.
A = np.array(
    [
        [56.0, 0.0, 4.4, 68.0],
        [1.2, 104.0, 52.0, 8.0],
        [1.8, 135.0, 99.0, 0.9],
    ],
    dtype=np.float64,
)
print(A)

[[ 56.    0.    4.4  68. ]
 [  1.2 104.   52.    8. ]
 [  1.8 135.   99.    0.9]]


In [21]:
# Column totals: summing along axis 0 collapses the rows and leaves one
# value per column.
cal_sum = np.sum(A, axis=0)
print(cal_sum)

[ 59.  239.  155.4  76.9]


In [22]:
# Express every entry as a percentage of its column total. The 1-D totals
# are broadcast across the rows of A, so no explicit tiling is needed.
# The multiplication by 100 is still applied before the division.
percentage = np.divide(100 * A, cal_sum)
print(percentage)

[[94.91525424  0.          2.83140283 88.42652796]
 [ 2.03389831 43.51464435 33.46203346 10.40312094]
 [ 3.05084746 56.48535565 63.70656371  1.17035111]]


In [23]:
# Shape of the full matrix (rows, columns).
print(np.shape(A))

(3, 4)


In [24]:
# Shape of the column totals: a 1-D array, which is what gets broadcast.
print(np.shape(cal_sum))

(4,)


## Under the hood

In [25]:
# Build by hand the array that broadcasting conceptually expands `cal_sum`
# to: reshape the totals into a single row, then stack one copy per row of A.
# The sizes are derived from the data (-1 infers the column count and
# A.shape[0] is the row count) instead of being hard-coded as 1, 4 and 3,
# so the cell keeps working if A changes shape.
cal_sum_ = cal_sum.reshape(1, -1).repeat(A.shape[0], axis=0)
print(cal_sum_)

[[ 59.  239.  155.4  76.9]
 [ 59.  239.  155.4  76.9]
 [ 59.  239.  155.4  76.9]]


In [26]:
# Same percentage computation, but dividing by the explicitly tiled totals
# so both operands already have identical shapes (no broadcasting involved).
percentage_ = np.divide(100 * A, cal_sum_)
print(percentage_)

[[94.91525424  0.          2.83140283 88.42652796]
 [ 2.03389831 43.51464435 33.46203346 10.40312094]
 [ 3.05084746 56.48535565 63.70656371  1.17035111]]


In [27]:
# Element-wise difference between the broadcast result and the hand-tiled
# result; an all-zero array means the two approaches agree.
np.subtract(percentage, percentage_)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

------------
# Itertools

In [28]:
import itertools

## cycle

In [29]:
# Create an iterator that cycles through the list [1, 2, 3] endlessly
cycler = itertools.cycle([1, 2, 3])

# Generate the first 10 elements of the cycle.
# islice bounds the infinite iterator directly, replacing the manual
# enumerate / counter / break, and it stops without pulling an 11th
# element out of `cycler`.
print("Cycle Example:")
for element in itertools.islice(cycler, 10):
    print(element, end=" ")  # Output: 1 2 3 1 2 3 1 2 3 1

Cycle Example:
1 2 3 1 2 3 1 2 3 1 

## repeat

In [30]:
# An iterator that yields the value 4 exactly five times, then stops
repeater = itertools.repeat(4, times=5)

print("\nRepeat Example:")
# Unpack the iterator into one print call: the default " " separator plus
# end=" " produces the same text as printing each item with end=" ".
print(*repeater, end=" ")  # Output: 4 4 4 4 4


Repeat Example:
4 4 4 4 4 

## chain

In [31]:
# Treat three separate lists as one continuous stream of items
chainer = itertools.chain.from_iterable(
    ([1, 2, 3], ['a', 'b', 'c'], [4.0, 5.0, 6.0])
)

print("\nChain Example:")
# Unpacking drains the chained iterator in order, one item per print argument
print(*chainer, end=" ")  # Output: 1 2 3 a b c 4.0 5.0 6.0


Chain Example:
1 2 3 a b c 4.0 5.0 6.0 

## combinations

In [32]:
lst = [1, 2, 3]
# Collect every 2-element combination of `lst` as a list of tuples
comb = [pair for pair in itertools.combinations(lst, 2)]
print(comb)  # Output: [(1, 2), (1, 3), (2, 3)]

[(1, 2), (1, 3), (2, 3)]


----------
# Pandas

In [33]:
import pandas as pd

## Series & DataFrame

### Series

In [34]:
import pandas as pd

# Series built from a plain list; pandas supplies the default 0..n-1 index
s = pd.Series(data=list(range(1, 6)))

# Same constructor, but with explicit string labels instead of the default index
s_with_index = pd.Series(data=[1, 2, 3], index=list("abc"))

In [35]:
# Bare last expression: the notebook renders the Series `s` (default integer
# index) as this cell's output.
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [36]:
# Bare last expression: displays the Series with its custom 'a', 'b', 'c' labels.
s_with_index

a    1
b    2
c    3
dtype: int64

### DataFrame

In [37]:
# DataFrame from a mapping of column name -> column values
df = pd.DataFrame(dict(A=[1, 2, 3], B=[4, 5, 6], C=list("pqr")))

# DataFrame assembled from Series: give each Series its column name, then
# join them side by side (axis=1) on their shared default index
s1 = pd.Series([1, 2, 3])
s2 = pd.Series([4, 5, 6])
df_from_series = pd.concat([s1.rename("Column1"), s2.rename("Column2")], axis=1)

In [38]:
# Bare last expression: displays the two-column DataFrame built from s1 and s2.
df_from_series

Unnamed: 0,Column1,Column2
0,1,4
1,2,5
2,3,6


----------
# NumPy and Pandas

## Easy Creation by Converting

In [40]:
# A 2x3 integer array to convert
numpy_array = np.array([[1, 2, 3], [4, 5, 6]])

# Wrap the array in a DataFrame, labelling its three columns
df_from_array = pd.DataFrame(data=numpy_array, columns=list("ABC"))

# Round-trip: pull the values back out of the DataFrame as a NumPy array
print(df_from_array.to_numpy())

[[1 2 3]
 [4 5 6]]


## Datatype

In [39]:
# One integer column, one float column and one string column
df = pd.DataFrame(dict(A=[1, 2, 3], B=[1.2, 3.4, 4.5], C=list("abc")))

# Per-column dtypes; these map to NumPy dtypes
print(df.dtypes)

A      int64
B    float64
C     object
dtype: object


In [46]:
# Bare last expression: displays the mixed-type DataFrame.
# NOTE(review): `df` is created above with A = [1, 2, 3], but the stored
# output shows A = 2, 3, 4. This cell's execution count (46) is higher than
# that of the `df['A'] += 1` cell (45), so it appears to have been run after
# the increment; a fresh top-to-bottom run would show 1, 2, 3 here.
df

Unnamed: 0,A,B,C
0,2,1.2,a
1,3,3.4,b
2,4,4.5,c


## Advanced Indexing

In [42]:
# NumPy boolean indexing: a boolean mask selects the elements where it is True
numpy_array = np.arange(1, 6)
filtered_array = numpy_array[np.greater(numpy_array, 2)]
print(filtered_array)  # Output: [3 4 5]

[3 4 5]


In [43]:
# Pandas boolean indexing: same idea as NumPy, and the surviving rows keep
# their original index labels
s = pd.Series(list(range(1, 6)))
filtered_series = s.loc[s.gt(2)]
print(filtered_series)  # Output: 2    3, 3    4, 4    5

2    3
3    4
4    5
dtype: int64


In [44]:
# Compound filter: NumPy's logical_and combines two pandas boolean Series
# element-wise, and the result is used as the row mask
filtered_series = s.loc[np.logical_and(s.gt(2), s.lt(5))]
print(filtered_series)  # Output: 2    3, 3    4

2    3
3    4
dtype: int64


## Broadcasting

In [47]:
# Bare last expression: positioned as the "before" view of `df` for the
# broadcasting demo.
# NOTE(review): the stored output shows A = 2, 3, 4, identical to the display
# after the increment cell, and this cell's execution count (47) is higher
# than the increment cell's (45). It appears to have been run after the
# increment; restart and run all to get a true before/after comparison.
df

Unnamed: 0,A,B,C
0,2,1.2,a
1,3,3.4,b
2,4,4.5,c


In [45]:
# Adding a single number to a whole NumPy array: the scalar 1 is applied to
# every element.
# NOTE: `+=` updates the existing array in place, so re-running this cell
# increments the values again.
numpy_array += 1

# Adding a single number to a whole DataFrame column (same scalar broadcast).
# NOTE: also not idempotent - every execution of this cell adds another 1 to
# column 'A'.
df['A'] += 1

In [48]:
# Bare last expression: displays `df` after the increment cell; the stored
# output shows column A as 2, 3, 4.
df

Unnamed: 0,A,B,C
0,2,1.2,a
1,3,3.4,b
2,4,4.5,c


## Universal Functions

In [49]:
# Using NumPy sqrt function on a Pandas Series.
# Series.pipe(func) simply calls func(series), so this is np.sqrt(s): the
# ufunc is applied element-wise and the result is a float Series that keeps
# the original index.
s_sqrt = s.pipe(np.sqrt)
print(s_sqrt)

0    1.000000
1    1.414214
2    1.732051
3    2.000000
4    2.236068
dtype: float64


---------
# Basic Operations

## Create a CSV

In [50]:
# Column-oriented data: each key is a column name and each list holds that
# column's values, one entry per person
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[24, 27, 22, 32],
    Pet=['Cat', 'Dog', 'Fish', 'Bird'],
)

df = pd.DataFrame(data=data)

# Persist the DataFrame to disk; index=False leaves the row labels out of the file
df.to_csv(path_or_buf='people_pets.csv', index=False)

In [51]:
# Bare last expression: displays the people/pets DataFrame built in the
# previous cell.
df

Unnamed: 0,Name,Age,Pet
0,Alice,24,Cat
1,Bob,27,Dog
2,Charlie,22,Fish
3,David,32,Bird


## Reading the CSV and getting info

In [52]:
# Reading the CSV file into a DataFrame
df = pd.read_csv('people_pets.csv')

# Show the first few rows of the DataFrame.
# sep="\n" puts each table on its own line; print("...\n", obj) would insert
# a space after the newline and push the header row out of alignment.
print("First few rows:", df.head(), sep="\n")

# Show the last few rows of the DataFrame
print("\nLast few rows:", df.tail(), sep="\n")

# Statistical summary of numerical columns
print("\nStatistical Summary:", df.describe(), sep="\n")

# Information about the DataFrame including data types and memory usage.
# df.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() appends a spurious "None" line.
print("\nDataFrame Info:")
df.info()


First few rows:
       Name  Age   Pet
0    Alice   24   Cat
1      Bob   27   Dog
2  Charlie   22  Fish
3    David   32  Bird

Last few rows:
       Name  Age   Pet
0    Alice   24   Cat
1      Bob   27   Dog
2  Charlie   22  Fish
3    David   32  Bird

Statistical Summary:
              Age
count   4.000000
mean   26.250000
std     4.349329
min    22.000000
25%    23.500000
50%    25.500000
75%    28.250000
max    32.000000

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   Age     4 non-null      int64 
 2   Pet     4 non-null      object
dtypes: int64(1), object(2)
memory usage: 224.0+ bytes
None
