# Numpy and Pandas Methods

In [1]:
import numpy as np
import pandas as pd

## Numpy

1. np.array()  
Creates an ndarray object

In [2]:
np.array([(1,3,5,7,9),(2,4,6,8,10)])

array([[ 1,  3,  5,  7,  9],
       [ 2,  4,  6,  8, 10]])

2. np.arange()  
Creates an array of evenly spaced values within a given interval

In [3]:
np.arange(0,100,10)

array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

3. np.linspace()  
Creates an array of evenly spaced values, but unlike np.arange() that allows us optionally specify the stepsize, here we specify the number of values we want in the sequece. 

In [4]:
np.linspace(0,100,10)

array([  0.        ,  11.11111111,  22.22222222,  33.33333333,
        44.44444444,  55.55555556,  66.66666667,  77.77777778,
        88.88888889, 100.        ])

4. np.random.rand()  
Returns n unique numbers from a uniform distribution(between 0 and 1)

In [5]:
np.random.rand(5)

array([0.21676479, 0.96752286, 0.86962485, 0.61511461, 0.18490571])

5. np.random.randint()  
Returns n(if the last argument is specified) random integers between a particular range

In [6]:
np.random.randint(1,10,3)

array([2, 4, 5])

6. np.random.shuffle()  
Modifies the sequence of an array by shuffling its elements

In [7]:
arr = np.arange(0,50,7)
np.random.shuffle(arr)
arr

array([ 0, 28, 42, 21, 49,  7, 35, 14])

7. np.sort()  
Sorts elements in an array in ascending order  

In [8]:
arr = np.array([4,6,2,5,12,8,7])
np.sort(arr)

array([ 2,  4,  5,  6,  7,  8, 12])

8. np.unique()  
Returns the unique elements in an array

In [9]:
arr = np.array([1,2,2,3,2,5,4,7,7,9,10])
np.unique(arr)

array([ 1,  2,  3,  4,  5,  7,  9, 10])

9. np.full()  
Returns an nxn matrix filled with a specified value



In [10]:
np.full(fill_value=2, shape=(3,3))

array([[2, 2, 2],
       [2, 2, 2],
       [2, 2, 2]])

10. np.identity()  
creates an identity matrix of the specified direction



In [11]:
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

11. np.transpose()    
Transposes an array

In [12]:
arr = np.random.randint(6, size=(2,3))
np.transpose(arr)

array([[2, 0],
       [3, 5],
       [1, 5]])

12. np.round()  
Rounds elements in an array to a specified number of decimals

In [13]:
arr = np.random.rand(6)
np.round(arr, 3)

array([0.165, 0.552, 0.282, 0.157, 0.345, 0.962])

13. np.concatenate()  
Joins arrays along a specified axis

In [14]:
arr = np.array([(2,4,5), (8,9,10)])
arr2 = np.array([(1,3,6), (11,12,13)])
np.concatenate((arr, arr2), axis=0)

array([[ 2,  4,  5],
       [ 8,  9, 10],
       [ 1,  3,  6],
       [11, 12, 13]])

14. np.sum()  
Returns the seum of elements in an array

In [15]:
arr = np.array([1,3,5])
np.sum(arr)

9

15. np.mean()  
Returns the mean of elements in an array

In [16]:
arr = np.array([2,4,6,8,10])
np.mean(arr)

6.0

16. np.median()  
Returns the median of elements in an array

In [17]:
arr = np.array([1,3,5,2,4,6,8,10])
np.median(arr)

4.5

17. np.std()  
Returns the standard deviation of elements in an array

In [18]:
np.std(arr)

2.8476964374736293

18. np.var()  
Returns the variance of elements in an array

In [19]:
np.var(arr)

8.109375

19. np.count_nonzero()  
Returns the count of non zero elements in an array

In [20]:
arr = np.identity(4)
np.count_nonzero(arr)

4

20. np.absolute()  
Returns the absolute value of elements in an array

In [21]:
arr = np.arange(-10,0)
np.absolute(arr)

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1])

## Pandas

1. pd.Series()  
Creates a Pandas series object

In [22]:
pd.Series([2,4,6,8,10])

0     2
1     4
2     6
3     8
4    10
dtype: int64

2. pd.DataFrame()  
Creates a Pandas dataframe object

In [23]:
names_dict = {"Name": ["Dapo", "Doyin", "Esther", "Chisom", "Ebuka"], "Gender": ["Male", "Female", "Female", "Female", "Male"], "Age" : [17, 15, 18, 16, 15]}
df = pd.DataFrame(names_dict)
df

Unnamed: 0,Name,Gender,Age
0,Dapo,Male,17
1,Doyin,Female,15
2,Esther,Female,18
3,Chisom,Female,16
4,Ebuka,Male,15


3. pd.read_csv()  
Loads a CSV file into a Pandas dataframe

In [24]:
df2 = pd.read_csv(r"chord-progressions.csv", index_col=False)
df2

Unnamed: 0,1st chord,2nd chord,3rd chord,4th chord,Progression Quality
0,6,4,1,5,Alternative
1,4,4,1,5,Catchy
2,1,1,1,1,Didgeridoo
3,1,6,4,5,Dreadful
4,1,6,2,5,Dreadful
5,1,6,2,4,Endless
6,1,3,4,6,Energetic
7,1,5,1,4,Folk
8,1,6,1,4,Folk
9,6,5,4,3,Flamenco


4. pd.Dataframe.head()  
Returns the first n rows of a Pandas Dataframe, default is 5

In [25]:
df2.head()

Unnamed: 0,1st chord,2nd chord,3rd chord,4th chord,Progression Quality
0,6,4,1,5,Alternative
1,4,4,1,5,Catchy
2,1,1,1,1,Didgeridoo
3,1,6,4,5,Dreadful
4,1,6,2,5,Dreadful


5. pd.Dataframe.tail()  
Returns the last n rows of a Pandas Dataframe, default is 5

In [26]:
df2.tail()

Unnamed: 0,1st chord,2nd chord,3rd chord,4th chord,Progression Quality
23,1,5,5,1,Simple
24,1,4,1,4,Wildside
25,1,1,4,6,Wistful
26,2,1,5,7,Moody
27,2,1,7,6,Moody


6. pd.Dataframe.isna()  
Returns booleans of missing values in the dataframe, False if it is not a missing value, True if it is

In [27]:
df2.isna()

Unnamed: 0,1st chord,2nd chord,3rd chord,4th chord,Progression Quality
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
5,False,False,False,False,False
6,False,False,False,False,False
7,False,False,False,False,False
8,False,False,False,False,False
9,False,False,False,False,False


7. pd.Dataframe.duplicated()  
Return booleans of duplicated rows in a dataframe


In [28]:
df2.duplicated()

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22    False
23    False
24    False
25    False
26    False
27    False
dtype: bool

8. pd.Series.value_counts()  
Returns the count of each unique value in a Pandas series

In [29]:
df2["1st chord"].value_counts()

1    19
6     3
2     3
4     2
5     1
Name: 1st chord, dtype: int64

9. pd.Series.nunique()  
Returns the number of unique values in a Pandas series

In [30]:
df2["2nd chord"].nunique()

5

10. pd.DataFrame.describe()  
Returns a dataframe showing the statistical summmary of columns in the dataframe

In [31]:
df2.describe()

Unnamed: 0,1st chord,2nd chord,3rd chord,4th chord
count,28.0,28.0,28.0,28.0
mean,2.0,4.035714,3.142857,4.5
std,1.763834,1.643892,1.938199,1.575272
min,1.0,1.0,1.0,1.0
25%,1.0,4.0,1.0,4.0
50%,1.0,4.0,3.5,5.0
75%,2.0,5.0,5.0,5.25
max,6.0,6.0,7.0,7.0


11. df2.rename()
Renames columns of a dataframe

In [32]:
df2.rename(columns={"1st chord":"1st_chord", "2nd chord":"2nd_chord", "3rd chord":"3rd chord", "4th chord":"4th_chord",
                   "Progression Quality":"Progression_quality"}, inplace=True)
df2

Unnamed: 0,1st_chord,2nd_chord,3rd chord,4th_chord,Progression_quality
0,6,4,1,5,Alternative
1,4,4,1,5,Catchy
2,1,1,1,1,Didgeridoo
3,1,6,4,5,Dreadful
4,1,6,2,5,Dreadful
5,1,6,2,4,Endless
6,1,3,4,6,Energetic
7,1,5,1,4,Folk
8,1,6,1,4,Folk
9,6,5,4,3,Flamenco


12. pd.DataFrame.query()  
It is used to filter using a boolean expression

In [33]:
df2.query("Progression_quality == 'Sad'")

Unnamed: 0,1st_chord,2nd_chord,3rd chord,4th_chord,Progression_quality
18,1,4,5,5,Sad
19,1,5,4,4,Sad
20,1,4,5,4,Sad


13. pd.DataFrame.info()  
Returns a concise summary of the dataframe

In [34]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   1st_chord            28 non-null     int64 
 1   2nd_chord            28 non-null     int64 
 2   3rd chord            28 non-null     int64 
 3   4th_chord            28 non-null     int64 
 4   Progression_quality  28 non-null     object
dtypes: int64(4), object(1)
memory usage: 1.2+ KB


14. pd.DataFrame.memory_usage()  
Returns how much memory each column uses

In [35]:
df2.memory_usage()

Index                  128
1st_chord              224
2nd_chord              224
3rd chord              224
4th_chord              224
Progression_quality    224
dtype: int64

15. pd.DataFrame.groupby()  
Groups the dataframe based on a column/columns in order to calculate aggregrate operations

In [36]:
df2.groupby("1st_chord").sum()

Unnamed: 0_level_0,2nd_chord,3rd chord,4th_chord
1st_chord,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,83,58,83
2,7,13,19
4,5,5,10
5,4,1,1
6,14,11,13


16. pd.DataFrame.sort_values()  
Sorts the dataframe by a specified column in the order specified

In [37]:
df2.sort_values(by="1st_chord", ascending=False)

Unnamed: 0,1st_chord,2nd_chord,3rd chord,4th_chord,Progression_quality
0,6,4,1,5,Alternative
9,6,5,4,3,Flamenco
10,6,5,6,5,Flamenco
21,5,4,1,1,Sweet
17,4,1,4,5,Rebellious
1,4,4,1,5,Catchy
26,2,1,5,7,Moody
12,2,5,1,6,Jazz
27,2,1,7,6,Moody
8,1,6,1,4,Folk


17. pd.DataFrame.insert()  
Inserts a column in the dataframe in a specified position

In [38]:
new_col = np.random.randint(1,7,28)
df2.insert(4, "5th_chord", new_col)
df2

Unnamed: 0,1st_chord,2nd_chord,3rd chord,4th_chord,5th_chord,Progression_quality
0,6,4,1,5,2,Alternative
1,4,4,1,5,1,Catchy
2,1,1,1,1,1,Didgeridoo
3,1,6,4,5,5,Dreadful
4,1,6,2,5,3,Dreadful
5,1,6,2,4,3,Endless
6,1,3,4,6,4,Energetic
7,1,5,1,4,6,Folk
8,1,6,1,4,4,Folk
9,6,5,4,3,2,Flamenco


18. pd.DataFrame.drop()
Drops specified rows/columns

In [39]:
df2.drop(columns=["5th_chord"], axis=1, inplace=True)

In [40]:
df2

Unnamed: 0,1st_chord,2nd_chord,3rd chord,4th_chord,Progression_quality
0,6,4,1,5,Alternative
1,4,4,1,5,Catchy
2,1,1,1,1,Didgeridoo
3,1,6,4,5,Dreadful
4,1,6,2,5,Dreadful
5,1,6,2,4,Endless
6,1,3,4,6,Energetic
7,1,5,1,4,Folk
8,1,6,1,4,Folk
9,6,5,4,3,Flamenco


19. pd.concat()  
Concatenates dataframes along a specified axis, default is row-wise

In [41]:
df_sad = df2.query("Progression_quality == 'Sad'")
df_simple = df2.query("Progression_quality == 'Simple'")
df3 = pd.concat([df_sad, df_simple])
df3

Unnamed: 0,1st_chord,2nd_chord,3rd chord,4th_chord,Progression_quality
18,1,4,5,5,Sad
19,1,5,4,4,Sad
20,1,4,5,4,Sad
22,1,4,1,4,Simple
23,1,5,5,1,Simple


20. pd.Series.astype()  
Changes the datatype of the column to another datatype

In [42]:
df2["1st_chord"] = df2["1st_chord"].astype("float")
df2.dtypes

1st_chord              float64
2nd_chord                int64
3rd chord                int64
4th_chord                int64
Progression_quality     object
dtype: object