In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
list_1 = [11, 22, 33, 44]
s_1 = pd.Series(list_1)
s_1

0    11
1    22
2    33
3    44
dtype: int64

In [3]:
n_1 = np.array(list_1)
n_1

array([11, 22, 33, 44])

In [4]:
s_2 = pd.Series(n_1)
s_2

0    11
1    22
2    33
3    44
dtype: int64

In [5]:
type(s_2)

pandas.core.series.Series

In [6]:
s_2.values  # ndarray with one dimension

array([11, 22, 33, 44])

In [7]:
s_2.index

RangeIndex(start=0, stop=4, step=1)

In [8]:
# s_2.index = ["a", "b", "c", "d"]
s_2.index = list("BCDE")
s_2

B    11
C    22
D    33
E    44
dtype: int64

In [9]:
s_2.B, s_2.C, s_2.D

(11, 22, 33)

In [10]:
d = {"a":11, "b":22, "c":33, "d":44}
s_3 = pd.Series(d)
s_3

a    11
b    22
c    33
d    44
dtype: int64

In [11]:
s = pd.Series({"python":150, "numpy":130, "pandas":130})
s

python    150
numpy     130
pandas    130
dtype: int64

In [12]:
s[["python", "numpy"]]

python    150
numpy     130
dtype: int64

In [13]:
s.loc[["python"]]

python    150
dtype: int64

In [14]:
s[2]

130

In [15]:
s = pd.Series({"Chinese":120, "Math":150, "English":140, "Python":199})
s

Chinese    120
Math       150
English    140
Python     199
dtype: int64

In [16]:
s[1:3]

Math       150
English    140
dtype: int64

In [17]:
s.iloc[1:4]

Math       150
English    140
Python     199
dtype: int64

In [18]:
# s["Math":"Python"]
s.loc["Math":"Python"]

Math       150
English    140
Python     199
dtype: int64

In [19]:
s.shape

(4,)

In [20]:
s.size

4

In [21]:
s.index

Index(['Chinese', 'Math', 'English', 'Python'], dtype='object')

In [22]:
s.values

array([120, 150, 140, 199])

In [23]:
s = pd.Series(["a", "bb", "ccc", np.nan])
s

0      a
1     bb
2    ccc
3    NaN
dtype: object

In [24]:
s.isnull()

0    False
1    False
2    False
3     True
dtype: bool

In [25]:
pd.isnull(s)

0    False
1    False
2    False
3     True
dtype: bool

In [26]:
s.notnull()

0     True
1     True
2     True
3    False
dtype: bool

In [27]:
pd.notnull(s)

0     True
1     True
2     True
3    False
dtype: bool

In [28]:
cond1 = s.isnull()
# cond1
s = s[~cond1]
s

0      a
1     bb
2    ccc
dtype: object

In [29]:
s = pd.Series(np.random.randint(10, 100, size=10))
s+100

0    172
1    178
2    148
3    114
4    172
5    165
6    182
7    174
8    134
9    150
dtype: int64

In [30]:
s_1 = pd.Series(np.random.randint(10, 100, size=5))
s_2 = pd.Series(np.random.randint(10, 100, size=4))
s_1 + s_2  # according to the index!

0    133.0
1     79.0
2    149.0
3    106.0
4      NaN
dtype: float64

In [31]:
d = {
    "name":["a", "b", "c"],
    "age":[1, 2, 3],
}
df = pd.DataFrame(d)
df

Unnamed: 0,name,age
0,a,1
1,b,2
2,c,3


In [32]:
df.values

array([['a', 1],
       ['b', 2],
       ['c', 3]], dtype=object)

In [33]:
df.columns

Index(['name', 'age'], dtype='object')

In [34]:
df.index

RangeIndex(start=0, stop=3, step=1)

In [35]:
df.shape

(3, 2)

In [36]:
df.index = list("ABC")
df

Unnamed: 0,name,age
A,a,1
B,b,2
C,c,3


In [37]:
d = {
    "name":["a", "b", "c"],
    "age":[1, 2, 3],
}
df = pd.DataFrame(d, index=list("ABC"))
df

Unnamed: 0,name,age
A,a,1
B,b,2
C,c,3


In [38]:
# df.A  # ERROR 

In [39]:
df.name

A    a
B    b
C    c
Name: name, dtype: object

In [40]:
df.loc["A"]

name    a
age     1
Name: A, dtype: object

In [41]:
df.iloc[2]

name    c
age     3
Name: C, dtype: object

In [42]:
df.name[0]

'a'

In [43]:
df.iloc[0][0]

'a'

In [44]:
# df[:2]
df["A":"B"]

Unnamed: 0,name,age
A,a,1
B,b,2


In [45]:
df.iloc[0:2]

Unnamed: 0,name,age
A,a,1
B,b,2


In [46]:
df.iloc[:, 1:2]

Unnamed: 0,age
A,1
B,2
C,3


In [47]:
df1 = pd.DataFrame(data=np.random.randint(0, 100, size=(3, 3)), 
                   index=["Student A", "Student B", "Student C"], columns=["Chinese", "Math", "English"])

df2 = pd.DataFrame(data=np.random.randint(0, 100, size=(3, 3)), 
                   index=["Student A", "Student B", "Student C"], columns=["Chinese", "Math", "English"])

In [48]:
display(df1, df2)

Unnamed: 0,Chinese,Math,English
Student A,19,20,50
Student B,95,15,50
Student C,73,52,92


Unnamed: 0,Chinese,Math,English
Student A,26,14,90
Student B,61,24,26
Student C,85,67,78


In [49]:
df1 + df2

Unnamed: 0,Chinese,Math,English
Student A,45,34,140
Student B,156,39,76
Student C,158,119,170


In [50]:
s = pd.Series([100, 10, 1], index=df1.columns)
display(df1, s)

Unnamed: 0,Chinese,Math,English
Student A,19,20,50
Student B,95,15,50
Student C,73,52,92


Chinese    100
Math        10
English      1
dtype: int64

In [51]:
df1 + s

Unnamed: 0,Chinese,Math,English
Student A,119,30,51
Student B,195,25,51
Student C,173,62,93


In [52]:
df1.add(s, axis="columns")

Unnamed: 0,Chinese,Math,English
Student A,119,30,51
Student B,195,25,51
Student C,173,62,93


In [53]:
# df1.add(s, axis=0)  # ERROR 

In [54]:
df1.add(s, axis=1)

Unnamed: 0,Chinese,Math,English
Student A,119,30,51
Student B,195,25,51
Student C,173,62,93


In [55]:
s_2 = pd.Series([100, 10, 1], index=df1.index)
s_2

Student A    100
Student B     10
Student C      1
dtype: int64

In [56]:
df1.add(s_2, axis=0)

Unnamed: 0,Chinese,Math,English
Student A,119,120,150
Student B,105,25,60
Student C,74,53,93


In [57]:
import pandas as pd
import numpy as np

In [58]:
data = np.random.randint(0, 100, size=(6, 6))
index = [
    ["class 1", "class 1", "class 1", "class 2", "class 2", "class 2"],
    ["Student A", "Student B", "Student C", "Student D", "Student E", "Student F"]
]

columns = [
    ["term1", "term1", "term1", "term2", "term2", "term2"],
    ["Chinese", "Math", "English", "Chinese", "Math", "English"]
]

df = pd.DataFrame(data=data, index=index, columns=columns)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,term1,term1,term1,term2,term2,term2
Unnamed: 0_level_1,Unnamed: 1_level_1,Chinese,Math,English,Chinese,Math,English
class 1,Student A,41,89,71,40,65,94
class 1,Student B,68,87,45,9,24,5
class 1,Student C,20,98,52,83,47,90
class 2,Student D,4,84,33,64,66,18
class 2,Student E,88,1,90,0,13,23
class 2,Student F,24,90,34,36,99,74


In [59]:
data = np.random.randint(0, 100, size=6)
index = [
    ["class 1", "class 1", "class 1", "class 2", "class 2", "class 2"],
    ["Student A", "Student B", "Student C", "Student D", "Student E", "Student F"]
]
s = pd.Series(data=data, index=index)
s

class 1  Student A    11
         Student B    56
         Student C    39
class 2  Student D    19
         Student E    92
         Student F    38
dtype: int64

In [60]:
data = np.random.randint(0, 100, size=(6, 6))

index = pd.MultiIndex.from_product([
    ["class 1", "class 2"],
    ["Student A", "Student B", "Student C"]
])

columns = [
    ["term1", "term1", "term1", "term2", "term2", "term2"],
    ["Chinese", "Math", "English", "Chinese", "Math", "English"]
]

df = pd.DataFrame(data=data, index=index, columns=columns)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,term1,term1,term1,term2,term2,term2
Unnamed: 0_level_1,Unnamed: 1_level_1,Chinese,Math,English,Chinese,Math,English
class 1,Student A,99,95,33,84,3,31
class 1,Student B,54,24,29,43,75,24
class 1,Student C,26,43,35,33,65,78
class 2,Student A,23,42,61,26,62,38
class 2,Student B,41,87,86,37,63,52
class 2,Student C,33,99,64,22,67,6


In [61]:
df = df.loc["class 1", "term1"]
df

Unnamed: 0,Chinese,Math,English
Student A,99,95,33
Student B,54,24,29
Student C,26,43,35


In [62]:
df.sum(axis=1)

Student A    227
Student B    107
Student C    104
dtype: int64

In [63]:
index = ["a", "b", "c", "d"]
columns = ["Python", "Java", "H5", "NLP"]

data = np.random.randint(0, 100, size=(4, 4))
df = pd.DataFrame(data=data, index=index, columns=columns)
df

Unnamed: 0,Python,Java,H5,NLP
a,72,64,8,81
b,46,38,47,76
c,26,13,80,55
d,86,34,62,56


In [64]:
df.replace({6:66})

Unnamed: 0,Python,Java,H5,NLP
a,72,64,8,81
b,46,38,47,76
c,26,13,80,55
d,86,34,62,56


In [65]:
df2 = df.copy()
df2

Unnamed: 0,Python,Java,H5,NLP
a,72,64,8,81
b,46,38,47,76
c,26,13,80,55
d,86,34,62,56


In [66]:
df2["Python"].map({64:640})

a   NaN
b   NaN
c   NaN
d   NaN
Name: Python, dtype: float64

In [67]:
df2["Python"].map(lambda x: x*10)

a    720
b    460
c    260
d    860
Name: Python, dtype: int64

In [68]:
df2["Pandas"] = df2["Python"].map(lambda x: x*10)

In [69]:
df2["Java OK?"] = df2["Java"].map(lambda x: x>10)
df2

Unnamed: 0,Python,Java,H5,NLP,Pandas,Java OK?
a,72,64,8,81,720,True
b,46,38,47,76,460,True
c,26,13,80,55,260,True
d,86,34,62,56,860,True


In [70]:
df = pd.DataFrame(data=np.random.randint(0, 10, size=(5, 3)), index=list("ABCED"), columns=["Python", "Numpy", "Pandas]"])

In [71]:
df

Unnamed: 0,Python,Numpy,Pandas]
A,5,5,1
B,2,1,2
C,9,3,1
E,4,2,6
D,1,3,1


In [72]:
df["Python"].apply(lambda x: True if x>5 else False)

A    False
B    False
C     True
E    False
D    False
Name: Python, dtype: bool

In [73]:
df.apply(lambda x: x.mean())

Python     4.2
Numpy      2.8
Pandas]    2.2
dtype: float64

In [74]:
def fn2(x):
    return x.mean()

In [75]:
df.apply(fn2, axis=1)

A    3.666667
B    1.666667
C    4.333333
E    4.000000
D    1.666667
dtype: float64

In [76]:
df

Unnamed: 0,Python,Numpy,Pandas]
A,5,5,1
B,2,1,2
C,9,3,1
E,4,2,6
D,1,3,1


In [77]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Python,5.0,4.2,3.114482,1.0,2.0,4.0,5.0,9.0
Numpy,5.0,2.8,1.48324,1.0,2.0,3.0,3.0,5.0
Pandas],5.0,2.2,2.167948,1.0,1.0,1.0,2.0,6.0


In [79]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, A to D
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Python   5 non-null      int64
 1   Numpy    5 non-null      int64
 2   Pandas]  5 non-null      int64
dtypes: int64(3)
memory usage: 332.0+ bytes


In [80]:
df

Unnamed: 0,Python,Numpy,Pandas]
A,5,5,1
B,2,1,2
C,9,3,1
E,4,2,6
D,1,3,1


In [81]:
df.take([0, 2, 1])

Unnamed: 0,Python,Numpy,Pandas]
A,5,5,1
C,9,3,1
B,2,1,2


In [82]:
df.take([0, 2, 1], axis=1)

Unnamed: 0,Python,Pandas],Numpy
A,5,1,5
B,2,2,1
C,9,1,3
E,4,6,2
D,1,1,3
