### Import Pandas

In [1]:
import pandas as pd

### Make DataFrame

In [2]:
data = {
    'Name': ['John', 'Alice', 'Bob', 'Emily', 'David'],
    'Math': [85, 78, 92, 88, 79],
    'Science': [90, 82, 95, 79, 88],
    'History': [78, 85, 88, 92, 76]
}

In [3]:
df = pd.DataFrame(data)
display(data)

{'Name': ['John', 'Alice', 'Bob', 'Emily', 'David'],
 'Math': [85, 78, 92, 88, 79],
 'Science': [90, 82, 95, 79, 88],
 'History': [78, 85, 88, 92, 76]}

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Name     5 non-null      object
 1   Math     5 non-null      int64 
 2   Science  5 non-null      int64 
 3   History  5 non-null      int64 
dtypes: int64(3), object(1)
memory usage: 288.0+ bytes


### Try various function

In [6]:
subjects = df.columns[1:]
subjects

Index(['Math', 'Science', 'History'], dtype='object')

In [12]:
for subject in subjects:
    print(subject)
    print(df[subject])
    print(f'{subject} 평균 : {df[subject].mean()}')
    print()

Math
0    85
1    78
2    92
3    88
4    79
Name: Math, dtype: int64
Math 평균 : 84.4

Science
0    90
1    82
2    95
3    79
4    88
Name: Science, dtype: int64
Science 평균 : 86.8

History
0    78
1    85
2    88
3    92
4    76
Name: History, dtype: int64
History 평균 : 83.8



In [13]:
import numpy as np

In [14]:
df['English'] = [99, np.nan, 93, np.nan, 87]

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Name     5 non-null      object 
 1   Math     5 non-null      int64  
 2   Science  5 non-null      int64  
 3   History  5 non-null      int64  
 4   English  3 non-null      float64
dtypes: float64(1), int64(3), object(1)
memory usage: 328.0+ bytes


In [16]:
display(df)

Unnamed: 0,Name,Math,Science,History,English
0,John,85,90,78,99.0
1,Alice,78,82,85,
2,Bob,92,95,88,93.0
3,Emily,88,79,92,
4,David,79,88,76,87.0


In [17]:
df.isna()

Unnamed: 0,Name,Math,Science,History,English
0,False,False,False,False,False
1,False,False,False,False,True
2,False,False,False,False,False
3,False,False,False,False,True
4,False,False,False,False,False


In [19]:
df.isna().sum()

Name       0
Math       0
Science    0
History    0
English    2
dtype: int64

In [20]:
df['English'].fillna(0, inplace = True)
display(df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['English'].fillna(0, inplace = True)


Unnamed: 0,Name,Math,Science,History,English
0,John,85,90,78,99.0
1,Alice,78,82,85,0.0
2,Bob,92,95,88,93.0
3,Emily,88,79,92,0.0
4,David,79,88,76,87.0


In [21]:
df['English'] = df['English'].astype('int64')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Name     5 non-null      object
 1   Math     5 non-null      int64 
 2   Science  5 non-null      int64 
 3   History  5 non-null      int64 
 4   English  5 non-null      int64 
dtypes: int64(4), object(1)
memory usage: 328.0+ bytes


In [22]:
subjects = df.columns[1:]

In [23]:
display(df)

Unnamed: 0,Name,Math,Science,History,English
0,John,85,90,78,99
1,Alice,78,82,85,0
2,Bob,92,95,88,93
3,Emily,88,79,92,0
4,David,79,88,76,87


In [24]:
df.describe()

Unnamed: 0,Math,Science,History,English
count,5.0,5.0,5.0,5.0
mean,84.4,86.8,83.8,55.8
std,5.94138,6.379655,6.723095,51.114577
min,78.0,79.0,76.0,0.0
25%,79.0,82.0,78.0,0.0
50%,85.0,88.0,85.0,87.0
75%,88.0,90.0,88.0,93.0
max,92.0,95.0,92.0,99.0


In [25]:
df['Math'].max()

np.int64(92)

In [28]:
df.loc[df['Math'].idxmax()]

Name       Bob
Math        92
Science     95
History     88
English     93
Name: 2, dtype: object

In [32]:
df['Average'] = df[subjects].mean(axis = 1)
display(df)

Unnamed: 0,Name,Math,Science,History,English,Average
0,John,85,90,78,99,88.0
1,Alice,78,82,85,0,61.25
2,Bob,92,95,88,93,92.0
3,Emily,88,79,92,0,64.75
4,David,79,88,76,87,82.5


In [33]:
df[subjects].mean()

Math       84.4
Science    86.8
History    83.8
English    55.8
dtype: float64

In [34]:
df.sort_values(by = 'Average', ascending = False)

Unnamed: 0,Name,Math,Science,History,English,Average
2,Bob,92,95,88,93,92.0
0,John,85,90,78,99,88.0
4,David,79,88,76,87,82.5
3,Emily,88,79,92,0,64.75
1,Alice,78,82,85,0,61.25


In [36]:
df[df['Math'] > 80]

Unnamed: 0,Name,Math,Science,History,English,Average
0,John,85,90,78,99,88.0
2,Bob,92,95,88,93,92.0
3,Emily,88,79,92,0,64.75


In [39]:
df[(df['Math'] < 80) & (df['English'] > 80)]

Unnamed: 0,Name,Math,Science,History,English,Average
4,David,79,88,76,87,82.5
