In [1]:
# 01_basic_pandas.ipynb
# Basic pandas practice: creating and manipulating DataFrames

import pandas as pd
import numpy as np

In [3]:
# 1. Build a small example DataFrame
# 'Age' and 'Score' each contain one np.nan so that the following steps
# have missing values to work with.
data = dict(
    Name=['a', 'b', 'c', 'd'],
    Age=[2, np.nan, 1, 4],
    Score=[1, 2, np.nan, 4],
)
df = pd.DataFrame(data)
print(df)

  Name  Age  Score
0    a  2.0    1.0
1    b  NaN    2.0
2    c  1.0    NaN
3    d  4.0    4.0


In [5]:
# 2. Replace missing values
# A scalar fill value is applied to every column of the frame, so each NaN
# in 'Age' and 'Score' becomes 3.
df = df.fillna(value=3)
print(df)

  Name  Age  Score
0    a  2.0    1.0
1    b  3.0    2.0
2    c  1.0    3.0
3    d  4.0    4.0


In [7]:
# 3. Add a new column 'Grade'
# assign() returns a new frame instead of mutating `df` in place, matching
# the `df = df.<method>(...)` pattern used by the other steps. The list
# must supply exactly one grade per row.
df = df.assign(Grade=['A', 'B', 'A', 'C'])
print(df)

  Name  Age  Score Grade
0    a  2.0    1.0     A
1    b  3.0    2.0     B
2    c  1.0    3.0     A
3    d  4.0    4.0     C


In [9]:
# 4. Remove the 'Score' column
# drop() returns a new frame, which is rebound to `df`.
df = df.drop(columns='Score')
print(df)

  Name  Age Grade
0    a  2.0     A
1    b  3.0     B
2    c  1.0     A
3    d  4.0     C


In [11]:
# 5. Remove the row whose index label is 1
# drop() matches on the index label; the remaining rows keep their
# original labels.
df = df.drop(index=1)
print(df)

  Name  Age Grade
0    a  2.0     A
2    c  1.0     A
3    d  4.0     C


In [13]:
# 6. Reset index
# drop=True discards the old index labels instead of inserting them as a
# new column, so the rows are renumbered consecutively from 0.
df = df.reset_index(drop=True)
print(df)

  Name  Age Grade
0    a  2.0     A
1    c  1.0     A
2    d  4.0     C


In [15]:
# 7. Keep only the rows whose Age is at least 2
# The boolean mask selects rows; the selected rows keep their index labels.
filtered_df = df.loc[df['Age'].ge(2)]
print(filtered_df)

  Name  Age Grade
0    a  2.0     A
2    d  4.0     C


In [17]:
# 8. Mean of 'Age' for each distinct 'Name'
# (every Name occurs once in this data, so each group's mean is simply
# that row's Age)
grouped = df.groupby(by='Name')['Age'].agg('mean')
print(grouped)

Name
a    2.0
c    1.0
d    4.0
Name: Age, dtype: float64


In [19]:
# 9. Order the rows from highest to lowest 'Age'
# sort_values() returns a new frame (`df` itself is not reordered) and the
# rows keep their index labels.
sorted_df = df.sort_values(by='Age', ascending=False)
print(sorted_df)

  Name  Age Grade
2    d  4.0     C
0    a  2.0     A
1    c  1.0     A
