In [1]:
import pandas as pd

# Handling missing values when reshaping data with pivot().
# There are several ways to do this; the one shown here is to fill the
# missing entries *before* pivoting, so the reshaped table holds the fill
# value rather than NaN.
data = {
    'Date': ['2023-01-01', '2023-01-01', '2023-01-02', '2023-01-02'],
    'Category': ['A', 'B', 'A', 'B'],
    'Value': [10, None, 15, 20],
}
# Build the frame and replace the missing 'Value' entry with 0 in one step.
df = pd.DataFrame(data).fillna({'Value': 0})
# One row per Date, one column per Category, cells taken from 'Value'.
pivoted_df = df.pivot(index='Date', columns='Category', values='Value')
print(pivoted_df)


Category       A     B
Date                  
2023-01-01  10.0   0.0
2023-01-02  15.0  20.0


In [3]:
# Positional form: the list of values is passed as the first argument of the
# Series constructor; a default integer index (0..3) is generated.
pd.Series([2.5, 4.0, 5.5, 6.75])

0    2.50
1    4.00
2    5.50
3    6.75
dtype: float64

In [4]:
# Keyword form: the same values passed explicitly through the `data`
# parameter; the displayed result is the same as for the positional call.
pd.Series(data=[2.5, 4.0, 5.5, 6.75])

0    2.50
1    4.00
2    5.50
3    6.75
dtype: float64

In [5]:
# `values` is NOT a parameter of the Series constructor, so this call raises
# TypeError. The error is the point of the demonstration, so it is caught and
# printed instead of being left to abort a "Restart & Run All".
try:
    pd.Series(values=[2.5, 4.0, 5.5, 6.75])
except TypeError as exc:
    # Keep the formatted message so it can be inspected after the cell runs.
    values_kwarg_message = f"{type(exc).__name__}: {exc}"
    print(values_kwarg_message)

TypeError: Series.__init__() got an unexpected keyword argument 'values'

In [6]:
# Only an index is supplied, so every entry is missing (NaN).
# The dtype is given explicitly: without it, older pandas emits a warning
# about the default dtype of an empty Series, and that default is
# version-dependent. float64 matches the output recorded for this cell.
nan_series = pd.Series(index=[2.5, 4.0, 5.5, 6.75], dtype="float64")
nan_series

  pd.Series(index=[2.5, 4.0, 5.5, 6.75])


2.50   NaN
4.00   NaN
5.50   NaN
6.75   NaN
dtype: float64

In [4]:
import pandas as pd
# Drop the columns 'two' and 'four' from a DataFrame named 'data',
# illustrated with a small four-column frame.
data = pd.DataFrame(
    {
        'one': [1, 2, 3],
        'two': [4, 5, 6],
        'three': [7, 8, 9],
        'four': [10, 11, 12],
    }
)
# `columns=` matches the labels against the column axis (the same as passing
# the labels with axis=1); axis=0 / `index=` would drop rows instead.
unwanted = ['two', 'four']
data = data.drop(columns=unwanted)
data

Unnamed: 0,one,three
0,1,7
1,2,8
2,3,9


In [5]:
# Apply a function to each column of a DataFrame named 'frame'.
def my_function(x):
    """Return ``x`` multiplied by 2."""
    return x * 2


# Small three-column frame used for the illustration.
frame = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})

# With the default axis, apply() calls the function once per column.
result = frame.apply(my_function)
print(result)

   A   B   C
0  2   8  14
1  4  10  16
2  6  12  18


In [6]:
# Illustrate how get_dummies() is used to create dummy (indicator) variables
# from a categorical column.
data = pd.DataFrame({
    'Category': ['A', 'B', 'A', 'C'],
    'Value': [10, 20, 30, 40]
})
# One indicator column per distinct category, named 'Category_<value>'.
# dtype=int pins 0/1 integer indicators: the default indicator dtype differs
# between pandas versions (boolean True/False from pandas 2.0 onward), which
# would no longer match the 0/1 table recorded for this cell.
dummies = pd.get_dummies(data['Category'], prefix='Category', dtype=int)
# Concatenate the dummy variables with the original DataFrame, column-wise.
data_with_dummies = pd.concat([data, dummies], axis=1)
print(data_with_dummies)

  Category  Value  Category_A  Category_B  Category_C
0        A     10           1           0           0
1        B     20           0           1           0
2        A     30           1           0           0
3        C     40           0           0           1


In [7]:
# Return the unique values of pd.Index(['a', 'b', 'c', 'a']).
# The repeated label 'a' appears only once in the result.
labels = ['a', 'b', 'c', 'a']
index = pd.Index(labels)
unique_values = index.unique()
print(unique_values)

Index(['a', 'b', 'c'], dtype='object')


In [None]:
# Illustrate the result of arr2d[1, 2]: element lookup in a 2-D array
# using a (row, column) pair of indices.
import numpy as np

arr2d = np.array([
    [10, 20, 30],
    [40, 50, 60],
    [70, 80, 90],
])
# Indices are zero-based: row 1 is the second row, column 2 the third column.
row, col = 1, 2
result = arr2d[row, col]

print(result)  # Output: 60 

60
