Create a DataFrame with a datetime index and resample it by month.

In [1]:
import pandas as pd

# 100 consecutive daily timestamps starting on 2020-01-01, used as the index.
date_range = pd.date_range(start='1/1/2020', periods=100, freq='D')
data = {'X': range(100)}
df = pd.DataFrame(data, index=date_range)

# Bin the rows by calendar month-end and sum each bin.
# An explicit MonthEnd offset is passed instead of a string alias: the 'M'
# alias is deprecated since pandas 2.2 in favour of 'ME', while 'ME' is not
# recognised by older releases. The offset object depends on neither spelling.
monthly_df = df.resample(pd.offsets.MonthEnd()).sum()
print(monthly_df)

               X
2020-01-31   465
2020-02-29  1305
2020-03-31  2325
2020-04-30   855


  monthly_df = df.resample('M').sum()


Create a DataFrame and calculate the exponential moving average.

In [2]:
import pandas as pd

# Single-column frame holding the values to smooth.
df = pd.DataFrame({'X': [1, 2, 3, 4, 5, 6]})

# Exponentially weighted window with span=3; adjust=False selects the
# recursive form, so the first EMA value equals the first observation.
smoother = df['X'].ewm(span=3, adjust=False)
df['EMA'] = smoother.mean()
print(df)

   X      EMA
0  1  1.00000
1  2  1.50000
2  3  2.25000
3  4  3.12500
4  5  4.06250
5  6  5.03125


Create a DataFrame with random integers and calculate the mode.

In [8]:
import pandas as pd
import numpy as np

# 5x3 table of random integers in 1..9 (the upper bound 10 is exclusive).
# No seed is set, so the values differ on every run.
column_names = ['X', 'Y', 'Z']
values = np.random.randint(1, 10, size=(5, 3))
df = pd.DataFrame(values, columns=column_names)

# Most frequent value(s) of each column. Columns with fewer modes than the
# longest one are padded with NaN in the result.
modes = df.mode()
print(modes)

     X  Y  Z
0  5.0  1  3
1  NaN  3  4
2  NaN  4  5
3  NaN  6  8
4  NaN  9  9


Create a DataFrame and calculate the z-score of each column.

In [9]:
import pandas as pd
import numpy as np

df = pd.DataFrame({'X': [1, 2, 3, 4], 'Y': [4, 5, 6, 7]})

# Standardise each source column: subtract its mean, then divide by its
# standard deviation as returned by np.std. The result columns keep the
# 'A'/'B' suffixes even though the source columns are named X and Y.
for source, target in (('X', 'zscore_A'), ('Y', 'zscore_B')):
    column = df[source]
    centered = column - np.mean(column)
    df[target] = centered / np.std(column)
print(df)

   X  Y  zscore_A  zscore_B
0  1  4 -1.341641 -1.341641
1  2  5 -0.447214 -0.447214
2  3  6  0.447214  0.447214
3  4  7  1.341641  1.341641


Create a DataFrame with random values and calculate the median.

In [10]:
import pandas as pd
import numpy as np

# 4x3 table of random floats from np.random.rand (unseeded, so the printed
# numbers change between runs).
df = pd.DataFrame(np.random.rand(4, 3), columns=list('XYZ'))

# Median of each column, returned as a Series indexed by column name.
column_medians = df.median()
print(column_medians)

X    0.749177
Y    0.532431
Z    0.655125
dtype: float64


Create a DataFrame and apply a custom function to each column.

In [11]:
import pandas as pd


def add_one(column):
    """Return the given column with 1 added to every element."""
    return column + 1


df = pd.DataFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]})

# With no axis argument, apply hands each column to the function in turn.
df = df.apply(add_one)
print(df)

   X  Y
0  2  5
1  3  6
2  4  7


Create a DataFrame with a hierarchical index and calculate the mean for each group.

In [12]:
import pandas as pd

# Two-level index: outer level 'Group' (X/Y), inner level 'Number' (1/2).
level_values = [['X', 'X', 'Y', 'Y'], [1, 2, 1, 2]]
index = pd.MultiIndex.from_arrays(level_values, names=('Group', 'Number'))
df = pd.DataFrame({'Value': [10, 20, 30, 40]}, index=index)

# Average 'Value' over the rows sharing the same outer 'Group' label.
grouped_df = df.groupby(level='Group').mean()
print(grouped_df)

       Value
Group       
X       15.0
Y       35.0


Create a DataFrame and calculate the percentage of missing values in each column.

In [13]:
import pandas as pd

df = pd.DataFrame({'X': [1, 2, None, 4], 'Y': [4, None, 6, 8]})

# Boolean mask of missing cells; the column-wise mean of a boolean mask is
# the fraction of True values, i.e. the share of missing entries.
missing_mask = df.isna()
missing_fraction = missing_mask.mean()
missing_percentage = missing_fraction * 100
print(missing_percentage)

X    25.0
Y    25.0
dtype: float64


Create a DataFrame and apply a custom function to each row.

In [14]:
import pandas as pd


def row_total(row):
    """Return the sum of the 'X' and 'Y' entries of a single row."""
    return row['X'] + row['Y']


df = pd.DataFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]})

# axis=1 makes apply pass one row at a time to the function.
df['Sum'] = df.apply(row_total, axis=1)
print(df)

   X  Y  Sum
0  1  4    5
1  2  5    7
2  3  6    9


Create a DataFrame with random values and calculate the quantiles.

In [15]:
import pandas as pd
import numpy as np

# 4x3 table of random floats from np.random.rand (unseeded, so results vary
# from run to run).
df = pd.DataFrame(np.random.rand(4, 3), columns=['X', 'Y', 'Z'])

# Lower quartile, median and upper quartile of every column; the result has
# one row per requested level.
quartile_levels = [0.25, 0.5, 0.75]
quartiles = df.quantile(quartile_levels)
print(quartiles)

             X         Y         Z
0.25  0.288570  0.438385  0.321990
0.50  0.568169  0.626097  0.431054
0.75  0.790837  0.810459  0.608168
