# Pandas 3

## Questions


**Exercise 1**

Resample a daily time series to weekly frequency and compute the mean of each week.


In [None]:
import pandas as pd
# 100 consecutive daily timestamps starting on 2024-01-01.
dates = pd.date_range(start='2024-01-01', periods=100, freq='D')
# One integer observation per day, indexed by its date.
df = pd.DataFrame({'val': range(100)}, index=dates)
# Bucket the daily rows into weekly bins and average each bin.
weekly_mean = df.resample('W').mean()
print(weekly_mean.head())


**Exercise 2**

Create a cumulative sum column.


In [None]:
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3, 4, 5]})
# Running total of column A: each row holds the sum of all rows up to it.
running_total = df['A'].cumsum()
df['cumsum'] = running_total
print(df)


**Exercise 3**

Detect outliers in a dataset using the interquartile range (IQR) rule.


In [None]:
import pandas as pd
import numpy as np
df = pd.DataFrame({'A': [1, 2, 3, 100, 4, 5]})
values = df['A']
# First and third quartiles of the column.
q1 = values.quantile(0.25)
q3 = values.quantile(0.75)
iqr = q3 - q1
# Points more than 1.5 * IQR beyond either quartile count as outliers.
lower_fence = q1 - 1.5*iqr
upper_fence = q3 + 1.5*iqr
is_outlier = (values < lower_fence) | (values > upper_fence)
outliers = df[is_outlier]
print(outliers)


**Exercise 4**

Normalize a numeric column to the [0, 1] range using min-max scaling.


In [None]:
import pandas as pd
df = pd.DataFrame({'A': [10, 20, 30, 40, 50]})
# Min-max scaling: shift by the minimum, then divide by the value range.
col_min = df['A'].min()
col_range = df['A'].max() - col_min
df['norm'] = (df['A'] - col_min) / col_range
print(df)


**Exercise 5**

Create bins for numeric data.


In [None]:
import pandas as pd
df = pd.DataFrame({'age': [15, 25, 35, 45, 55]})
# Edges define the intervals (0, 30], (30, 50], (50, 100]; one label each.
bin_edges = [0, 30, 50, 100]
bin_labels = ['young', 'mid', 'senior']
df['age_group'] = pd.cut(df['age'], bins=bin_edges, labels=bin_labels)
print(df)


**Exercise 6**

Create a conditional column based on a rule.


In [None]:
import pandas as pd
def _grade(score):
    """Return 'pass' for scores of 60 or more, otherwise 'fail'."""
    if score >= 60:
        return 'pass'
    return 'fail'


df = pd.DataFrame({'score': [45, 65, 85]})
# Apply the rule to every score to build the new column.
df['grade'] = df['score'].apply(_grade)
print(df)


**Exercise 7**

Work with a multi-index DataFrame.


In [None]:
import pandas as pd
# Promote columns A and B to a two-level row index right after construction.
df = pd.DataFrame(
    {'A': [1, 1, 2, 2], 'B': [1, 2, 1, 2], 'val': [10, 20, 30, 40]}
).set_index(['A', 'B'])
# A full (A, B) tuple selects a single row of the multi-index.
row_key = (1, 2)
print(df.loc[row_key])


**Exercise 8**

Reshape a DataFrame using stacking.


In [None]:
import pandas as pd
# Use column A as the row index so only B and C remain as value columns.
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]}).set_index('A')
# Pivot the column labels into an inner index level (wide -> long).
stacked = df.stack()
print(stacked)


**Exercise 9**

Merge DataFrames using different join types.


In [None]:
import pandas as pd
df1 = pd.DataFrame({'id': [1, 2], 'a': [10, 20]})
df2 = pd.DataFrame({'id': [2, 3], 'b': [30, 40]})
# Outer join keeps ids from both frames; unmatched cells become NaN.
merged = df1.merge(df2, on='id', how='outer')
print(merged)


**Exercise 10**

Handle missing datetime values.


In [None]:
import pandas as pd
# None is parsed as NaT, the datetime missing-value marker.
raw_dates = ['2024-01-01', None, '2024-01-03']
df = pd.DataFrame({'date': pd.to_datetime(raw_dates)})
# Replace missing timestamps with a fixed fallback date.
replacement = pd.Timestamp('2024-01-02')
print(df.fillna(replacement))


**Exercise 11**

Calculate percentage change between rows.


In [None]:
import pandas as pd
df = pd.DataFrame({'A': [100, 110, 105, 120]})
# Fractional change relative to the previous row; the first row has none.
change = df['A'].pct_change()
print(change)


**Exercise 12**

Create a plot directly from a DataFrame.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]})
# Line plot (the default kind) of column y against column x.
df.plot(x='x', y='y', kind='line')
# Render the figure.
plt.show()


**Exercise 13**

Create a cross-tabulation table.


In [None]:
import pandas as pd
df = pd.DataFrame({'A': ['X', 'Y', 'X', 'Y'], 'B': [1, 1, 2, 2]})
# Count how often each (A, B) combination occurs: A as rows, B as columns.
counts = pd.crosstab(index=df['A'], columns=df['B'])
print(counts)


**Exercise 14**

Reduce memory usage by changing data types.


In [None]:
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 3]})
# Cast the column to 8-bit integers; df itself is left unchanged.
compact = df['A'].astype('int8')
print(compact)


**Exercise 15**

Read multiple CSV files and combine them.


In [None]:
import pandas as pd
from pathlib import Path
# Collect every CSV in the current directory. glob() yields paths in an
# unspecified order, so sort them to keep the combined row order reproducible.
files = sorted(Path('.').glob('*.csv'))
# Load each file into its own DataFrame.
dfs = [pd.read_csv(f) for f in files]
# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when no CSV files were found.
combined = pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
print(combined)
