# Dataset and Preprocessing

```python
import pandas as pd
```

## Preprocessing of Dataset 1

```python
# Build a gap-free yearly count table: one row per (species, site, year)
# for every year in `years`, with missing years filled by interpolation.
data = pd.read_csv('animal_sorted.csv')
penguins = pd.unique(data['common_name'])
sites = pd.unique(data['site_id'])
years = list(range(2002, 2021))  # 2002..2020 inclusive

# One frame per (species, site) pair; kept separate from the final
# DataFrame so a single name is not rebound from list to DataFrame.
frames = []
for penguin in penguins:
    for site in sites:
        filtered = data[(data['common_name'] == penguin) & (data['site_id'] == site)]
        # Several records in the same year are averaged into one value.
        count = filtered.groupby('year')['penguin_count'].mean()
        # Assigning the Series aligns it on the year index, leaving NaN for
        # every year that has no record.
        df = pd.DataFrame(index=years)
        df['penguin_count'] = count
        # Linear interpolation between known years; limit_direction='both'
        # also fills the leading/trailing gaps. Pairs with no records at all
        # stay NaN after interpolation and are set to 0.0 by fillna.
        df = df.interpolate(method='linear', limit_direction='both').fillna(0.0)
        df['common_name'] = penguin
        df['site_id'] = site
        frames.append(df.reset_index(names='year'))

# ignore_index expects a bool; the per-pair row labels are discarded in favour
# of a fresh 0..n-1 index.
interpolated_data = pd.concat(frames, ignore_index=True)
print(penguins)
print(sites)
print(years)
print(interpolated_data)
interpolated_data.to_csv('interpolated_penguin_data.csv', index=False)
```

## Preprocessing of Dataset 2

## Preprocessing of Dataset 3

```python
# Stack the workbooks all1.xlsx .. all18.xlsx into one table.
# The frames are collected first and concatenated once: calling pd.concat
# inside the loop re-copies all previously read rows on every iteration.
frames = []
for file_number in range(1, 19):
    file_name = f'all{file_number}.xlsx'
    data = pd.read_excel(file_name)
    frames.append(data)

combined_data = pd.concat(frames, ignore_index=True)
combined_data.to_csv('combined_data.csv', index=False)

# The combined table is deliberately re-read from disk rather than reused in
# memory: the CSV round trip lets read_csv re-infer column dtypes, so skipping
# it could change what ends up in the final file.
df = pd.read_csv('combined_data.csv')

# Remove the two day-count threshold columns before writing the final table.
df = df.drop(columns=[
    'Days with average temperature ≥ 18 ℃',
    'Days with average temperature ≥ 35 ℃',
])

df.to_csv('antarctica_temperature.csv', index=False)
```

## Preprocessing of Dataset 4

```python
# Stack the files 1.csv .. 111.csv into one table.
# The frames are collected first and concatenated once: calling pd.concat
# inside the loop re-copies all previously read rows on every iteration.
frames = []
for file_number in range(1, 112):
    file_name = f'{file_number}.csv'
    data = pd.read_csv(file_name)
    frames.append(data)

combined_data = pd.concat(frames, ignore_index=True)

# NOTE(review): 'combined_data.csv' is also the intermediate file name written
# by the Dataset 3 step; running both steps in the same working directory
# overwrites that file. Confirm this is intended before renaming either one.
combined_data.to_csv('combined_data.csv', index=False)
```

## Preprocessing of Dataset 5

```python
# Reduce the sea-level series to one row per year, keeping the largest
# GMSL_GIA value observed within each year.
df = pd.read_csv('sealevel.csv')

# as_index=False keeps 'year' as an ordinary column, so no reset_index is needed.
df_yearly_max = df.groupby('year', as_index=False)['GMSL_GIA'].max()

df_yearly_max.to_csv('sealevel_year.csv', index=False)
```