In [6]:
#### Identify Extreme Data Values

# Import necessary libraries
import pandas as pd

# Read the birdwatcher records from disk into a DataFrame
birdwatcher_data = pd.read_csv('birdwatcher.csv')

# Every check below concerns age, so pull that column out once
age_column = birdwatcher_data['Age']

# Cut-off ages marking the youngest 5% and the oldest 5% of birdwatchers
age_5th_percentile = age_column.quantile(0.05)
age_95th_percentile = age_column.quantile(0.95)

# Smallest and largest age found within each tail
# (the comparisons are inclusive of the percentile value itself)
oldest_5_percent_range = age_column[age_column >= age_95th_percentile].agg(['min', 'max'])
youngest_5_percent_range = age_column[age_column <= age_5th_percentile].agg(['min', 'max'])

# How many birdwatchers are older than 100 or younger than 18;
# summing a boolean mask counts the rows where the condition holds
above_100_count = int((age_column > 100).sum())
below_18_count = int((age_column < 18).sum())

# Ages that cannot be real: negative, or greater than 200.
# The mask is built once and reused for both the count and the row listing.
impossible_age_mask = (age_column < 0) | (age_column > 200)
impossible_values_count = int(impossible_age_mask.sum())

# Report the summary figures
print('Range of the oldest 5%:', oldest_5_percent_range)
print('Range of the youngest 5%:', youngest_5_percent_range)
print('Birdwatchers above 100 years:', above_100_count)
print('Birdwatchers below 18 years:', below_18_count)
print('Impossible values:', impossible_values_count)

# Show the full rows whose age is impossible
print("Impossible Ages:")
print(birdwatcher_data[impossible_age_mask])

Range of the oldest 5%: min    101
max    723
Name: Age, dtype: int64
Range of the youngest 5%: min    -1
max    22
Name: Age, dtype: int64
Birdwatchers above 100 years: 1333
Birdwatchers below 18 years: 11
Impossible values: 2
Impossible Ages:
      BirdwatcherID  Age Favorite_Bird  Lens_size  Repeat_Experience  \
1152           1153  723    woodpecker         10                  0   
1366           1367   -1    woodpecker         10                  0   

            Date  
1152  2023-04-01  
1366  2023-11-04  
