# Species Diversity Analyzer

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Suppress warnings for cleaner output.
# NOTE: with no category or module argument this installs a global "ignore"
# filter, so every warning is hidden -- including deprecation and future
# warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

### 1. Load and Prepare Data

In [None]:
# Load the dataset, normalise headers, parse dates, and derive the encoded
# helper columns used by the plotting cells below.
file_path = "ocean_climate_dataset.csv"

# Try UTF-8 first; fall back to ISO-8859-1 if the file is not valid UTF-8.
try:
    df = pd.read_csv(file_path, encoding='utf-8')
except UnicodeDecodeError:
    df = pd.read_csv(file_path, encoding='ISO-8859-1')

# Ensure column name consistency (strip stray whitespace around headers)
df.rename(columns=lambda x: x.strip(), inplace=True)

# Parse 'Date' column; unparseable values become NaT and are dropped below
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Drop duplicates and rows without a usable date
df.drop_duplicates(inplace=True)
df = df.dropna(subset=['Date'])

# Clean 'Bleaching Severity'.
# pandas' default NA handling in read_csv treats the literal text "None" as a
# missing value, so those cells can arrive here as NaN instead of the string
# 'None'. Handle both forms so the rows are not silently lost in later
# group-bys.
# NOTE(review): this assumes a missing severity means "no bleaching recorded"
# -- confirm against the dataset description.
df['Bleaching Severity'] = (
    df['Bleaching Severity']
    .replace('None', 'No Bleaching')
    .fillna('No Bleaching')
)

# Encode 'Bleaching Severity' and 'Marine Heatwave'.
# The severity mapping is applied BEFORE the categorical cast: mapping a
# categorical Series yields another categorical rather than numeric codes.
severity_mapping = {
    'No Bleaching': 0,
    'Low': 1,
    'Medium': 2,
    'High': 3,
}
df['Bleaching Severity Encoded'] = df['Bleaching Severity'].map(severity_mapping)
df['Marine Heatwave Encoded'] = df['Marine Heatwave'].astype(int)

# Convert categorical columns
df['Location'] = df['Location'].astype('category')
df['Bleaching Severity'] = df['Bleaching Severity'].astype('category')

# Display column names, a structural summary, and the first rows.
# A bare expression in the middle of a cell is discarded, so print explicitly;
# only the final expression (df.head()) is rendered by the notebook.
print(df.columns.tolist())
df.info()
df.head()

### 2. Changes in Species Count Over Time

In [None]:
# Line chart of observed species counts over time, one line per location.
fig, ax = plt.subplots(figsize=(14, 6))
sns.lineplot(
    data=df,
    x='Date',
    y='Species Observed',
    hue='Location',
    legend=False,
    ax=ax,
)

# Titles and axis labels
ax.set_title('Species Observed Over Time by Location', fontsize=16)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Species Count', fontsize=12)

# Light grid, then tighten the layout and render
ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()


### 3. Biodiversity Hotspots

In [None]:
# Define biodiversity hotspots as the ten locations with the highest average
# species count.
# observed=True keeps only locations that actually occur in the data when
# 'Location' is a categorical column.
hotspot_df = (
    df.groupby('Location', observed=True)['Species Observed']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

# Use plain string labels and an explicit order: with a categorical index
# seaborn lays the axis out by category order, not by the ranking computed
# above.
top_locations = hotspot_df.index.astype(str).tolist()

plt.figure(figsize=(12,6))
sns.barplot(x=hotspot_df.values, y=top_locations, order=top_locations, palette='viridis')

plt.title('Top 10 Biodiversity Hotspots (Avg Species Observed)', fontsize=16)
plt.xlabel('Average Species Observed', fontsize=12)
plt.ylabel('Location', fontsize=12)
plt.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

### 4. Impact of Marine Heatwaves on Species Count

In [None]:
# Average species count with and without a marine heatwave.
# Group on the integer-encoded flag so the group keys are exactly the 0/1 keys
# used in the label lookup below, whatever dtype the raw 'Marine Heatwave'
# column was read as.
heatwave_impact = df.groupby('Marine Heatwave Encoded')['Species Observed'].mean()
heatwave_labels = heatwave_impact.index.map({0: 'No Heatwave', 1: 'Heatwave'})

plt.figure(figsize=(8,6))
sns.barplot(x=heatwave_labels,
            y=heatwave_impact.values, palette='coolwarm')

plt.title('Impact of Marine Heatwaves on Species Observed', fontsize=16)
plt.xlabel('Marine Heatwave', fontsize=12)
plt.ylabel('Average Species Observed', fontsize=12)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()


### 5. Impact of Coral Bleaching Severity on Species Count

In [None]:
# Average species count per coral bleaching severity level, shown in
# increasing order of severity.
bleaching_order = ['No Bleaching', 'Low', 'Medium', 'High']
bleaching_impact = (
    df.groupby('Bleaching Severity')['Species Observed']
    .mean()
    .reindex(bleaching_order)
)

plt.figure(figsize=(8,6))
# Pass the order explicitly so the bars follow the severity progression
# regardless of the dtype of the grouped index.
sns.barplot(
    x=bleaching_impact.index,
    y=bleaching_impact.values,
    order=bleaching_order,
    palette='mako',
)

plt.title('Impact of Coral Bleaching Severity on Species Observed', fontsize=16)
plt.xlabel('Bleaching Severity', fontsize=12)
plt.ylabel('Average Species Observed', fontsize=12)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()