In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import warnings 
# Silence every warning for the rest of the notebook.
# NOTE(review): this also hides deprecation warnings from pandas/seaborn,
# so API drift in the cells below goes unnoticed — consider narrowing it.
warnings.filterwarnings("ignore")

# Load the bird-observation table from the Kaggle input directory.
df = pd.read_csv("/kaggle/input/indian-bird-observations-tracking-species/Birds of India.csv")

# Quick sanity checks: last rows, column summary, and per-column null counts.
print(df.tail())
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would append a stray "None" line.
df.info()
print(df.isnull().sum())

# <div style="text-align:center; padding:15px; color:white; margin:0; font-size:150%; font-family:'Times New Roman'; background-color:#6A0977; overflow:hidden"><b>Import libraries and load data</b></div>


In [None]:
# Parse the date strings using the '%B %d, %Y' layout; values that do not
# match are coerced to NaT instead of raising.
df['last observation'] = pd.to_datetime(df['last observation'], format='%B %d, %Y', errors='coerce')

# Strip thousands separators and convert the counts to integers.
# The astype(str) step keeps this cell re-runnable: on a second run the column
# is already int, and the .str accessor would otherwise raise. It also covers
# the case where read_csv already parsed the column as numeric.
df['total observations'] = (
    df['total observations']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(int)
)

# Confirm the resulting dtypes and show a small sample rather than the whole frame.
print(df.dtypes)
print(df.head())

# <div style="text-align:center; padding:15px; color:white; margin:0; font-size:150%; font-family:'Times New Roman'; background-color:#6A0977; overflow:hidden"><b>EDA</b></div>


In [None]:
# Histogram (20 bins) of the per-row observation totals with a KDE overlay.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x='total observations', bins=20, kde=True, ax=ax)
ax.set_title('Distribution of Total Observations')
ax.set_xlabel('Total Observations')
ax.set_ylabel('Frequency')
plt.show()


In [None]:
# Keep the ten rows with the largest observation totals.
top_10_species = df.nlargest(n=10, columns='total observations')

# Horizontal bars: one bar per name, bar length = total observations.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=top_10_species, x='total observations', y='name', ax=ax)
ax.set(
    title='Top 10 Bird Species by Total Observations',
    xlabel='Total Observations',
    ylabel='Bird Species',
)
plt.show()


In [None]:
# Label every row with the month name of its last-observation date.
df['month'] = df['last observation'].dt.month_name()

# Calendar-ordered month labels for the x axis.
# Built with month_name() — the same call that produces df['month'] — so the
# labels always match the data; strftime('%B') depends on the process locale.
# 'MS' (month start) is used because the month-end alias 'M' is deprecated in
# recent pandas releases.
month_order = pd.date_range('2024-01-01', periods=12, freq='MS').month_name().tolist()

# Number of rows whose last observation falls in each month.
plt.figure(figsize=(12, 6))
sns.countplot(x='month', data=df, order=month_order)
plt.title('Count of Observations by Month')
plt.xlabel('Month')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.show()


In [None]:
# One point per row: last-observation date against total observations.
fig, ax = plt.subplots(figsize=(14, 6))
sns.scatterplot(data=df, x='last observation', y='total observations', ax=ax)
ax.set_title('Scatter Plot of Total Observations over Time')
ax.set_xlabel('Last Observation Date')
ax.set_ylabel('Total Observations')
plt.show()


In [None]:
# Calendar-ordered month labels for the x axis.
# month_name() is used instead of strftime('%B') so the labels do not depend on
# the process locale, and 'MS' (month start) replaces the month-end alias 'M',
# which is deprecated in recent pandas releases.
month_order = pd.date_range('2024-01-01', periods=12, freq='MS').month_name().tolist()

# Spread of total observations per month (reads the df['month'] column
# created in an earlier cell).
plt.figure(figsize=(12, 6))
sns.boxplot(x='month', y='total observations', data=df, order=month_order)
plt.title('Box Plot of Total Observations by Month')
plt.xlabel('Month')
plt.ylabel('Total Observations')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Keep the five rows with the largest observation totals.
top_5_species = df.nlargest(n=5, columns='total observations')

# Each wedge is one of those rows; percentages are relative to the five shown,
# not to the whole dataset.
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    top_5_species['total observations'],
    labels=top_5_species['name'],
    autopct='%1.1f%%',
    startangle=140,
)
ax.set_title('Pie Chart of Observations by Top 5 Species')
plt.show()


In [None]:
# Line view of total observations against the last-observation date.
fig, ax = plt.subplots(figsize=(14, 6))
sns.lineplot(data=df, x='last observation', y='total observations', ax=ax)
ax.set(
    title='Line Plot of Total Observations over Time',
    xlabel='Last Observation Date',
    ylabel='Total Observations',
)
plt.show()


In [None]:
# Smoothed density estimate of the observation totals.
plt.figure(figsize=(10, 6))
# `fill=True` replaces the deprecated `shade=True` keyword; the shaded area
# under the curve is unchanged.
sns.kdeplot(df['total observations'], fill=True)
plt.title('KDE Plot of Total Observations')
plt.xlabel('Total Observations')
plt.ylabel('Density')
plt.show()


In [None]:
# Derive the first character of each name; later cells reuse this column.
df['first_letter'] = df['name'].str.get(0)

# Order the bars from the most to the least frequent letter.
letter_order = df['first_letter'].value_counts().index

fig, ax = plt.subplots(figsize=(12, 8))
sns.countplot(data=df, x='first_letter', order=letter_order, ax=ax)
ax.set_title('Count of Bird Species by First Letter')
ax.set_xlabel('First Letter')
ax.set_ylabel('Count')
plt.show()


In [None]:
# Swarm plot: one point per row, grouped by the name's first letter
# (reads the df['first_letter'] column created in an earlier cell).
fig, ax = plt.subplots(figsize=(14, 8))
sns.swarmplot(data=df, x='first_letter', y='total observations', ax=ax)
ax.set(
    title='Swarm Plot of Total Observations by First Letter',
    xlabel='First Letter',
    ylabel='Total Observations',
)
plt.show()


In [None]:
# Strip plot: one point per row, grouped by the name's first letter
# (reads the df['first_letter'] column created in an earlier cell).
fig, ax = plt.subplots(figsize=(14, 8))
sns.stripplot(data=df, x='first_letter', y='total observations', ax=ax)
ax.set_title('Strip Plot of Total Observations by First Letter')
ax.set_xlabel('First Letter')
ax.set_ylabel('Total Observations')
plt.show()


<div style="text-align:center; padding:15px; color:white; margin:0; font-size:150%; font-family:'Times New Roman'; background-color:#6A0977; overflow:hidden">
  <b>If you found this notebook helpful, please consider upvoting it! Thank you!</b>
</div>