# 🌪️ Tauktae Cyclone Dataset – Exploratory Data Analysis (EDA)

In [None]:

# Install required libraries if not already installed (run in terminal if needed)
# pip install pandas matplotlib seaborn scikit-learn folium ipykernel

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
import folium


In [None]:

# Load dataset (Tauktae.csv is read relative to the current working directory)
df = pd.read_csv("Tauktae.csv")

print("✅ Dataset Loaded Successfully!")
print(df.head())

# When a "Date" column is present, parse it to datetimes (unparseable
# entries become NaT because of errors="coerce") and order rows by date.
if "Date" in df.columns:
    parsed_dates = pd.to_datetime(df["Date"], errors="coerce")
    df = df.assign(Date=parsed_dates).sort_values("Date")


In [None]:

# Summary Statistics
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() emitted a stray "None".
df.info()
print("\nSummary Statistics:")
print(df.describe())

# Missing Values Heatmap: one cell per (row, column) of df.isnull(),
# so missing entries show up as a contrasting colour.
plt.figure(figsize=(6, 4))
sns.heatmap(df.isnull(), cbar=False, cmap="viridis")
plt.title("Missing Values Heatmap")
plt.show()


In [None]:

# Correlation Heatmap
# Pairwise correlations are computed over the numeric columns only and
# each cell is annotated with its coefficient to two decimals.
corr_matrix = df.corr(numeric_only=True)

plt.figure(figsize=(8, 6))
corr_ax = sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f")
corr_ax.set_title("Correlation Heatmap of Features")
plt.show()


In [None]:

# Distributions: one histogram per column that DataFrame.hist can plot
hist_options = {
    "figsize": (12, 8),
    "bins": 15,
    "color": "skyblue",
    "edgecolor": "black",
}
df.hist(**hist_options)
plt.gcf().suptitle("Feature Distributions", fontsize=16)
plt.show()

# Boxplots: all features side by side on a single axes
plt.figure(figsize=(12, 6))
box_ax = sns.boxplot(data=df, palette="Set2")
box_ax.set_title("Boxplots of Features")
plt.show()


In [None]:

# Pairplot
# Only numeric columns are passed in; corner=True draws the lower
# triangle of the grid, with KDE curves on the diagonal.
numeric_features = df.select_dtypes(include=["number"])
pairplot_options = {"diag_kind": "kde", "corner": True}

sns.pairplot(numeric_features, **pairplot_options)
plt.suptitle("Pairplot of Features", y=1.02)
plt.show()


In [None]:

# Time-Series Plots
# Each entry: (column name, human-readable label, marker, line colour).
# A series is plotted only if both "Date" and its column are present.
series_specs = [
    ("WindSpeed", "Wind Speed", "o", "blue"),
    ("Pressure", "Pressure", "s", "red"),
]

if "Date" in df.columns:
    for column, label, marker_style, line_color in series_specs:
        if column not in df.columns:
            continue

        plt.figure(figsize=(12, 6))
        plt.plot(df["Date"], df[column], marker=marker_style, color=line_color)
        plt.title(f"Tauktae Cyclone - {label} Over Time")
        plt.xlabel("Date")
        plt.ylabel(label)
        plt.grid(True)
        plt.show()


In [None]:

# Geographical Path of Cyclone
# Draws the track twice: as a static matplotlib figure and as an interactive
# folium map written to "Tauktae_Cyclone_Path.html". The whole cell is
# skipped unless both coordinate columns are present.
if "Latitude" in df.columns and "Longitude" in df.columns:
    # Rows without coordinates cannot be placed on a map; folium raises on
    # NaN locations, so they are dropped up front for both plots.
    track = df.dropna(subset=["Latitude", "Longitude"])
    has_wind = "WindSpeed" in track.columns

    if track.empty:
        print("⚠️ No rows with valid Latitude/Longitude; skipping cyclone path plots.")
    else:
        # Static Path Plot
        plt.figure(figsize=(8, 8))
        plt.plot(track["Longitude"], track["Latitude"],
                 marker="o", color="darkorange", linewidth=2)

        # Colour the points by wind speed when available; otherwise use a
        # flat colour and pass no colormap (there is nothing to map).
        scatter_kwargs = {"s": 50, "alpha": 0.7, "edgecolor": "k"}
        if has_wind:
            scatter_kwargs.update(c=track["WindSpeed"], cmap="coolwarm")
        else:
            scatter_kwargs["c"] = "blue"
        points = plt.scatter(track["Longitude"], track["Latitude"], **scatter_kwargs)

        plt.title("Cyclone Tauktae Path (Lat vs Lon)")
        plt.xlabel("Longitude")
        plt.ylabel("Latitude")
        plt.grid(True)
        # A colour bar is only meaningful when the points carry mapped data.
        if has_wind:
            plt.colorbar(points, label="Wind Speed")
        plt.show()

        # Interactive Map with Folium, centred on the mean track position
        m = folium.Map(
            location=[track["Latitude"].mean(), track["Longitude"].mean()],
            zoom_start=5,
            tiles="cartodbpositron",
        )
        path = list(zip(track["Latitude"], track["Longitude"]))
        folium.PolyLine(path, color="red", weight=3).add_to(m)

        # One marker per observation; the popup falls back to 'N/A' when a
        # measurement column is absent from the dataset.
        for _, row in track.iterrows():
            folium.CircleMarker(
                location=(row["Latitude"], row["Longitude"]),
                radius=5,
                color="blue",
                fill=True,
                popup=f"WindSpeed: {row.get('WindSpeed', 'N/A')}, Pressure: {row.get('Pressure', 'N/A')}"
            ).add_to(m)

        m.save("Tauktae_Cyclone_Path.html")
        print("🌍 Interactive cyclone map saved as 'Tauktae_Cyclone_Path.html'")
