In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feature Engineering for Weather Prediction\n",
    "## WeatherTech Inc. Rainfall Prediction Project\n",
    "\n",
    "This notebook focuses on feature engineering and preparation for modeling."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
    "\n",
    "sys.path.append('../src')\n",
    "from data_preprocessing import handle_missing_values, scale_features\n",
    "from feature_engineering import create_weather_features, encode_categorical_features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Load and Preprocess Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Read the raw weather CSV and pass it through the project helpers in order:\n",
    "# handle_missing_values first, then create_weather_features.\n",
    "df = (\n",
    "    pd.read_csv('../data/raw/weather_data.csv')\n",
    "    .pipe(handle_missing_values)\n",
    "    .pipe(create_weather_features)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Feature Engineering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Derive calendar features when a 'date' column is present\n",
    "if 'date' in df.columns:\n",
    "    df['date'] = pd.to_datetime(df['date'])\n",
    "    df['month'] = df['date'].dt.month\n",
    "    df['day_of_week'] = df['date'].dt.dayofweek  # Monday=0 ... Sunday=6\n",
    "    # Meteorological seasons: Dec-Feb Winter, Mar-May Spring, Jun-Aug Summer, Sep-Nov Fall.\n",
    "    # `month % 12` maps December to 0 so it shares a bin with January and February;\n",
    "    # binning months 1-12 directly would label March as Winter and December as Fall.\n",
    "    # NOTE(review): assumes Northern Hemisphere data -- confirm against the data source.\n",
    "    df['season'] = pd.cut(\n",
    "        df['month'] % 12,\n",
    "        bins=[-1, 2, 5, 8, 11],\n",
    "        labels=['Winter', 'Spring', 'Summer', 'Fall'],\n",
    "    )\n",
    "\n",
    "# Interaction term: elementwise product of temperature and humidity\n",
    "if 'temperature' in df.columns and 'humidity' in df.columns:\n",
    "    df['temp_humidity_interaction'] = df['temperature'] * df['humidity']\n",
    "\n",
    "# Encode categorical variables with the project helper; its second return\n",
    "# value is kept as `encoders`\n",
    "df, encoders = encode_categorical_features(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Feature Scaling and Saving the Processed Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Scale numerical features; the second return value is kept as `scaler`\n",
    "df, scaler = scale_features(df)\n",
    "\n",
    "# Save processed dataset. Create the target directory first: to_csv does not\n",
    "# create missing parent directories and raises if the folder does not exist yet.\n",
    "processed_path = Path('../data/processed/processed_weather_data.csv')\n",
    "processed_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "df.to_csv(processed_path, index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 }
}