In [1]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# İklim Gündemi: Spark ile Büyük Veri Analizi\n",
    "\n",
    "Bu notebook, Apache Spark kullanarak iklim haberleri ve çevresel verilerin büyük ölçekli analizini gerçekleştirir."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "from pyspark.sql import SparkSession\n",
    "from pyspark.sql.functions import *\n",
    "from pyspark.sql.types import *\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Spark Oturumu Başlatma"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Build (or reuse) the Spark session that talks to the \"climatewatch\" MongoDB database.\n",
    "#\n",
    "# The other cells use the connector's 3.x API: the \"mongo\" data source name and\n",
    "# the spark.mongodb.input.uri / spark.mongodb.output.uri settings. The 10.x\n",
    "# connector registers itself as \"mongodb\" and reads\n",
    "# spark.mongodb.read.connection.uri / spark.mongodb.write.connection.uri instead,\n",
    "# so a 3.x artifact is pinned here to match the API used below.\n",
    "# NOTE(review): confirm 3.0.2 suits the cluster's Spark/Scala version.\n",
    "spark = (\n",
    "    SparkSession.builder\n",
    "    .appName(\"ClimateWatch\")\n",
    "    .config(\"spark.mongodb.input.uri\", \"mongodb://mongodb:27017/climatewatch\")\n",
    "    .config(\"spark.mongodb.output.uri\", \"mongodb://mongodb:27017/climatewatch\")\n",
    "    .config(\"spark.jars.packages\", \"org.mongodb.spark:mongo-spark-connector_2.12:3.0.2\")\n",
    "    .getOrCreate()\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Verileri Yükleme"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Load the GDELT events collection from the \"climatewatch\" database.\n",
    "gdelt_df = (\n",
    "    spark.read.format(\"mongo\")\n",
    "    .option(\"database\", \"climatewatch\")\n",
    "    .option(\"collection\", \"gdelt_events\")\n",
    "    .load()\n",
    ")\n",
    "\n",
    "# Load the climate measurements collection from the same database.\n",
    "climate_df = (\n",
    "    spark.read.format(\"mongo\")\n",
    "    .option(\"database\", \"climatewatch\")\n",
    "    .option(\"collection\", \"climate_data\")\n",
    "    .load()\n",
    ")\n",
    "\n",
    "# Print a heading followed by the schema of each loaded frame.\n",
    "for heading, frame in (\n",
    "    (\"GDELT veri şeması:\", gdelt_df),\n",
    "    (\"\\nİklim veri şeması:\", climate_df),\n",
    "):\n",
    "    print(heading)\n",
    "    frame.printSchema()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Haber Trendleri Analizi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Number of news events per year, ordered by year.\n",
    "yearly_trend = (\n",
    "    gdelt_df\n",
    "    .groupBy(\"year\")\n",
    "    .agg(count(\"*\").alias(\"event_count\"))\n",
    "    .orderBy(\"year\")\n",
    ")\n",
    "\n",
    "# Bring the per-year aggregate to the driver as a pandas DataFrame for plotting.\n",
    "yearly_pd = yearly_trend.toPandas()\n",
    "\n",
    "fig = plt.figure(figsize=(12, 6))\n",
    "sns.set_style(\"whitegrid\")  # set after the figure exists, before the axes are created\n",
    "ax = fig.add_subplot(1, 1, 1)\n",
    "sns.lineplot(data=yearly_pd, x=\"year\", y=\"event_count\", marker=\"o\", ax=ax)\n",
    "ax.set_title(\"Yıllara Göre İklim Haber Sayısı\", fontsize=14, pad=15)\n",
    "ax.set_xlabel(\"Yıl\", fontsize=12)\n",
    "ax.set_ylabel(\"Haber Sayısı\", fontsize=12)\n",
    "plt.xticks(rotation=45)\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Duygu Analizi Trendleri"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Mean sentiment score per year, ordered by year.\n",
    "sentiment_trend = (\n",
    "    gdelt_df\n",
    "    .groupBy(\"year\")\n",
    "    .agg(avg(\"sentiment_score\").alias(\"avg_sentiment\"))\n",
    "    .orderBy(\"year\")\n",
    ")\n",
    "\n",
    "# Bring the per-year aggregate to the driver as a pandas DataFrame for plotting.\n",
    "sentiment_pd = sentiment_trend.toPandas()\n",
    "\n",
    "fig = plt.figure(figsize=(12, 6))\n",
    "sns.set_style(\"whitegrid\")  # set after the figure exists, before the axes are created\n",
    "ax = fig.add_subplot(1, 1, 1)\n",
    "sns.lineplot(data=sentiment_pd, x=\"year\", y=\"avg_sentiment\", marker=\"o\", ax=ax)\n",
    "ax.set_title(\"Yıllara Göre Ortalama Duygu Skoru\", fontsize=14, pad=15)\n",
    "ax.set_xlabel(\"Yıl\", fontsize=12)\n",
    "ax.set_ylabel(\"Ortalama Duygu Skoru\", fontsize=12)\n",
    "plt.xticks(rotation=45)\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. İklim Verileri ile Korelasyon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Aggregate each source to one row per year BEFORE joining.\n",
    "# Joining the raw frames pairs every news event with every climate record of\n",
    "# the same year, which inflates count(\"*\"); and joining on the expression\n",
    "# gdelt_df.year == climate_df.year keeps two \"year\" columns, which makes the\n",
    "# following groupBy(\"year\") an ambiguous reference.\n",
    "news_by_year = gdelt_df.groupBy(\"year\").agg(\n",
    "    avg(\"sentiment_score\").alias(\"avg_sentiment\"),\n",
    "    count(\"*\").alias(\"event_count\"),\n",
    ")\n",
    "temperature_by_year = climate_df.groupBy(\"year\").agg(\n",
    "    avg(\"temperature\").alias(\"avg_temperature\")\n",
    ")\n",
    "\n",
    "# Joining on the column NAME yields a single \"year\" column in the result.\n",
    "correlation_df = (\n",
    "    news_by_year\n",
    "    .join(temperature_by_year, on=\"year\", how=\"inner\")\n",
    "    .select(\"year\", \"avg_sentiment\", \"avg_temperature\", \"event_count\")\n",
    "    .orderBy(\"year\")\n",
    ")\n",
    "\n",
    "# Bring the per-year result to the driver as a pandas DataFrame for plotting.\n",
    "correlation_pd = correlation_df.toPandas()\n",
    "\n",
    "\n",
    "def plot_with_temperature(ax, data, column, label, color, title):\n",
    "    \"\"\"Draw one yearly metric on `ax` with average temperature on a twin y-axis.\n",
    "\n",
    "    Args:\n",
    "        ax: Matplotlib axes that receives the metric line.\n",
    "        data: pandas DataFrame with \"year\", \"avg_temperature\" and `column`.\n",
    "        column: Name of the metric column plotted on the left y-axis.\n",
    "        label: Line label and left y-axis label for the metric.\n",
    "        color: Single-letter matplotlib colour code for the metric.\n",
    "        title: Title of the panel.\n",
    "\n",
    "    Returns:\n",
    "        The twin axes that carries the temperature line.\n",
    "    \"\"\"\n",
    "    ax.plot(data[\"year\"], data[column], f\"{color}-\", label=label)\n",
    "    ax.set_ylabel(label, color=color)\n",
    "    ax.tick_params(axis=\"y\", labelcolor=color)\n",
    "\n",
    "    # Temperature shares the x-axis but gets its own scale on the right.\n",
    "    twin = ax.twinx()\n",
    "    twin.plot(data[\"year\"], data[\"avg_temperature\"], \"r-\", label=\"Sıcaklık\")\n",
    "    twin.set_ylabel(\"Ortalama Sıcaklık\", color=\"r\")\n",
    "    twin.tick_params(axis=\"y\", labelcolor=\"r\")\n",
    "\n",
    "    ax.set_title(title, fontsize=14, pad=15)\n",
    "    return twin\n",
    "\n",
    "\n",
    "fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))\n",
    "\n",
    "# Top panel: news volume against temperature.\n",
    "ax1_twin = plot_with_temperature(\n",
    "    ax1, correlation_pd, \"event_count\", \"Haber Sayısı\", \"b\",\n",
    "    \"Haber Sayısı ve Sıcaklık Trendi\",\n",
    ")\n",
    "\n",
    "# Bottom panel: average sentiment against temperature.\n",
    "ax2_twin = plot_with_temperature(\n",
    "    ax2, correlation_pd, \"avg_sentiment\", \"Duygu Skoru\", \"g\",\n",
    "    \"Duygu Skoru ve Sıcaklık Trendi\",\n",
    ")\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Sonuçları MongoDB'ye Kaydetme"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Write each aggregate to its own collection in the \"climatewatch\" database,\n",
    "# using overwrite mode.\n",
    "for result_df, collection_name in (\n",
    "    (yearly_trend, \"yearly_news_trend\"),\n",
    "    (sentiment_trend, \"sentiment_trend\"),\n",
    "    (correlation_df, \"climate_correlation\"),\n",
    "):\n",
    "    (\n",
    "        result_df.write\n",
    "        .format(\"mongo\")\n",
    "        .mode(\"overwrite\")\n",
    "        .option(\"database\", \"climatewatch\")\n",
    "        .option(\"collection\", collection_name)\n",
    "        .save()\n",
    "    )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

NameError: name 'null' is not defined