In [None]:
# Current version of the code
import json
import matplotlib.pyplot as plt
from PIL import Image
import requests
from io import BytesIO
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from IPython.display import display
import pandas as pd
from IPython.core.display import display, HTML
from dateutil import parser 

def main(timeout, func, window):
    """Run a socket-based Spark Streaming job for a bounded time.

    A text stream is opened on 127.0.0.1:9999, handed to ``func`` so it can
    attach its processing, and the streaming context is then started and
    stopped again once ``awaitTerminationOrTimeout`` returns.

    Args:
        timeout: Value passed to ``awaitTerminationOrTimeout`` (PySpark
            documents it as seconds).
        func: Callable that receives the socket text stream and registers
            the transformations/outputs on it.
        window: Batch duration passed to ``StreamingContext``.
    """
    # Function-scope import so the block does not depend on a new
    # file-level import.
    from pyspark import SparkConf

    conf = SparkConf().setMaster("local[*]").setAppName("Streaming-Aggregation")
    # The streaming context is stopped below WITHOUT stopping the
    # SparkContext, so the context outlives this call. Constructing a second
    # SparkContext on the next invocation (e.g. re-running the notebook cell)
    # raises "Cannot run multiple SparkContexts at once"; getOrCreate reuses
    # the live one instead.
    sc = SparkContext.getOrCreate(conf)
    ssc = StreamingContext(sc, window)
    stream = ssc.socketTextStream("127.0.0.1", 9999)
    func(stream)
    try:
        ssc.start()
        ssc.awaitTerminationOrTimeout(timeout)
    except Exception as e:
        # Best-effort: report the problem but still fall through to cleanup.
        print(str(e))
    finally:
        # False -> keep the SparkContext alive; only the streaming part stops.
        ssc.stop(False)


def aggregate_common_name_with_image(stream):
    """Count records per species common name in each batch and display them.

    Every line of ``stream`` is parsed as JSON and mapped to
    ``(species, (count, image_url, timestamps))``. Records are then reduced
    per species and each resulting RDD is passed to ``show_images``.

    Args:
        stream: DStream of text lines, each expected to be a JSON object with
            a ``species`` object (``commonName``, optional ``thumbnailUrl``)
            and an optional ``timestamp`` string.
    """
    def safe_parse(line):
        """Parse one line; never raises, malformed lines map to "Fehler"."""
        try:
            obj = json.loads(line)
            species = obj['species']['commonName']
            image_url = obj['species'].get('thumbnailUrl', '')
            timestamp_str = obj.get('timestamp', None)
            try:
                timestamp = parser.parse(timestamp_str).timestamp() if timestamp_str else None
            except Exception:
                # An unparsable timestamp must not discard the whole record.
                timestamp = None
            return (species, (1, image_url, [timestamp] if timestamp is not None else []))
        except Exception:
            # Deliberate best-effort: a bad line must not kill the stream job.
            # It is reported under the "Fehler" key with a count of 0.
            return ("Fehler", (0, "", []))

    def merge(a, b):
        """Combine two partial aggregates of the same species."""
        # Prefer the first non-empty image URL. Always keeping ``a[1]`` would
        # drop the thumbnail whenever the first record seen had none.
        return (a[0] + b[0], a[1] or b[1], a[2] + b[2])

    parsed_stream = stream.map(safe_parse)
    aggregated_stream = parsed_stream.reduceByKey(merge)
    aggregated_stream.foreachRDD(show_images)


def show_images(rdd):
    """Render one batch of aggregated species counts as an HTML table.

    Prints the time span covered by the timestamps found in the batch and
    displays a table (species, count, thumbnail) sorted by descending count.
    Nothing is printed or displayed for an empty batch.

    Args:
        rdd: RDD whose elements have the shape
            ``(species, (count, image_url, timestamps))``.
    """
    # Function-scope import so the block does not depend on a new
    # file-level import.
    import html

    results = rdd.collect()
    if not results:
        return

    sorted_results = sorted(results, key=lambda x: x[1][0], reverse=True)
    all_timestamps = [ts for _, (_, _, timestamps) in results for ts in timestamps]
    if all_timestamps:
        min_timestamp = min(all_timestamps)
        max_timestamp = max(all_timestamps)
        print(f"Window Fenster von: {pd.to_datetime(min_timestamp, unit='s')} bis {pd.to_datetime(max_timestamp, unit='s')}")
        time_diff = max_timestamp - min_timestamp
        minutes = int(time_diff // 60)
        seconds = int(time_diff % 60)
        print(f"Zeitfenster: {minutes} Minuten {seconds} Sekunden")
    else:
        print("Keine gültigen Timestamps in diesem Window.")

    data = []
    for species, (count, url, _) in sorted_results:
        # Species and URL come straight from the stream and the table is
        # rendered with escape=False, so escape them here to keep stray
        # markup out of the notebook output.
        if url:
            safe_url = html.escape(str(url), quote=True)
            img_html = f'<img src="{safe_url}" width="50" height="50">'
        else:
            img_html = ""
        data.append([html.escape(str(species), quote=False), count, img_html])

    df = pd.DataFrame(data, columns=["Spezies", "Anzahl", "Bild"])
    # to_html honours display.max_colwidth (50 characters by default) and
    # would cut the <img ...> tags off mid-attribute; lift the limit while
    # rendering only.
    with pd.option_context("display.max_colwidth", None):
        table_html = df.to_html(escape=False, index=False)
    display(HTML(table_html))


def show_image_from_url(url, timeout=10):
    """Download an image and display it inline.

    Any failure (invalid URL, network error, HTTP error status, undecodable
    image data) is reported with a printed message instead of an exception.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Fail on 4xx/5xx here rather than feeding an error page to PIL,
        # which would only yield a confusing "cannot identify image" message.
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))
        display(img)
    except Exception as e:
        print(f"Fehler beim Laden des Bildes: {e}")

# Launch the streaming aggregation: `window` is handed to StreamingContext as
# the batch duration and `timeout` to awaitTerminationOrTimeout.
main(timeout=1000, func=aggregate_common_name_with_image, window=10)



  from IPython.core.display import display, HTML


Window Fenster von: 2025-03-02 15:23:46.500000 bis 2025-03-02 15:24:31.500000
Zeitfenster: 0 Minuten 45 Sekunden


Spezies,Anzahl,Bild
House Sparrow,3,
Long-tailed Tit,1,
Great Tit,1,


Window Fenster von: 2025-03-02 15:24:36 bis 2025-03-02 16:24:45
Zeitfenster: 60 Minuten 9 Sekunden


Spezies,Anzahl,Bild
Great Tit,51,
House Sparrow,20,
Eurasian Magpie,16,
Eurasian Blue Tit,8,
Long-tailed Tit,2,
Great Spotted Woodpecker,1,
Eurasian Treecreeper,1,
Redwing,1,


Window Fenster von: 2025-03-02 16:24:46.500000 bis 2025-03-02 16:39:25
Zeitfenster: 14 Minuten 38 Sekunden


Spezies,Anzahl,Bild
Great Tit,89,
Eurasian Blue Tit,9,
Common Firecrest,1,


Window Fenster von: 2025-03-02 16:39:37 bis 2025-03-05 12:15:01
Zeitfenster: 4055 Minuten 24 Sekunden


Spezies,Anzahl,Bild
Great Tit,36,
Song Thrush,20,
Eurasian Coot,6,
Eurasian Jackdaw,3,
Redwing,3,
Eurasian Collared-Dove,3,
Eurasian Tree Sparrow,2,
Common Firecrest,2,
Hawfinch,2,
Carrion Crow,2,


Window Fenster von: 2025-03-05 12:17:13 bis 2025-03-05 13:20:36.500000
Zeitfenster: 63 Minuten 23 Sekunden


Spezies,Anzahl,Bild
Eurasian Blue Tit,49,
Great Tit,35,
Long-tailed Tit,12,
Fieldfare,1,
Redwing,1,
Eurasian Moorhen,1,


Window Fenster von: 2025-03-05 13:20:39.500000 bis 2025-03-05 13:43:01
Zeitfenster: 22 Minuten 21 Sekunden


Spezies,Anzahl,Bild
Great Tit,87,
Long-tailed Tit,8,
European Goldfinch,2,
Eurasian Blue Tit,1,
Common Firecrest,1,


Window Fenster von: 2025-03-05 13:43:16 bis 2025-03-05 16:39:28
Zeitfenster: 176 Minuten 12 Sekunden


Spezies,Anzahl,Bild
Great Tit,77,
Eurasian Magpie,11,
Eurasian Blue Tit,5,
Long-tailed Tit,2,
Common Chaffinch,1,
Common Firecrest,1,
Goldcrest,1,
Redwing,1,
House Sparrow,1,


Window Fenster von: 2025-03-05 16:39:32.500000 bis 2025-03-05 17:22:28
Zeitfenster: 42 Minuten 55 Sekunden


Spezies,Anzahl,Bild
Great Tit,69,
Eurasian Magpie,12,
Eurasian Blue Tit,9,
Common Firecrest,3,
Eurasian Coot,3,
Eurasian Treecreeper,1,
Long-tailed Tit,1,
Water Rail,1,


Window Fenster von: 2025-03-05 17:22:29.500000 bis 2025-03-06 05:23:21.500000
Zeitfenster: 720 Minuten 52 Sekunden


Spezies,Anzahl,Bild
Great Tit,30,
Song Thrush,22,
European Robin,18,
Eurasian Blackbird,4,
Redwing,4,
Tawny Owl,4,
Green-winged Teal,4,
Mallard,3,
Eurasian Coot,2,
Eurasian Magpie,2,


Window Fenster von: 2025-03-06 05:23:23 bis 2025-03-06 05:40:33.500000
Zeitfenster: 17 Minuten 10 Sekunden


Spezies,Anzahl,Bild
European Robin,80,
Great Tit,11,
Common Firecrest,3,
Eurasian Blue Tit,2,
Black Redstart,2,
Goldcrest,1,
Redwing,1,
