# Data Query Language (DQL)
Mit der DQL können Daten aus einer Datenbank abgerufen, gefiltert und/oder aggregiert werden.

## 1. Grundlegende Auswahl

- SELECT: Ruft Daten aus einer oder mehreren Tabellen ab
- SELECT **DISTINCT**: Zeigt nur eindeutige Werte -> eliminiert Duplikate
- SELECT ATTRIBUT **AS** neuerName: Vergibt Aliasnamen für Spalten und Tabellen
- SELECT * FROM users **LIMIT** 10: Begrenzt die Anzahl an zurückgegebenen Zeilen.


In [4]:
#%pip install pandas
import sqlite3
import pandas as pd

# Open the SQLite database file that the hydrology queries below read from.
conn = sqlite3.connect("hydrology_data.db")
# Cursor on that connection, for issuing SQL statements directly.
cur = conn.cursor()

In [5]:
# Preview the Temperature table: LIMIT 3 restricts the result to three rows.
pd.read_sql("SELECT * FROM Temperature LIMIT 3", conn)

Unnamed: 0,Timestamp,Temperature
0,2017-06-04 14:57:28.059000,19.189117
1,2017-06-05 14:57:28.059000,25.510571
2,2017-06-06 14:57:28.059000,24.46254


In [6]:
# AS renames the Temperature column to "tmp" in the result; the table itself is unchanged.
pd.read_sql("SELECT Temperature AS tmp From Temperature Limit 3", conn)

Unnamed: 0,tmp
0,19.189117
1,25.510571
2,24.46254


In [7]:

# NOTE(review): DISTINCT applies to the whole selected row, not only to YR. Because
# "*" is selected as well, YR is returned twice (the recorded output shows the second
# copy as "YR.1") and the result still spans several thousand rows.
# "SELECT DISTINCT YR FROM rch" is probably what was meant to demonstrate DISTINCT --
# confirm with the author and re-run the cell.
pd.read_sql("SELECT DISTINCT YR, * FROM rch",conn)


Unnamed: 0,YR,RCH,YR.1,MO,FLOW_INcms,FLOW_OUTcms,EVAPcms,TLOSScms,SED_INtons,SED_OUTtons,...,BED_PSTmg,BACTP_OUTct,BACTLP_OUTct,CMETAL_1kg,CMETAL_2kg,CMETAL_3kg,TOT_Nkg,TOT_Pkg,NO3ConcMg_l,WTMPdegc
0,1981,1,1981,1,146.343765,146.252487,0.091281,0.0,2.332046e-07,6.161946e+04,...,0.0,0.0,0.0,0.0,0.0,0.0,8.060157e+02,0.000000,0.0,0.0
1,1981,2,1981,1,96.225693,96.182854,0.042821,0.0,1.642676e-07,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,4.424630e+02,0.000000,0.0,0.0
2,1981,3,1981,1,11.952719,11.861368,0.091352,0.0,2.032582e-07,2.032582e-07,...,0.0,0.0,0.0,0.0,0.0,0.0,1.103991e+02,0.009118,0.0,0.0
3,1981,4,1981,1,49.486492,49.406513,0.079983,0.0,3.913227e-08,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,1.230848e+02,0.000000,0.0,0.0
4,1981,5,1981,1,274.066803,272.106018,1.960806,0.0,2.257870e+06,1.166752e+05,...,0.0,0.0,0.0,0.0,0.0,0.0,1.304826e+06,420186.281250,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8275,2010,19,2010,12,5.524071,5.521397,0.002673,0.0,2.106953e-07,2.106953e-07,...,0.0,0.0,0.0,0.0,0.0,0.0,3.186030e+01,0.027503,0.0,0.0
8276,2010,20,2010,12,30.613400,30.551821,0.061576,0.0,9.801759e-08,0.000000e+00,...,0.0,0.0,0.0,0.0,0.0,0.0,1.602052e+02,0.000000,0.0,0.0
8277,2010,21,2010,12,4.488809,4.487870,0.000938,0.0,7.982982e-08,7.982982e-08,...,0.0,0.0,0.0,0.0,0.0,0.0,2.687480e+01,0.017791,0.0,0.0
8278,2010,22,2010,12,13.716627,13.439812,0.276818,0.0,3.510945e-07,2.074509e+03,...,0.0,0.0,0.0,0.0,0.0,0.0,1.324296e+02,0.052098,0.0,0.0


In [8]:
import sqlite3
import pandas as pd
import os

# Open the SQLite file behind the employee examples (it is created on first use).
DB_NAME = 'firma.db'
conn = sqlite3.connect(DB_NAME)
cursor = conn.cursor()

# Step 1: make sure the Mitarbeiter table exists; IF NOT EXISTS keeps re-runs harmless.
cursor.execute("""
    CREATE TABLE IF NOT EXISTS Mitarbeiter (
        id INTEGER PRIMARY KEY,
        name TEXT NOT NULL,
        abteilung TEXT NOT NULL,
        gehalt REAL NOT NULL,
        stadt TEXT
    );
""")
conn.commit()

# Step 2: empty the table and reload the fixture rows so that every run of this
# cell starts from the same, known data set.
cursor.execute("DELETE FROM Mitarbeiter;")
mitarbeiter_daten = [
    # (name, abteilung, gehalt, stadt)
    ("Max Mustermann", "Vertrieb", 60000.00, "Berlin"),
    ("Anna Schmidt", "IT", 75000.00, "München"),
    ("Peter Huber", "Vertrieb", 62000.00, "Berlin"),
    ("Lena Müller", "IT", 78000.00, "Hamburg"),
    ("Tom Braun", "Personal", 55000.00, "München"),
    ("Julia Keller", "Personal", 58000.00, "Berlin"),
    ("Sven Meier", "IT", 75000.00, "München"),
    ("Sabine Kurz", "Vertrieb", 61000.00, "Hamburg"),  # extra employee for the DISTINCT examples
]
insert_sql = "INSERT INTO Mitarbeiter (name, abteilung, gehalt, stadt) VALUES (?, ?, ?, ?)"
conn.executemany(insert_sql, mitarbeiter_daten)
conn.commit()


# Helper that echoes a query and shows its result.
def execute_and_print(query, connection=None):
    """Print a SQL query, run it, and render the result as a DataFrame.

    Args:
        query: SQL statement to execute. It is printed before the result so
            the output shows which statement produced which table.
        connection: Database connection to run the query on. When omitted,
            the notebook-level ``conn`` is used, looked up at call time.

    Returns:
        None. The result is rendered through ``display``, which is not
        defined in this file and is assumed to be supplied by the notebook
        environment.
    """
    if connection is None:
        # Resolve the global only now, so a ``conn`` that was re-bound by a
        # later cell is the one that gets queried.
        connection = conn
    print(query)
    display(pd.read_sql_query(query, connection))

# Show the complete Mitarbeiter table right after it has been re-seeded above.
execute_and_print("SELECT * FROM Mitarbeiter")

SELECT * FROM Mitarbeiter


Unnamed: 0,id,name,abteilung,gehalt,stadt
0,1,Max Mustermann,Vertrieb,60000.0,Berlin
1,2,Anna Schmidt,IT,75000.0,München
2,3,Peter Huber,Vertrieb,62000.0,Berlin
3,4,Lena Müller,IT,78000.0,Hamburg
4,5,Tom Braun,Personal,55000.0,München
5,6,Julia Keller,Personal,58000.0,Berlin
6,7,Sven Meier,IT,75000.0,München
7,8,Sabine Kurz,Vertrieb,61000.0,Hamburg


## 2. Filter und Bedingungen

- **WHERE**: Filtert Zeilen anhand einer Bedingung (vergleichbar mit einem `if`)
- **LIKE**: Musterabgleich (z. B. `'M%'` = beginnt mit „M“)
- **IN**: Prüft, ob ein Wert in einer Liste enthalten ist
- **BETWEEN**: Prüft, ob ein Wert innerhalb eines Bereichs liegt
- **IS NULL/IS NOT NULL**: Prüft, ob ein Wert NULL ist oder nicht
- **HAVING**: Filtert bereits **gruppierte Daten** (nach einem **GROUP BY**)

### 2.1 Beispiele: WHERE (Mitarbeiter mit einem Gehalt über 70k), LIKE, IN und BETWEEN

In [20]:
# WHERE with a comparison: employees whose salary is above 70000.
execute_and_print("SELECT name AS Mitarbeiter, gehalt AS Earnings FROM Mitarbeiter WHERE gehalt>70000")

# LIKE with a pattern: names starting with "M" ("%" stands for any remaining characters).
execute_and_print("SELECT name as Typ FROM Mitarbeiter WHERE name LIKE 'M%';")

# IN: employees whose department is one of the listed values.
execute_and_print("SELECT name as sigma, abteilung FROM Mitarbeiter WHERE abteilung IN ('Vertrieb' , 'Personal')")

# BETWEEN: salaries from 55000 to 60000; both bounds are included, as the
# recorded output (55000.0 and 60000.0) shows.
execute_and_print("SELECT name AS Mitarbeiter, gehalt AS Earnings, abteilung FROM Mitarbeiter WHERE gehalt BETWEEN 55000 AND 60000;")

SELECT name AS Mitarbeiter, gehalt AS Earnings FROM Mitarbeiter WHERE gehalt>70000


Unnamed: 0,Mitarbeiter,Earnings
0,Anna Schmidt,75000.0
1,Lena Müller,78000.0
2,Sven Meier,75000.0


SELECT name as Typ FROM Mitarbeiter WHERE name LIKE 'M%';


Unnamed: 0,Typ
0,Max Mustermann


SELECT name as sigma, abteilung FROM Mitarbeiter WHERE abteilung IN ('Vertrieb' , 'Personal')


Unnamed: 0,sigma,abteilung
0,Max Mustermann,Vertrieb
1,Peter Huber,Vertrieb
2,Tom Braun,Personal
3,Julia Keller,Personal
4,Sabine Kurz,Vertrieb


SELECT name AS Mitarbeiter, gehalt AS Earnings, abteilung FROM Mitarbeiter WHERE gehalt BETWEEN 55000 AND 60000;


Unnamed: 0,Mitarbeiter,Earnings,abteilung
0,Max Mustermann,60000.0,Vertrieb
1,Tom Braun,55000.0,Personal
2,Julia Keller,58000.0,Personal


## 3. Aggregation
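- **COUNT()**: Zählt Zeilen; `COUNT(*)` zählt alle Zeilen, `COUNT(spalte)` nur die Zeilen, in denen die Spalte nicht NULL ist
- **SUM()**: Summiert die Werte einer Spalte über alle Zeilen
- **AVG()**: Durchschnitt der Werte einer Spalte
- **MIN()**: Kleinster Wert einer Spalte
- **MAX()**: Größter Wert einer Spalte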

In [29]:
# COUNT(*) counts every row of the table.
execute_and_print("SELECT COUNT(*) AS Gesamtanzahl_Mitarbeiter FROM Mitarbeiter")

# GROUP BY stadt yields one result row per city; COUNT(stadt) counts the
# non-NULL stadt values inside each group.
# NOTE(review): the alias given to the stadt column is an insult and appears as
# the column header in the output -- rename it (e.g. to "Stadt") and re-run the cell.
execute_and_print("SELECT stadt AS arschloch, COUNT(stadt) AS GesamtzahlProStadt FROM Mitarbeiter GROUP BY stadt")

# SUM adds up the gehalt column across all rows.
execute_and_print("SELECT SUM(gehalt) AS Gehalt FROM Mitarbeiter ")



SELECT COUNT(*) AS Gesamtanzahl_Mitarbeiter FROM Mitarbeiter


Unnamed: 0,Gesamtanzahl_Mitarbeiter
0,8


SELECT stadt AS arschloch, COUNT(stadt) AS GesamtzahlProStadt FROM Mitarbeiter GROUP BY stadt


Unnamed: 0,arschloch,GesamtzahlProStadt
0,Berlin,3
1,Hamburg,2
2,München,3


SELECT SUM(gehalt) AS Gehalt FROM Mitarbeiter 


Unnamed: 0,Gehalt
0,524000.0


## 4. Sortieren und Gruppieren