In [3]:
# Smoke test: writes a fixed greeting to stdout to confirm the kernel runs code.
print("Hello World")

Hello World


In [1]:
import pyodbc

# ------------------------------------------------
# STEP 1: Describe the target SQL Server database
# ------------------------------------------------
server = r'(localdb)\MSSQLLocalDB'   # LocalDB instance name (raw string keeps the backslash)
database = 'Arsipa'                  # has to match the database name exactly

# ODBC connection string for Windows Authentication (Trusted_Connection=yes),
# assembled from its individual "key=value;" settings.
conn_str = "".join(
    [
        "Driver={ODBC Driver 17 for SQL Server};",
        f"Server={server};",
        f"Database={database};",
        "Trusted_Connection=yes;",
        "Encrypt=no;",
    ]
)

# ------------------------------------------------
# STEP 2: Open the connection
# ------------------------------------------------
# On any failure the error is printed and `conn` is set to None so that the
# query step below is skipped instead of raising.
try:
    conn = pyodbc.connect(conn_str)
    print("✅ Connection to SQL Server successful!")
except Exception as e:
    print("❌ Failed to connect to SQL Server:")
    print(e)
    conn = None

# ------------------------------------------------
# STEP 3: Sanity-check query (server version)
# ------------------------------------------------
if conn:
    try:
        cursor = conn.cursor()
        # execute() hands back the cursor, so the rows can be fetched in one chain.
        for row in cursor.execute("SELECT @@VERSION;").fetchall():
            print("SQL Server Version:", row[0])
    except Exception as e:
        print("❌ Query failed:", e)
    finally:
        # Always release the connection, whether or not the query worked.
        conn.close()
        print("🔒 Connection closed.")


✅ Connection to SQL Server successful!
SQL Server Version: Microsoft SQL Server 2019 (RTM-CU27-GDR) (KB5040948) - 15.0.4382.1 (X64) 
	Jul  1 2024 20:03:23 
	Copyright (C) 2019 Microsoft Corporation
	Express Edition (64-bit) on Windows 10 Pro 10.0 <X64> (Build 26100: ) (Hypervisor)

🔒 Connection closed.


In [8]:
import os
from pathlib import Path

import pandas as pd

# Folder that holds the three CSV source files. The default is the original
# location; set the ARSIPA_DATA_DIR environment variable to point the notebook
# at a different folder without editing this cell.
DATA_DIR = Path(
    os.environ.get(
        "ARSIPA_DATA_DIR",
        r"C:\Users\Dell\Desktop\Python Practice\source_datafiles\csv",
    )
)

# Load the CSV data files, each with an explicit encoding.
# NOTE(review): gesellschaften.csv is read as latin1 while the other two are
# read as utf-8 — presumably that file was exported in Latin-1; verify.
df_umsatz = pd.read_csv(DATA_DIR / "umsatz.csv", encoding="utf-8")
df_mitarbeiter = pd.read_csv(DATA_DIR / "mitarbeiter.csv", encoding="utf-8")
df_gesellschaften = pd.read_csv(DATA_DIR / "gesellschaften.csv", encoding="latin1")

# Preview each dataset (first or last five rows).
print("📊 Umsatz (erste 5 Zeilen):")
print(df_umsatz.head(5))

print("\n👨‍💼 Mitarbeiter (letzte 5 Zeilen):")
print(df_mitarbeiter.tail(5))

print("\n🏢 Gesellschaften (erste 5 Zeilen):")
print(df_gesellschaften.head(5))

📊 Umsatz (erste 5 Zeilen):
   gesellschaft_id    monat  umsatz_eur
0                1  2023-01      171958
1                1  2023-02      181932
2                1  2023-03      309178
3                1  2023-04      160268
4                1  2023-05      104886

👨‍💼 Mitarbeiter (letzte 5 Zeilen):
     gesellschaft_id    monat  anzahl_mitarbeiter
635               20  2025-04                 333
636               20  2025-05                 182
637               20  2025-06                 420
638               20  2025-07                 124
639               20  2025-08                 429

🏢 Gesellschaften (erste 5 Zeilen):
   gesellschaft_id    gesellschaft_name standort     branche
0                1   Arsipa Berlin GmbH   Berlin          IT
1                2  Arsipa Hamburg GmbH  Hamburg      Handel
2                3  Arsipa München GmbH  München  Produktion
3                4    Arsipa Paris S.A.    Paris    Finanzen
4                5   Arsipa Madrid S.A.   Madrid    Logi

In [13]:
# Build the fact table: revenue rows left-joined with the headcount for the
# same company and month, then with the company attributes.
fact = (
    df_umsatz
    .merge(df_mitarbeiter, on=["gesellschaft_id", "monat"], how="left")
    .merge(df_gesellschaften, on="gesellschaft_id", how="left")
)

# KPI "Sales_per_Employee": revenue divided by headcount.
# A headcount of 0 is masked to NaN with .where() so the division yields NaN
# instead of inf. Unlike replace(0, pd.NA), this keeps the column numeric;
# pd.NA in a NumPy-backed integer column turns it into object dtype, which
# breaks the aggregations below.
headcount = fact["anzahl_mitarbeiter"]
fact["umsatz_pro_mitarbeiter"] = fact["umsatz_eur"] / headcount.where(headcount != 0)

# View Fact Table after Joins
print("Fact Table after Joins\n", fact.head(5))

# Sum of the KPI per company (computed here but not printed in this cell).
result_sum = fact.groupby("gesellschaft_id")["umsatz_pro_mitarbeiter"].sum()

# Mean of the KPI per (gesellschaft_id, monat) pair.
# NOTE(review): the printed sample shows these means equal to the row values,
# i.e. one row per company and month — confirm whether grouping by
# gesellschaft_id alone was intended.
result_mean = fact.groupby(["gesellschaft_id", "monat"])["umsatz_pro_mitarbeiter"].mean()

# Several aggregates per company at once.
result = fact.groupby("gesellschaft_id")["umsatz_pro_mitarbeiter"].agg(["sum", "mean", "max", "min"])

print("MEANS Table\n", result_mean.head(5))
print("Caclucations Table Grouped by Gesellschat&ProMitarbeiter\n",result.head(5))



Fact Table after Joins
    gesellschaft_id    monat  umsatz_eur  anzahl_mitarbeiter  \
0                1  2023-01      171958                 445   
1                1  2023-02      181932                 280   
2                1  2023-03      309178                  81   
3                1  2023-04      160268                  30   
4                1  2023-05      104886                 131   

    gesellschaft_name standort branche  umsatz_pro_mitarbeiter  
0  Arsipa Berlin GmbH   Berlin      IT              386.422472  
1  Arsipa Berlin GmbH   Berlin      IT              649.757143  
2  Arsipa Berlin GmbH   Berlin      IT             3817.012346  
3  Arsipa Berlin GmbH   Berlin      IT             5342.266667  
4  Arsipa Berlin GmbH   Berlin      IT              800.656489  
MEANS Table
 gesellschaft_id  monat  
1                2023-01     386.422472
                 2023-02     649.757143
                 2023-03    3817.012346
                 2023-04    5342.266667
         