# Exploratory Data Analysis

## Setting up the environment

In [1]:
import pandas as pd
import os
from pathlib import Path

## Loading CSV files 

In [14]:
def load_all_csv_files(data_dir='data', show_rows=5):
    """Load every ``*.csv`` file directly inside ``data_dir`` into a DataFrame.

    Files are processed in sorted path order and keyed ``df1``, ``df2``, ...
    in that order. For each file the path, shape and first rows are shown.

    Args:
        data_dir: Directory to scan for CSV files (not searched recursively).
        show_rows: Number of leading rows to preview for each file.

    Returns:
        A ``(dataframes, file_mappings)`` tuple of dicts, both keyed by the
        generated ``dfN`` name: the loaded DataFrame and the source path as a
        string. Both dicts are empty when ``data_dir`` is not a directory.
    """
    # `display` is only injected as a builtin inside IPython/Jupyter; fall back
    # to print so the function also works from a plain script or a test.
    try:
        show = display
    except NameError:
        show = print

    # isdir (not exists): a regular file passed as data_dir is not a usable directory
    if not os.path.isdir(data_dir):
        print(f"Directory '{data_dir}' not found!")
        return {}, {}

    # sorted order makes the dfN numbering deterministic for a given directory
    csv_files = sorted(Path(data_dir).glob('*.csv'))

    print(f"Found {len(csv_files)} CSV files in '{data_dir}' directory.\n")

    dataframes = {}
    file_mappings = {}

    # load each CSV file into a dataframe
    for i, file_path in enumerate(csv_files, 1):
        df_name = f"df{i}"

        # Read first, so "Loaded" is only reported once the read has succeeded
        # and a failing file never leaves a mapping without a dataframe.
        df = pd.read_csv(file_path)
        dataframes[df_name] = df
        file_mappings[df_name] = str(file_path)

        print(f"Loaded {file_path} as {df_name}")
        print(f"File: {file_path}")
        print(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        print(f"\nFirst {show_rows} rows of {df_name}:")
        show(df.head(show_rows))
        print("-" * 80 + "\n")

    return dataframes, file_mappings

In [16]:
# Load every CSV from the default 'data' directory; both dicts are keyed df1, df2, ...
dataframes, file_mappings = load_all_csv_files()

# Publish each loaded frame as a notebook-level variable (df1, df2, ...) so later
# cells can reference it directly. The dfN numbering follows the sorted file listing.
for name, df in dataframes.items():
    globals()[name] = df

# Raises KeyError if no CSV file was found (file_mappings is empty in that case).
print(f"df1 is from file: {file_mappings['df1']}")

Found 12 CSV files in 'data' directory.

Loaded data/dim__hubspot_sales_pipeline_stages.csv as df1
File: data/dim__hubspot_sales_pipeline_stages.csv
Shape: 6 rows × 6 columns

First 5 rows of df1:


Unnamed: 0,pipeline_stage_id,pipeline_stage_order,pipeline_stage,close_probability,stage_is_archived,deal_is_closed
0,1102499,1,Qualification,0.2,False,False
1,12008384,0,Lead,0.1,False,False
2,qualifiedtobuy,3,Proposal,0.6,False,False
3,appointmentscheduled,2,Exploration,0.4,False,False
4,closedlost,5,Closed Lost,0.0,False,True


--------------------------------------------------------------------------------

Loaded data/dim__notion_clients__anonymized.csv as df2
File: data/dim__notion_clients__anonymized.csv
Shape: 48 rows × 10 columns

First 5 rows of df2:


Unnamed: 0,client_id,category,industry_id,company_size,company_type,n_roles,n_projects,n_people,started_at,name_anon
0,2f0a5150-aeeb-49f7-b11b-5307511068ff,Internal,,,,0,1,0,,"Smith, Fields and Bentley"
1,2dc74fac-c75a-4770-855f-9943969f18e3,Internal,,,,0,2,0,,Ryan-Ward
2,3d619f69-ee9c-41f8-ac3a-ac5cc851cf10,Internal,,,,0,3,0,,Edwards-Jenkins
3,87163a01-08e8-487c-b200-6f04138c53bf,Internal,,,,0,3,0,,Avila LLC
4,0a1a94ab-d574-4c92-9b69-b21df5080288,Internal,,,,0,3,0,,Adams Group


--------------------------------------------------------------------------------

Loaded data/dim__notion_hr__anonymized.csv as df3
File: data/dim__notion_hr__anonymized.csv
Shape: 13 rows × 6 columns

First 5 rows of df3:


Unnamed: 0,consultant_id,active,startdate,enddate,seniority,consultant_name_anon
0,2a554628-d7f2-475f-8872-5a7057388c36,No,2024-08-19,,K2,Bertil Grind
1,261b42da-30b3-801f-ade8-e0f9f5d087b6,Yes,2025-09-01,,K2,Alexander Macedon
2,261b42da-30b3-8088-b413-f0676fb78aa4,,2025-09-01,,K1,Luke Rhinehart
3,086ef6c0-d4f2-45bb-bf64-2cb51ce8fec4,Yes,2023-09-04,,K1,Rachel Lamb
4,afd559ec-90bf-40c9-8e96-57e3d7af66fd,Yes,2023-03-13,,K2,Astarion Baldursson


--------------------------------------------------------------------------------

Loaded data/dim__notion_roles__anonymized.csv as df4
File: data/dim__notion_roles__anonymized.csv
Shape: 101 rows × 8 columns

First 5 rows of df4:


Unnamed: 0,role_id,industry_name,role_category_name,hourly_rate,billing_type,seniority,startdate,name_anon
0,68fb318f-438b-4257-95f2-3893564ebd01,,,,,K2,2023-10-01,
1,b5a1a16b-99d7-4895-b927-0d0afa074c35,,,,,,,
2,264b42da-30b3-8026-b4e1-f6a7c2832cc2,,Data Analyst,900.0,Direct,,2025-09-04,Terry and Sons
3,0517e8f8-b88c-41fa-ae1a-57458f4571d0,,Data Engineer,1100.0,Invoicing(Right people Group),,2022-12-05,Payne-Nelson
4,347c5d6b-8487-437c-8945-612a32c0b866,,,1100.0,,,2022-09-11,Payne-Nelson


--------------------------------------------------------------------------------

Loaded data/dim__projects__anonymized.csv as df5
File: data/dim__projects__anonymized.csv
Shape: 91 rows × 7 columns

First 5 rows of df5:


Unnamed: 0,project_id,client_id,clockify_project_billable,project_duration,estimated_durationn,client_anon,project_anon
0,5f60a92df2bfed445099122e,5f437b54d13ed94468add358,False,232H,Not estimated,"Taylor, Simmons and Anderson",Obetald semester
1,65a66b6f7349ba6dbd1f1086,5f437b54d13ed94468add358,False,414H,Not estimated,"Taylor, Simmons and Anderson","Brown, Martinez and Manning Projects"
2,5f437f189decb91192e37bc0,5f437b54d13ed94468add358,False,10321H,Not estimated,"Taylor, Simmons and Anderson",Semester
3,6179a9ac4d694b7ecbe75ea6,5f437b54d13ed94468add358,False,396H,Not estimated,"Taylor, Simmons and Anderson",Vård av barn
4,65a66b65efacdd3a4371df50,5f437b54d13ed94468add358,False,52H,Not estimated,"Taylor, Simmons and Anderson",Competence development


--------------------------------------------------------------------------------

Loaded data/dim_employees_anon.csv as df6
File: data/dim_employees_anon.csv
Shape: 13 rows × 6 columns

First 5 rows of df6:


Unnamed: 0,employee_id,employee_code,first_name,last_name,is_active,practice
0,20,22,Luke,Rhinehart,True,
1,21,23,Alexander,Macedon,True,
2,12,18,Rachel,Lamb,True,Analytics
3,2,9,Bingo,Storm,True,Analytics
4,9,15,Astarion,Baldersson,True,Analytics


--------------------------------------------------------------------------------

Loaded data/fct__fortnox_invoices__anonymized.csv as df7
File: data/fct__fortnox_invoices__anonymized.csv
Shape: 615 rows × 12 columns

First 5 rows of df7:


Unnamed: 0,invoice_amount_net,invoice_amount_total,customer_number,due_date,invoice_date,final_pay_date,month_name,accounting_month,accounting_year,accounting_year_date,broker,client_anon
0,52000.0,65000.0,559296,2024-10-15,2024-09-30,,September,5,2024,2024-05-28,,"Mccoy, Singh and Smith"
1,136500.0,170625.0,559298,2024-12-04,2024-11-04,2024-12-04,November,7,2024,2024-07-04,,"Atkinson, Bishop and Cohen"
2,192000.0,240000.0,559298,2025-01-01,2024-11-30,2024-12-30,November,7,2024,2024-07-28,,"Atkinson, Bishop and Cohen"
3,133500.0,166875.0,559298,2025-02-02,2024-12-31,2025-01-31,December,8,2024,2024-08-28,,"Atkinson, Bishop and Cohen"
4,212800.0,266000.0,559298,2025-03-05,2025-01-31,2025-03-05,January,9,2024,2024-09-28,,"Atkinson, Bishop and Cohen"


--------------------------------------------------------------------------------

Loaded data/fct__fortnox_supplier_invoices.csv as df8
File: data/fct__fortnox_supplier_invoices.csv
Shape: 748 rows × 5 columns

First 5 rows of df8:


Unnamed: 0,invoice_payment,categorization,invoice_date,due_date,final_pay_date
0,296.5,mobile_costs,2019-11-04,2019-11-28,2019-11-26
1,253.2,mobile_costs,2019-10-03,2019-10-27,2019-10-25
2,516.55,mobile_costs,2020-04-01,2020-04-01,2020-04-27
3,452.71,mobile_costs,2020-03-03,2020-03-27,2020-04-08
4,353.05,mobile_costs,2020-02-03,2020-02-27,2020-02-05


--------------------------------------------------------------------------------

Loaded data/fct__hubspot_deals__anonymized.csv as df9
File: data/fct__hubspot_deals__anonymized.csv
Shape: 322 rows × 11 columns

First 5 rows of df9:


Unnamed: 0,deal_id,deal_amount,deal_stage,deal_close_probability,create_date,last_modified_date,close_date,owner_id,is_archived,weighted_deal_amount,deal_name_anon
0,12204336178,1000000.0,closedlost,0.0,2023-02-17 15:36:41.602000 UTC,2023-03-17 10:52:17.288000 UTC,2023-02-27 11:15:06.295000 UTC,184321862,False,0.0,Dometic - Data Solution Architect role
1,15856959370,1000000.0,closedlost,0.0,2023-10-31 08:43:47.711000 UTC,2023-11-17 09:41:31.071000 UTC,2023-11-17 09:41:30.971000 UTC,184321862,False,0.0,"Morris, Juarez and Chavez - Optimize delivery ..."
2,18757261362,1200000.0,closedlost,0.0,2024-04-19 12:24:49.100000 UTC,2024-05-17 12:08:02.490000 UTC,2024-05-17 12:07:46.527000 UTC,184321862,False,0.0,ICA Banken GCP Architect
3,17014095971,1000000.0,closedlost,0.0,2024-01-18 10:32:41.435000 UTC,2024-02-23 08:12:52.206000 UTC,2024-02-23 08:12:46.209000 UTC,184321862,False,0.0,"Morris, Juarez and Chavez Miller, Kidd and Dic..."
4,20551258523,600000.0,closedlost,0.0,2024-07-05 13:08:26.064000 UTC,2024-12-13 13:18:34.627000 UTC,2024-12-13 13:18:30.146000 UTC,184321862,False,0.0,"Morris, Juarez and Chavez AI opportunities"


--------------------------------------------------------------------------------

Loaded data/fct__time_entries.csv as df10
File: data/fct__time_entries.csv
Shape: 9808 rows × 7 columns

First 5 rows of df10:


Unnamed: 0,dt,time_entry_id,project_id,user_id,billable,hours,billable_hours
0,2025-11-28,67c592453f4d6b42ba2ee7bb,60106d6e9ab6ce7c1dacbc9c,64e5f63d97f5910c716406b8,False,8.0,
1,2025-11-27,67c592453f4d6b42ba2ee7ba,60106d6e9ab6ce7c1dacbc9c,64e5f63d97f5910c716406b8,False,8.0,
2,2025-11-26,67c592453f4d6b42ba2ee7b9,60106d6e9ab6ce7c1dacbc9c,64e5f63d97f5910c716406b8,False,8.0,
3,2025-11-25,67c592453f4d6b42ba2ee7b8,60106d6e9ab6ce7c1dacbc9c,64e5f63d97f5910c716406b8,False,8.0,
4,2025-11-24,67c592453f4d6b42ba2ee7b7,60106d6e9ab6ce7c1dacbc9c,64e5f63d97f5910c716406b8,False,8.0,


--------------------------------------------------------------------------------

Loaded data/stg_qbis__activity_time.csv as df11
File: data/stg_qbis__activity_time.csv
Shape: 866 rows × 8 columns

First 5 rows of df11:


Unnamed: 0,activity_time_id,employee_id,activity_id,activity_date,minutes,factor_value,notes_internal,processed_at
0,750,5,20,2025-05-10,0,0.0,Data Innovation Summit + Interna möten fredag,2025-09-08 05:26:08.566517 UTC
1,1256,3,17,2025-06-26,30,1.0,,2025-09-08 05:26:08.566517 UTC
2,1255,3,17,2025-06-25,30,1.0,,2025-09-08 05:26:08.566517 UTC
3,1257,3,17,2025-06-27,30,1.0,,2025-09-08 05:26:08.566517 UTC
4,1564,3,21,2025-08-21,30,1.0,,2025-09-08 05:26:08.566517 UTC


--------------------------------------------------------------------------------

Loaded data/stg_qbis__project_activities.csv as df12
File: data/stg_qbis__project_activities.csv
Shape: 25 rows × 20 columns

First 5 rows of df12:


Unnamed: 0,project_activity_id,project_id,phase_id,activity_name,is_active,is_complete,is_chargeable,is_locked,has_warning,is_group_budget,start_date,end_date,chargeable_date,max_hours,budget_hours,factor,cost_per_hour,price_per_hour,price_fixed,processed_at
0,20,19,0,Internal,True,False,False,False,False,False,,,,0.0,0.0,0.0,0.0,0.0,0.0,2025-09-08 05:25:58.210172 UTC
1,24,23,0,Internal,True,False,False,False,False,False,,,,0.0,0.0,0.0,0.0,0.0,0.0,2025-09-08 05:25:58.210172 UTC
2,23,22,0,Internal,True,False,False,False,False,False,,,,0.0,0.0,0.0,0.0,0.0,0.0,2025-09-08 05:25:58.210172 UTC
3,22,21,0,Internal,True,False,False,False,False,False,,,,0.0,0.0,0.0,0.0,0.0,0.0,2025-09-08 05:25:58.210172 UTC
4,2,1,0,Aktivitet 1,True,False,True,False,False,False,,,,0.0,0.0,1.0,0.0,0.0,0.0,2025-09-08 05:25:58.210172 UTC


--------------------------------------------------------------------------------

df1 is from file: data/dim__hubspot_sales_pipeline_stages.csv


## Examining HubSpot files

In [4]:
# Display the whole frame; the saved load output reports only 6 rows for df1,
# so no .head() is needed here.
df1

Unnamed: 0,pipeline_stage_id,pipeline_stage_order,pipeline_stage,close_probability,stage_is_archived,deal_is_closed
0,1102499,1,Qualification,0.2,False,False
1,12008384,0,Lead,0.1,False,False
2,qualifiedtobuy,3,Proposal,0.6,False,False
3,appointmentscheduled,2,Exploration,0.4,False,False
4,closedlost,5,Closed Lost,0.0,False,True
5,closedwon,4,Closed Won,1.0,False,True


In [5]:
# Column dtypes and null counts for df9. In the saved output the three *_date
# columns are `object` (not parsed as datetimes) and deal_amount /
# weighted_deal_amount are non-null for 279 of 322 rows.
df9.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 322 entries, 0 to 321
Data columns (total 11 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   deal_id                 322 non-null    int64  
 1   deal_amount             279 non-null    float64
 2   deal_stage              322 non-null    object 
 3   deal_close_probability  322 non-null    float64
 4   create_date             322 non-null    object 
 5   last_modified_date      322 non-null    object 
 6   close_date              322 non-null    object 
 7   owner_id                322 non-null    int64  
 8   is_archived             322 non-null    bool   
 9   weighted_deal_amount    279 non-null    float64
 10  deal_name_anon          322 non-null    object 
dtypes: bool(1), float64(3), int64(2), object(5)
memory usage: 25.6+ KB


In [6]:
# Fixed random_state so the same 10 rows are shown on every Restart & Run All.
df9.sample(10, random_state=42)

Unnamed: 0,deal_id,deal_amount,deal_stage,deal_close_probability,create_date,last_modified_date,close_date,owner_id,is_archived,weighted_deal_amount,deal_name_anon
104,6345738653,1000000.0,closedlost,0.0,2021-10-01 12:51:53.119000 UTC,2022-03-16 12:11:14.325000 UTC,2022-03-16 12:11:13.484000 UTC,38986547,False,0.0,"Thomas, Morales and Porter&B - Dataplattform"
249,1007324971,150000.0,closedwon,1.0,2019-09-10 16:15:46.912000 UTC,2022-04-25 07:51:04.469000 UTC,2020-02-28 15:15:38.867000 UTC,51510172,False,150000.0,"Fernandez, Brewer and Vasquez - Dataanalytiker"
58,12238180884,600000.0,closedlost,0.0,2023-02-20 08:55:13.081000 UTC,2023-04-12 19:04:00.490000 UTC,2023-04-12 19:03:59.337000 UTC,38986547,False,0.0,SRB Valuation - IT Analyst
290,20403073402,1500000.0,closedlost,0.0,2024-06-28 13:49:54.714000 UTC,2024-09-02 07:39:45.651000 UTC,2024-09-02 07:39:42.082000 UTC,644882961,False,0.0,Ortega Group Collection implementation
18,21989866572,1100000.0,closedlost,0.0,2024-09-06 12:19:07.768000 UTC,2024-10-30 22:36:46.558000 UTC,2024-10-30 22:36:33.127000 UTC,2031800790,False,0.0,"Qliro - Miller, Kidd and Dickson"
56,14514138116,1000000.0,closedlost,0.0,2023-08-07 08:09:07.644000 UTC,2023-08-29 14:38:11.770000 UTC,2023-08-29 14:38:07.498000 UTC,38986547,False,0.0,Broker - Dataanalytiker inom test
171,9353792316,1800000.0,closedwon,1.0,2022-06-30 13:19:16.072000 UTC,2023-06-29 15:27:43.717000 UTC,2022-08-16 10:06:45.903000 UTC,38986548,False,1800000.0,LF Datadriven affärsutveckling HT 2022
85,37960350725,400000.0,closedlost,0.0,2025-05-26 20:12:23.042000 UTC,2025-06-04 10:13:52.871000 UTC,2025-06-04 10:13:48.564000 UTC,38986547,False,0.0,JM - Dataplattform förstudie
82,1011424537,500000.0,closedlost,0.0,2019-09-11 18:21:14.838000 UTC,2022-04-25 07:51:04.328000 UTC,2019-12-02 12:24:19.859000 UTC,38986547,False,0.0,Dustin - Analytics Manager
263,20069262668,70000.0,closedlost,0.0,2024-06-14 04:43:03.943000 UTC,2024-09-06 12:11:01.275000 UTC,2024-09-06 12:08:38.199000 UTC,51657849,False,0.0,Designplan (Ortega Group) - Report changes


## Examining Fortnox files

In [8]:
# Column dtypes and null counts for df7. In the saved output the date columns are
# `object`, and final_pay_date and broker are non-null for 594 of 615 rows.
df7.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 615 entries, 0 to 614
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   invoice_amount_net    615 non-null    float64
 1   invoice_amount_total  615 non-null    float64
 2   customer_number       615 non-null    int64  
 3   due_date              615 non-null    object 
 4   invoice_date          615 non-null    object 
 5   final_pay_date        594 non-null    object 
 6   month_name            615 non-null    object 
 7   accounting_month      615 non-null    int64  
 8   accounting_year       615 non-null    int64  
 9   accounting_year_date  615 non-null    object 
 10  broker                594 non-null    object 
 11  client_anon           615 non-null    object 
dtypes: float64(2), int64(3), object(7)
memory usage: 57.8+ KB


In [10]:
# Fixed random_state so the same 10 rows are shown on every Restart & Run All.
df7.sample(10, random_state=42)

Unnamed: 0,invoice_amount_net,invoice_amount_total,customer_number,due_date,invoice_date,final_pay_date,month_name,accounting_month,accounting_year,accounting_year_date,broker,client_anon
439,243800.0,304750.0,27,2023-06-30,2023-05-31,2023-06-29,May,1,2023,2023-01-28,Direct,"Butler, Rodriguez and Townsend"
145,123200.0,123200.0,22,2023-01-05,2022-11-30,2022-12-30,November,7,2022,2022-07-28,Broker,Carrillo Inc
305,47200.0,59000.0,35,2025-08-30,2025-07-31,2025-08-15,July,3,2025,2025-03-28,Broker,Lynch Ltd
238,228620.0,285775.0,35,2024-11-01,2024-09-30,2024-10-18,September,5,2024,2024-05-28,Broker,Lynch Ltd
215,201600.0,252000.0,37,2024-07-02,2024-05-31,2024-07-01,May,1,2024,2024-01-28,Broker,Carroll Inc
538,81600.0,102000.0,19,2024-07-30,2024-06-30,2024-07-30,June,2,2024,2024-02-28,Direct,Anderson Ltd
217,205900.0,257375.0,35,2024-07-02,2024-05-31,2024-06-11,May,1,2024,2024-01-28,Broker,Lynch Ltd
57,177840.0,222300.0,5,2021-03-19,2021-01-31,2021-03-03,January,9,2020,2020-09-28,Broker,"Bush, Sawyer and Chambers"
450,12848.26875,12854.176,28,2023-07-30,2023-06-30,2023-07-07,June,2,2023,2023-02-28,Direct,Pugh-Rodriguez
546,96000.0,120000.0,559294,2024-10-01,2024-08-31,2024-10-03,August,4,2024,2024-04-28,Direct,"Harvey, Collins and Walker"


In [11]:
# Summary statistics for the numeric columns of df7. The saved output shows a
# negative minimum for both amount columns (presumably credit notes; TODO confirm).
# NOTE(review): customer_number is summarised as a number here but looks like an
# identifier, so its mean/std are probably not meaningful.
df7.describe()

Unnamed: 0,invoice_amount_net,invoice_amount_total,customer_number,accounting_month,accounting_year
count,615.0,615.0,615.0,615.0,615.0
mean,124309.70508,151148.936653,62766.320325,6.466667,2022.513821
std,91246.479676,112522.651908,176654.613963,3.532784,1.507653
min,-325600.0,-407000.0,1.0,1.0,2019.0
25%,56050.0,62493.0,7.0,3.0,2022.0
50%,133480.0,160000.0,21.0,6.0,2023.0
75%,178560.0,220250.0,32.0,10.0,2024.0
max,463800.0,579750.0,559299.0,12.0,2025.0


In [12]:
# Column dtypes and null counts for df8. In the saved output the date columns are
# `object` and final_pay_date is non-null for 729 of 748 rows.
df8.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 748 entries, 0 to 747
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   invoice_payment  748 non-null    float64
 1   categorization   748 non-null    object 
 2   invoice_date     748 non-null    object 
 3   due_date         748 non-null    object 
 4   final_pay_date   729 non-null    object 
dtypes: float64(1), object(4)
memory usage: 29.3+ KB


In [13]:
# Summary statistics for invoice_payment. The saved output shows a negative minimum
# and a maximum (4,000,000) far above the median (3,380.5), so totals are likely
# dominated by a few large rows.
df8.describe()

Unnamed: 0,invoice_payment
count,748.0
mean,41954.08
std,211011.9
min,-81022.0
25%,882.75
50%,3380.5
75%,15786.0
max,4000000.0


In [22]:
# Fixed random_state so the same 10 rows are shown on every Restart & Run All.
df8.sample(10, random_state=42)

Unnamed: 0,invoice_payment,categorization,invoice_date,due_date,final_pay_date
329,13528.0,Other,2024-05-20,2024-06-19,2024-06-19
88,11250.0,accounting_costs,2021-06-07,2021-06-22,2021-06-28
508,0.0,it_costs,2025-04-09,2025-04-09,
102,3756.0,accounting_costs,2022-03-01,2022-03-16,2022-03-29
592,2288.0,accounting_costs,2021-07-01,2021-07-16,2021-07-21
366,1485.0,web_page_costs,2020-05-01,2020-05-01,2020-05-07
450,1683.0,Other,2023-11-16,2023-12-16,2023-12-18
682,21250.0,rental_costs,2020-05-01,2020-05-01,2020-05-11
739,195000.0,rental_costs,2023-03-09,2023-03-31,2023-04-03
8,508.25,mobile_costs,2020-07-01,2020-07-01,2020-07-01


# **KPI Calculations:** 
Calculates total revenue, total costs, utilization %, and pipeline value per deal stage.

In [20]:
# Create KPI aliases for the frames used in the KPI analysis.
# Look the frames up by CSV file name rather than by dfN position: the dfN numbering
# comes from the sorted directory listing, so adding or renaming a file in the data
# directory would silently shift a positional alias onto a different table.
# A missing file now fails loudly with a KeyError naming the expected file stem.
frames_by_file = {
    Path(csv_path).stem: dataframes[df_name]
    for df_name, csv_path in file_mappings.items()
}

invoices          = frames_by_file["fct__fortnox_invoices__anonymized"]
supplier_invoices = frames_by_file["fct__fortnox_supplier_invoices"]
hubspot_deals     = frames_by_file["fct__hubspot_deals__anonymized"]
time_entries      = frames_by_file["fct__time_entries"]


In [27]:
import plotly.graph_objects as go
import plotly.express as px

# Finance KPIs: grand totals over all rows (Series.sum skips NaN by default).
# NOTE(review): in several sampled rows invoice_amount_total is 1.25 x
# invoice_amount_net, so this revenue figure presumably includes VAT; confirm
# whether the net column should be used instead.
total_revenue = invoices.loc[:, "invoice_amount_total"].sum()
total_costs = supplier_invoices.loc[:, "invoice_payment"].sum()

In [22]:
# Utilization KPI: billable hours as a percentage of all logged hours.
total_worked = time_entries.loc[:, "hours"].sum()
total_billable = time_entries.loc[:, "billable_hours"].sum()

# Left as None when no hours were logged, to avoid dividing by zero.
if total_worked > 0:
    utilization_pct = 100 * (total_billable / total_worked)
else:
    utilization_pct = None

In [23]:
# Pipeline KPIs: total deal amount per deal stage, largest first.
# min_count=1 keeps a stage whose amounts are all missing as NaN instead of 0.
pipeline_summary = (
    hubspot_deals
    .groupby("deal_stage", as_index=False)["deal_amount"]
    .sum(min_count=1)
    .sort_values(by="deal_amount", ascending=False)
)


In [24]:

# --- Print KPI Results ---
# utilization_pct is None when no hours were logged; formatting None with ":.2f"
# raises TypeError, so render that case as "n/a" instead of crashing the cell.
utilization_text = "n/a" if utilization_pct is None else f"{utilization_pct:.2f}%"

# NOTE(review): the utilization label says "Qbis", but time_entries is aliased to the
# fct__time_entries frame, not one of the stg_qbis__* frames; confirm the source name.
print("=== KPI Dashboard Summary ===")
print(f"Total Revenue (Fortnox): {total_revenue:,.2f} SEK")
print(f"Total Costs (Fortnox): {total_costs:,.2f} SEK")
print(f"Utilization % (Qbis): {utilization_text}")
print("\nPipeline by Stage (Hubspot):")
print(pipeline_summary)


=== KPI Dashboard Summary ===
Total Revenue (Fortnox): 92,956,596.04 SEK
Total Costs (Fortnox): 31,381,655.43 SEK
Utilization % (Qbis): 56.96%

Pipeline by Stage (Hubspot):
       deal_stage  deal_amount
2      closedlost  106954002.0
3       closedwon   54715400.0
0         1102499    2190000.0
1        12008384     900500.0
4  qualifiedtobuy     700000.0


## **KPI Cards Visualization**


In [None]:
# KPI cards: three number indicators laid out side by side in a 1x3 grid.
fig = go.Figure(
    data=[
        # Revenue, shown in millions of SEK
        go.Indicator(
            mode="number",
            value=total_revenue / 1e6,
            title=dict(text="Revenue (M SEK)"),
            domain=dict(row=0, column=0),
        ),
        # Costs, shown in millions of SEK, in red
        go.Indicator(
            mode="number",
            value=total_costs / 1e6,
            title=dict(text="Costs (M SEK)"),
            number=dict(font=dict(color="red")),
            domain=dict(row=0, column=1),
        ),
        # Utilization, shown as a percentage, in green
        go.Indicator(
            mode="number",
            value=utilization_pct,
            title=dict(text="Utilization %"),
            number=dict(suffix="%", font=dict(color="green")),
            domain=dict(row=0, column=2),
        ),
    ]
)

fig.update_layout(
    title="KPI Cards",
    grid=dict(rows=1, columns=3, pattern="independent"),
)
fig.show()




In [34]:
# Finance Trend
# Group by accounting year AND accounting month. Grouping by the month number alone
# adds the same month of every year together (the saved describe() output spans
# accounting years 2019-2025), which yields a seasonal profile, not a trend over time.
# NOTE(review): in the sampled rows accounting_month differs from the calendar month
# of invoice_date (e.g. 5 for a September invoice), so it is presumably a fiscal
# month index; the "M" prefix in the label avoids it being read as a calendar month.
period_columns = ["accounting_year", "accounting_month"]
if all(column in invoices.columns for column in period_columns):
    finance_trend = (
        invoices.groupby(period_columns, as_index=False)["invoice_amount_total"]
        .sum()
        .sort_values(period_columns)
    )
    # Zero-padded label ("2024-M05") so the periods stay in chronological order.
    finance_trend["accounting_period"] = (
        finance_trend["accounting_year"].astype(str)
        + "-M"
        + finance_trend["accounting_month"].astype(str).str.zfill(2)
    )
    fig = px.line(
        finance_trend,
        x="accounting_period", y="invoice_amount_total",
        markers=True,
        title="Revenue Trend by Month (Fortnox)"
    )
    # Force a categorical axis so the period labels are never parsed as dates.
    fig.update_xaxes(type="category")
    fig.update_layout(yaxis_title="Revenue (SEK)", xaxis_title="Accounting period (year-month)")
    fig.show()


In [33]:

# Pipeline bar chart: one bar per deal stage, bar height = summed deal amount.
fig = px.bar(
    data_frame=pipeline_summary,
    x="deal_stage",
    y="deal_amount",
    labels=dict(deal_stage="Deal Stage", deal_amount="Value (SEK)"),
    title="Pipeline Value by Stage (Hubspot)",
)
fig.show()