In [9]:
import sqlite3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [10]:
df = pd.read_csv("deals.csv")
df.head()

Unnamed: 0,deal_id,opportunity_id,deal_type,buyer_company,supplier_company,buyer_name,buyer_am,supplier_name,supplier_am,supplier_user_id,...,confirmed_gross_revenue,confirmed_gross_profit,confirmed_buyer_price_per_ton,confirmed_supplier_price_per_ton,confirmed_supplier_tonnage,confirmed_material_cost,confirmed_logistics_cost,confirmed_financing_cost,confirmed_supplier_fee,confirmed_gross_profit_margin
0,6576b8fd-2e8d-4154-a4d2-8396f4ead5fb,,agent,Company_1,Company_249,Allison Hill,AM_A,Denise Davenport,AM_A,ce5ce479-0556-4e8b-a853-256bde6fe14d,...,8382.283076,137.866527,,,,,,,,1.64
1,34ed0d0b-af94-47c5-a405-73f56d5d71ae,cae44c2f-b87d-4289-bcd8-ed5a22fa0bf5,trading,,,,,,,,...,,,,,,,,,,
2,284f00eb-f1f5-4b89-909e-874c063669e7,,agent,Company_2,Company_281,Noah Rhodes,AM_B,Amber Obrien,AM_B,0dbe8fd8-e7f5-4f40-b6ea-b98bfa0d769d,...,16873.712705,0.0,,,,,,,,0.0
3,0f4e1b59-7fc1-4010-9401-871055a80f0c,,agent,Company_3,Company_281,Angie Henderson,AM_A,Amber Obrien,AM_B,0dbe8fd8-e7f5-4f40-b6ea-b98bfa0d769d,...,17107.661569,0.0,,,,,,,,0.0
4,72f522c4-ffe2-4343-af40-c31ad1859255,,agent,Company_4,Company_281,Daniel Wagner,AM_C,Amber Obrien,AM_B,0dbe8fd8-e7f5-4f40-b6ea-b98bfa0d769d,...,8142.15711,0.0,,,,,,,,0.0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1775 entries, 0 to 1774
Data columns (total 54 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   deal_id                             1775 non-null   object 
 1   opportunity_id                      540 non-null    object 
 2   deal_type                           1775 non-null   object 
 3   buyer_company                       1774 non-null   object 
 4   supplier_company                    1774 non-null   object 
 5   buyer_name                          1774 non-null   object 
 6   buyer_am                            1774 non-null   object 
 7   supplier_name                       1774 non-null   object 
 8   supplier_am                         1774 non-null   object 
 9   supplier_user_id                    1281 non-null   object 
 10  deal_stage                          540 non-null    object 
 11  buyer_country                       1774 no

In [8]:
df.isnull().sum()

deal_id                                  0
opportunity_id                        1235
deal_type                                0
buyer_company                            1
supplier_company                         1
buyer_name                               1
buyer_am                                 1
supplier_name                            1
supplier_am                              1
supplier_user_id                       494
deal_stage                            1235
buyer_country                            1
supplier_country                         2
buyer_region                            11
supplier_region                         40
deal_created_at                          0
buyer_company_id                       740
supplier_company_id                    242
buyer_user_id                         1214
buyer_am_id                             73
supplier_am_id                          76
buyer_country_id                       740
supplier_country_id                    242
buyer_payme

In [11]:
# Connect to SQLite database (creates a file "deals.db")
conn = sqlite3.connect("deals.db")

# Save the DataFrame to SQLite table
df.to_sql("deals", conn, if_exists="replace", index=False)

print("Data loaded successfully into SQLite!")

Data loaded successfully into SQLite!


### Account Manager Performance Query

In [12]:
# SQL query to calculate account manager performance
query = """
SELECT 
    buyer_am AS account_manager,
    SUM(
        CASE 
            WHEN supplier_am IS NOT NULL THEN booked_gross_revenue
            ELSE booked_gross_revenue * 0.5 
        END
    ) AS total_revenue
FROM deals
GROUP BY buyer_am

UNION ALL

SELECT 
    supplier_am AS account_manager,
    SUM(
        CASE 
            WHEN buyer_am IS NOT NULL THEN booked_gross_revenue
            ELSE booked_gross_revenue * 0.5 
        END
    ) AS total_revenue
FROM deals
GROUP BY supplier_am;
"""

# Execute query and load results into a DataFrame
df_am_performance = pd.read_sql_query(query, conn)
print(df_am_performance)

   account_manager  total_revenue
0             None   5.570841e+03
1             AM_A   4.630234e+06
2             AM_B   2.298646e+04
3             AM_C   9.377471e+06
4             AM_D   5.290534e+06
5             AM_E   1.312472e+06
6             AM_F   7.047368e+06
7             AM_I   2.566650e+04
8             AM_J   2.684019e+05
9             None   5.570841e+03
10            AM_A   5.568653e+06
11            AM_B   7.525578e+05
12            AM_C   5.824511e+05
13            AM_D   7.855944e+06
14            AM_E   5.225353e+06
15            AM_F   5.727840e+06
16            AM_G   6.134077e+05
17            AM_H   3.886502e+04
18            AM_I   1.457801e+05
19            AM_J   1.464282e+06


### Revenue Reporting from Buyer Companies

In [23]:
query = """
SELECT
    buyer_company, deal_created_at,
    SUM(booked_gross_revenue) AS total_revenue
FROM deals
GROUP BY buyer_company
ORDER BY total_revenue DESC;
"""

df_revenue_report = pd.read_sql_query(query, conn)
print(df_revenue_report)

   buyer_company                 deal_created_at  total_revenue
0    Company_262  2023-01-28 00:00:00.000000 UTC   5.471611e+06
1    Company_256  2022-08-01 00:00:00.000000 UTC   2.716498e+06
2    Company_267  2022-12-24 00:00:00.000000 UTC   2.671013e+06
3    Company_259  2024-05-19 00:00:00.000000 UTC   1.826183e+06
4    Company_265  2022-09-13 00:00:00.000000 UTC   1.752960e+06
5    Company_258  2020-04-21 00:00:00.000000 UTC   1.371107e+06
6    Company_254  2023-02-06 00:00:00.000000 UTC   1.189325e+06
7    Company_266  2021-06-28 00:00:00.000000 UTC   1.163907e+06
8    Company_247  2022-12-26 00:00:00.000000 UTC   1.100390e+06
9    Company_257  2024-06-29 00:00:00.000000 UTC   1.096112e+06
10   Company_261  2023-08-25 00:00:00.000000 UTC   8.095120e+05
11   Company_253  2023-05-15 00:00:00.000000 UTC   7.284276e+05
12   Company_271  2023-09-15 00:00:00.000000 UTC   7.203363e+05
13   Company_274  2024-04-02 00:00:00.000000 UTC   6.579622e+05
14   Company_250  2023-09-24 00:00:00.00

##### Save Account Manager Performance Report

In [14]:
df_am_performance.to_csv("account_manager_performance.csv", index=False)
print("Account Manager Performance report saved!")

Account Manager Performance report saved!


##### Save Revenue Report

In [24]:
df_revenue_report = df_revenue_report.round({'total_revenue': 2})
df_revenue_report['deal_created_at'] = pd.to_datetime(df_revenue_report['deal_created_at'])
df_revenue_report.to_csv("revenue_reports_new.csv", index=False)
print("Revenue report saved with rounded values!")

Revenue report saved with rounded values!


In [25]:
df_revenue_report.values[0]

array(['Company_262', Timestamp('2023-01-28 00:00:00+0000', tz='UTC'),
       5471611.34], dtype=object)