In [None]:

import IPython.display as ip
import pandas as pd


In [None]:
# Load the working DataFrame (presumably sales rows joined with account details,
# going by the file name — confirm against the notebook that produced it).
# NOTE: pd.read_pickle can execute arbitrary code while unpickling; only load
# pickle files that come from a trusted source.
swat = pd.read_pickle('pickles/sales_with_accounts.pkl')


In [None]:
# Inspect the `sector` column: it contains 'technolgy', a misspelling of 'technology'.
swat

In [None]:
# Boolean mask: True for every row whose sector is the misspelled 'technolgy'.
swat['sector'] == 'technolgy'

In [None]:
# Put the `sector` column and its boolean mask side by side to show how
# boolean filters work.
#
# `to_frame` keeps swat's own index on the mask, so the concat below pairs every
# row with its own mask value even when swat's index is not the default 0..n-1
# range. (Resetting the index first would misalign the two columns in that case,
# because concat(axis=1) aligns on index labels.)

sector_bool = (swat['sector'] == 'technolgy').to_frame(name='sector_bool')
pd.concat([swat['sector'], sector_bool], axis=1)

In [None]:
# Render the sector column and its boolean mask (`sector_bool`, built in an
# earlier cell) as two separate HTML tables.
sector_html = swat[['sector']].to_html()
technology_bool_html = sector_bool.to_html()

# Show the two tables next to each other in a flex container. The trailing
# backslashes inside the f-string swallow the newlines, so the markup is
# emitted as one line.
ip.HTML(f"""<div style="display: flex; gap: 50px;">\
                <div>{sector_html}</div>\
                <div>{technology_bool_html}</div>\
              </div>""")

In [None]:
# Select the misspelled rows with a boolean mask, overwrite only those rows
# in place with the correct spelling, then display the frame to check.
technolgy_bool = swat['sector'].eq('technolgy')
swat.loc[technolgy_bool, 'sector'] = 'technology'
swat

In [None]:
# Save the frame, now with the corrected sector spelling, to a new pickle file.
swat.to_pickle('pickles/swat_clean.pkl')

In [None]:
# Flag the rows that have no account recorded, then show only those rows.
account_is_na = swat['account'].isna()
swat.loc[account_is_na]

In [None]:
# Rows whose close date is on or after 1 October 2017.
# NOTE(review): there is no upper bound, so `q4` only means "Q4 2017" if the
# data holds no close dates after 2017 — confirm.
q4 = swat['close_date'] >= pd.Timestamp(year=2017, month=10, day=1)
swat.loc[q4]


```mermaid
---
config:
    theme: forest
---

flowchart LR
    Start[Start] --> A{close_value > 1000?}
    A -- Yes --> BigDeal["big deal focus"]
    A -- No --> B{close_value < 100 or NaN?}
    B -- Yes --> SmallDeal["small deal no focus"]
    B -- No --> C{sector == 'technology' AND employees > 3000?}
    C -- Yes --> BigTech["big_tech"]
    C -- No --> Further["further_review"]
```

In [None]:

# Label every deal by walking the flowchart above, one boolean mask per rule.
# Rules 1 and 2 cannot overlap; rules 3 and 4 only touch rows that are still
# unlabelled, so the order of the steps matters.
swat['strategic_review'] = None

# Rule 1: large deals.
big_deals = swat['close_value'] > 1000
swat.loc[big_deals, 'strategic_review'] = 'big deal focus'

# Rule 2: very small deals, or deals with no close value at all.
no_deal = (swat['close_value'] < 100) | swat['close_value'].isna()
swat.loc[no_deal, 'strategic_review'] = 'small deal no focus'

# Rule 3: technology accounts with more than 3000 employees, among the rows
# that rules 1 and 2 left unlabelled.
big_tech = (
    (swat['sector'] == 'technology')
    & (swat['employees'] > 3000)
    & swat['strategic_review'].isna()
)
swat.loc[big_tech, 'strategic_review'] = 'big_tech'

# Rule 4: everything that is still unlabelled.
or_else = swat['strategic_review'].isna()
swat.loc[or_else, 'strategic_review'] = 'further_review'

# Store the labels with pandas' dedicated string dtype instead of object.
swat['strategic_review'] = swat['strategic_review'].astype('string')


In [None]:
# Display the frame to inspect the newly added `strategic_review` column.
swat

In [None]:

import typing as tp

def equal(data_series: pd.Series, criteria: tp.Any) -> pd.Series:
    """Return the element-wise result of ``data_series == criteria``."""
    matches = data_series == criteria
    return matches

def greater_than(data_series: pd.Series, criteria: tp.Any) -> pd.Series:
    """Return the element-wise result of ``data_series > criteria``."""
    above = data_series > criteria
    return above

def lesser_than(data_series: pd.Series, criteria: tp.Any) -> pd.Series:
    """Return the element-wise result of ``data_series < criteria``."""
    below = data_series < criteria
    return below

def isna(data_series: pd.Series) -> pd.Series:
    """Return a boolean Series that is True where ``data_series`` is missing."""
    missing = data_series.isna()
    return missing

def not_series(series: pd.Series) -> pd.Series:
    """Return the element-wise negation of a boolean mask.

    Intended for boolean Series (True becomes False and vice versa). On
    integer data ``~`` is a bitwise NOT, which is usually not what a filter
    wants.
    """
    return ~series

def and_all(series_list: tp.Iterable[pd.Series]) -> pd.Series:
    """Combine boolean masks with element-wise AND.

    Parameters
    ----------
    series_list
        One or more boolean Series. Any iterable is accepted (list, tuple,
        generator); it is consumed once.

    Returns
    -------
    pd.Series
        ``s0 & s1 & ... & sn``. A single mask is returned as is.

    Raises
    ------
    IndexError
        If ``series_list`` is empty.
    """
    masks = list(series_list)
    if not masks:
        # Same exception type an empty list has always produced here, but with
        # a message that names the actual problem.
        raise IndexError("and_all() needs at least one Series to combine")

    result = masks[0]
    for mask in masks[1:]:
        result = result & mask

    return result

def or_all(series_list: tp.Iterable[pd.Series]) -> pd.Series:
    """Combine boolean masks with element-wise OR.

    Parameters
    ----------
    series_list
        One or more boolean Series. Any iterable is accepted (list, tuple,
        generator); it is consumed once.

    Returns
    -------
    pd.Series
        ``s0 | s1 | ... | sn``. A single mask is returned as is.

    Raises
    ------
    IndexError
        If ``series_list`` is empty.
    """
    masks = list(series_list)
    if not masks:
        # Same exception type an empty list has always produced here, but with
        # a message that names the actual problem.
        raise IndexError("or_all() needs at least one Series to combine")

    result = masks[0]
    for mask in masks[1:]:
        result = result | mask
    return result




In [None]:
def strategic_review(
    swat: pd.DataFrame,
    big_deal_threshold: float = 1000,
    small_deal_threshold: float = 100,
    employee_threshold: float = 3000,
) -> pd.DataFrame:
    """Add a ``strategic_review`` label column to ``swat`` and return the frame.

    The rules follow the flowchart earlier in the notebook and are applied in
    this order:

    1. ``close_value > big_deal_threshold``           -> ``'big deal focus'``
    2. ``close_value < small_deal_threshold`` or NaN  -> ``'small deal no focus'``
    3. still unlabelled, ``sector == 'technology'`` and
       ``employees > employee_threshold``             -> ``'big_tech'``
    4. everything still unlabelled                    -> ``'further_review'``

    Parameters
    ----------
    swat
        Frame with ``close_value``, ``sector`` and ``employees`` columns.
        It is modified in place: any existing ``strategic_review`` column is
        overwritten.
    big_deal_threshold, small_deal_threshold, employee_threshold
        Cut-offs for the rules above; all comparisons are strict. The defaults
        reproduce the values used in the flowchart.

    Returns
    -------
    pd.DataFrame
        The same ``swat`` object, with ``strategic_review`` stored as pandas
        ``string`` dtype.
    """
    # Start from an all-missing column so "not yet labelled" can be detected
    # with isna() in the later rules.
    swat['strategic_review'] = None

    big_deals = greater_than(swat['close_value'], big_deal_threshold)
    swat.loc[big_deals, 'strategic_review'] = 'big deal focus'

    no_deal = or_all([
        lesser_than(swat['close_value'], small_deal_threshold),
        isna(swat['close_value'])])
    swat.loc[no_deal, 'strategic_review'] = 'small deal no focus'

    # Only rows that rules 1 and 2 left unlabelled may become big_tech.
    big_tech = and_all([
        equal(swat['sector'], 'technology'),
        greater_than(swat['employees'], employee_threshold),
        isna(swat['strategic_review'])])
    # Labels use the same spelling as the flowchart and the step-by-step cell
    # ('big_tech' / 'further_review'), so both versions give identical output.
    swat.loc[big_tech, 'strategic_review'] = 'big_tech'

    or_else = isna(swat['strategic_review'])
    swat.loc[or_else, 'strategic_review'] = 'further_review'

    swat['strategic_review'] = swat['strategic_review'].astype('string')

    return swat


In [None]:
# Optional: run the function version of the review. The returned frame is
# displayed below; the labels are in its last column (`strategic_review`).
strategic_review(swat)