In [22]:
import string

import numpy as np
import pandas as pd

In [23]:
def fake_data(n=100, rng=None):
    """Build a synthetic daily data set for a single stock.

    Parameters
    ----------
    n : int, default 100
        Number of consecutive daily rows to generate, starting at 2024-01-01.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed`` still controls the output.

    Returns
    -------
    pandas.DataFrame
        Columns "Date", "Adj Close", "Volume", "Volatility" and
        "Impact Costs". Every non-date column is drawn uniformly from
        the interval [100, 200).
    """
    uniform = np.random.uniform if rng is None else rng.uniform

    # Columns are drawn in this fixed order so a given seed always yields
    # the same frame.
    value_columns = ("Adj Close", "Volume", "Volatility", "Impact Costs")

    data = {"Date": pd.date_range(start="2024-01-01", periods=n, freq="D")}
    for column in value_columns:
        data[column] = uniform(100, 200, n)

    return pd.DataFrame(data)

In [36]:
# Seed NumPy's global random state so the synthetic data (and every result
# derived from it) is identical on each Restart & Run All.
SEED = 42
np.random.seed(SEED)

# One placeholder ticker per lowercase ASCII letter.
stocks = list(string.ascii_lowercase)

# Independent synthetic frame for every ticker, keyed by ticker.
sample_data = {stock: fake_data() for stock in stocks}

In [37]:
# Split the ticker list by position: the first six tickers, the next six,
# and everything after that.
large_cap, mid_cap, small_cap = stocks[:6], stocks[6:12], stocks[12:]

In [38]:
def screening_level_1(data, measure_name):
    """Bucket stocks into four risk levels by quartile of a per-stock mean.

    Parameters
    ----------
    data : dict
        Maps each stock identifier to a DataFrame that has a
        ``measure_name`` column.
    measure_name : str
        Name of the column to average for every stock.

    Returns
    -------
    dict
        ``{1: [...], 2: [...], 3: [...], 4: [...]}``. Level 1 holds the
        stocks whose mean is at or below the 25% quantile of all means,
        level 2 those at or below the median, level 3 those at or below the
        75% quantile, and level 4 everything above it. Within a level,
        stocks keep the iteration order of ``data``. Empty ``data`` yields
        four empty lists.
    """
    risk_levels = {1: [], 2: [], 3: [], 4: []}
    if not data:
        return risk_levels

    # Read from the `data` argument rather than notebook globals, and
    # average only the requested column so unrelated columns (dates,
    # labels) are never aggregated.
    measure = {
        stock: frame[measure_name].mean() for stock, frame in data.items()
    }

    thresholds = np.quantile(list(measure.values()), [0.25, 0.5, 0.75])

    for stock, value in measure.items():
        # First quartile boundary the value does not exceed; 4 if it is
        # above all three boundaries.
        level = next(
            (i for i, bound in enumerate(thresholds, start=1) if value <= bound),
            4,
        )
        risk_levels[level].append(stock)

    return risk_levels

In [39]:
# Run the quartile screen once per measure, in the order listed.
vol_risk, ic_risk = (
    screening_level_1(sample_data, measure)
    for measure in ('Volatility', 'Impact Costs')
)

In [40]:
# Capitalisation tier that is paired with each risk level; levels 3 and 4
# both use the small-cap list.
cap = {1: large_cap, 2: mid_cap, 3: small_cap, 4: small_cap}

# Final screen result: one (initially empty) list of stocks per risk level.
risk = {level: [] for level in cap}

for stock in stocks:
    # A stock is kept only at the first level where its cap tier, volatility
    # bucket and impact-cost bucket all agree; otherwise it is left out.
    agreed_level = next(
        (
            level
            for level in risk
            if stock in cap[level]
            and stock in vol_risk[level]
            and stock in ic_risk[level]
        ),
        None,
    )
    if agreed_level is not None:
        risk[agreed_level].append(stock)

In [41]:
# Display the combined screen: risk level -> stocks whose cap tier,
# volatility bucket and impact-cost bucket all matched that level.
risk

{1: [], 2: ['j'], 3: ['x', 'y'], 4: ['u']}