In [40]:
from pathlib import Path
import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

In [41]:
# Load the SMS Spam Collection and split it into one record per line.
# The encoding is passed explicitly: without it read_text() uses the platform's
# locale encoding, so non-ASCII characters could decode differently (or fail)
# from one machine to another.
data = (
    Path("./smsspamcollection/SMSSpamCollection")
    .read_text(encoding="utf-8")
    .strip()  # drop leading/trailing whitespace so no empty record is produced
    .split("\n")
)

In [42]:
# Preallocate one row per message with two integer columns:
# column 0 = class label (0 for ham, 1 for spam), column 1 = number of digits.
# np.empty leaves the contents uninitialized; the loop in the next cell
# assigns both columns of every row.
digit_counts = np.empty((len(data), 2), dtype=int)

In [43]:
# Fill digit_counts row by row: column 0 is the class label (0 = ham,
# anything else = 1), column 1 is how many digit characters the message holds.
for i, line in enumerate(data):
    # Split on the first tab only, so a tab inside the message body cannot
    # break the two-way unpacking of label and text.
    case, message = line.split("\t", 1)
    num_digits = sum(c.isdigit() for c in message)
    digit_counts[i, 0] = 0 if case == "ham" else 1
    digit_counts[i, 1] = num_digits

In [60]:
# np.unique with return_counts=True yields a tuple: the distinct digit counts
# in ascending order, and how many messages have each of those counts.
# The next cell rebinds unique_counts to a 2-D array built from this tuple.
unique_counts = np.unique(digit_counts[:, 1], return_counts=True)

In [61]:
# Turn the (values, counts) tuple into a 2-D array with one row per distinct
# digit count: column 0 is the digit count, column 1 its frequency.
# This rebinds unique_counts, so re-running the cell on its own would
# transpose the array again.
unique_counts = np.vstack(unique_counts).T

In [62]:
# Rescale each column (digit count, frequency) by its standard deviation so
# both features contribute on a comparable scale to the clustering distance.
whitened_counts = whiten(unique_counts)

In [63]:
# k-means starts from randomly chosen centroids. Seed NumPy's global RNG
# (which kmeans draws from when it is given no seed of its own) so the
# clustering, and every cell that depends on it, is repeatable.
np.random.seed(0)
# Ask for 3 clusters; the second return value (the distortion) is not needed.
codebook, _ = kmeans(whitened_counts, 3)

In [66]:
# Assign every row of whitened_counts to a centroid in codebook; codes[i] is
# the cluster index of row i. The second return value is discarded.
codes, _ = vq(whitened_counts, codebook)

In [68]:
# Rows are ordered by ascending digit count, so the first row (fewest digits)
# is taken to mark the ham cluster and the last row (most digits) the spam
# cluster. The code that is neither of those is labelled "unknown".
ham_code, spam_code = codes[0], codes[-1]
unknown_code = next(
    iter(set(range(3)).symmetric_difference((ham_code, spam_code)))
)

In [71]:
# For each cluster, show its last row, i.e. the [digit count, frequency] pair
# with the largest digit count that was assigned to that cluster.
for label, cluster_code in (
    ("definitely ham:", ham_code),
    ("definitely spam:", spam_code),
    ("unknown:", unknown_code),
):
    print(label, unique_counts[codes == cluster_code][-1])

definitely ham: [   0 4110]
definitely spam: [47  1]
unknown: [20 18]


In [72]:
# Boolean masks over all messages, using the cluster edges printed above:
# no digits -> ham, 1 to 20 digits -> unknown, more than 20 digits -> spam.
digits = digit_counts[:, 1]
predicted_hams = digits == 0
predicted_unknowns = (digits > 0) & (digits <= 20)
predicted_spams = digits > 20

In [73]:
# Select the rows of digit_counts (true label, digit count) that fall into
# each predicted group.
ham_cluster, unk_cluster, spam_cluster = (
    digit_counts[mask]
    for mask in (predicted_hams, predicted_unknowns, predicted_spams)
)

In [74]:
# For every predicted group, tally the true labels stored in column 0
# (0 = ham, 1 = spam) to see how pure the group is.
for label, cluster in (
    ("hams:", ham_cluster),
    ("spams:", spam_cluster),
    ("unknowns:", unk_cluster),
):
    print(label, np.unique(cluster[:, 0], return_counts=True))

hams: (array([0, 1]), array([4071,   39]))
spams: (array([0, 1]), array([  1, 232]))
unknowns: (array([0, 1]), array([755, 476]))


## Minimizing a Function With One Variable

In [76]:
from scipy.optimize import minimize_scalar

In [77]:
def objective_function(x):
    """Evaluate the quartic ``3*x**4 - 2*x + 1`` at ``x``.

    Note: a later cell defines another function under this same name, which
    replaces this one once that cell has run.
    """
    quartic_term = 3 * x ** 4
    linear_term = 2 * x
    return quartic_term - linear_term + 1

In [78]:
# Minimize the quartic with minimize_scalar's defaults (no bracket, no bounds).
# NOTE: res is reassigned two cells further down before it is ever displayed.
res = minimize_scalar(objective_function)

In [79]:
def objective_function(x):
    """Evaluate ``x**4 - x**2`` at ``x``.

    This reuses the name of the objective defined in an earlier cell and
    therefore replaces it from here on.
    """
    squared = x ** 2
    return x ** 4 - squared

In [80]:
# Minimize the redefined objective (x**4 - x**2), again with default settings.
res = minimize_scalar(objective_function)

In [83]:
# Display the result object; the recorded output reports the minimizer
# x ≈ -0.707 with a function value of about -0.25.
res

     fun: -0.24999999999998732
 message: 'Solution found.'
    nfev: 10
     nit: 10
  status: 0
 success: True
       x: -0.707106701474177

In [84]:
# bracket=(-1, 0) only gives the search a starting interval; it does not
# confine the solution to it (the recorded result below is x ≈ +0.707).
res = minimize_scalar(objective_function, bracket=(-1, 0))

In [85]:
# Display the result; note that the reported x ≈ +0.707 lies outside the
# (-1, 0) bracket that was passed in.
res

     fun: -0.24999999999999997
 message: '\nOptimization terminated successfully;\nThe returned value satisfies the termination criteria\n(using xtol = 1.48e-08 )'
    nfev: 17
     nit: 13
 success: True
       x: 0.7071067809244586

In [86]:
# Use the 'bounded' method with bounds=(-1, 0) so the search is restricted to
# that interval (the recorded result below is x ≈ -0.707).
res = minimize_scalar(objective_function, method='bounded', bounds=(-1, 0))

In [87]:
# Display the result; this time the reported minimizer x ≈ -0.707 is inside
# the requested (-1, 0) bounds.
res

     fun: -0.24999999999998732
 message: 'Solution found.'
    nfev: 10
     nit: 10
  status: 0
 success: True
       x: -0.707106701474177

## Minimizing a Function With Many Variables

In [88]:
import numpy as np
from scipy.optimize import minimize, LinearConstraint

In [89]:
n_buyers = 10  # number of buyers, i.e. the length of the solution vector x
n_shares = 15  # total number of shares that must be allocated across buyers

In [90]:
np.random.seed(10)  # fix NumPy's global RNG so the draws below are repeatable
prices = np.random.random(n_buyers)  # one price per buyer, floats in [0, 1)
money_available = np.random.randint(1, 4, n_buyers)  # integer budgets 1..3 (upper limit 4 is exclusive)

In [91]:
# Most shares each buyer can afford: their budget divided by their price.
n_shares_per_buyer = money_available / prices
# Print the three arrays on separate lines for a quick visual check.
print(prices, money_available, n_shares_per_buyer, sep="\n")

[0.77132064 0.02075195 0.63364823 0.74880388 0.49850701 0.22479665
 0.19806286 0.76053071 0.16911084 0.08833981]
[1 1 1 3 1 3 3 2 1 1]
[ 1.29647768 48.18824404  1.57816269  4.00638948  2.00598984 13.34539487
 15.14670609  2.62974258  5.91328161 11.3199242 ]


In [92]:
# One linear constraint on sum(x) (coefficient 1 for every buyer). Setting
# lb == ub == n_shares turns it into an equality: exactly n_shares in total.
constraint = LinearConstraint(np.ones(n_buyers), lb=n_shares, ub=n_shares)

In [93]:
# One (lower, upper) pair per buyer: no fewer than 0 shares and no more than
# the number of shares that buyer can afford.
bounds = [(0, n) for n in n_shares_per_buyer]

In [94]:
def objective_function(x, prices):
    """Return the negated dot product of ``x`` and ``prices``.

    The sign is flipped so that minimizing this function maximizes
    ``x.dot(prices)``. This definition reuses the name of the scalar
    objectives defined in earlier cells and replaces them.
    """
    total = x.dot(prices)
    return -total

In [95]:
# Maximize x.dot(prices) by minimizing its negative, subject to the
# total-shares equality constraint and the per-buyer bounds. No method is
# named, so SciPy selects the solver itself.
res = minimize(
    objective_function,
    # Random starting point in [0, 10) per buyer; it comes from the global RNG
    # seeded in an earlier cell, so it depends on the cells running in order.
    x0=10 * np.random.random(n_buyers),
    args=(prices,),  # forwarded to objective_function as its second argument
    constraints=constraint,
    bounds=bounds,
)

In [96]:
# Display the result object; fun is the negated value of x.dot(prices) and
# x holds the number of shares allocated to each buyer.
res

     fun: -8.78302015708768
     jac: array([-0.7713207 , -0.02075195, -0.63364828, -0.74880397, -0.49850702,
       -0.22479665, -0.1980629 , -0.76053071, -0.16911089, -0.08833981])
 message: 'Optimization terminated successfully'
    nfev: 187
     nit: 17
    njev: 17
  status: 0
 success: True
       x: array([1.29647768e+00, 3.73026111e-14, 1.57816269e+00, 4.00638948e+00,
       2.00598984e+00, 3.48323773e+00, 6.66133815e-16, 2.62974258e+00,
       2.79628716e-15, 5.05103555e-15])

In [98]:
# Sanity checks on the solution: the allocated shares should add up to
# n_shares (up to floating-point rounding), and each buyer's leftover money
# (budget minus shares * price) should not be negative.
print("The total number of shares is:", sum(res.x))
print("Leftover money for each buyer:", money_available - res.x * prices)

The total number of shares is: 14.999999999999996
Leftover money for each buyer: [3.66373598e-15 1.00000000e+00 1.77635684e-15 3.55271368e-15
 2.10942375e-15 2.21697984e+00 3.00000000e+00 4.88498131e-15
 1.00000000e+00 1.00000000e+00]
