In [1]:
import scipy

In [2]:
#help(scipy)

In [3]:
from pathlib import Path
import numpy as np
from scipy.cluster.vq import whiten,kmeans,vq

In [4]:
# Load the corpus as a list of lines, one labelled message per line.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale. The text is split on "\n" only (not
# str.splitlines) so other line-separator characters inside a message
# cannot break a record in two.
data = (
    Path("SMSSpamCollection")
    .read_text(encoding="utf-8")
    .strip()
    .split("\n")
)

In [7]:
# One row per input line, two integer columns. The array is left uninitialised
# here; the loop in the next cell writes both columns of every row.
digit_counts = np.empty(shape=(len(data), 2), dtype=int)

In [13]:
# Fill digit_counts row by row:
#   column 0 -> label (0 for 'ham', 1 for anything else)
#   column 1 -> number of digit characters in the message
for i, line in enumerate(data):
    # Split on the first tab only, so a message that itself contains a tab
    # does not raise "too many values to unpack". A line with no tab at all
    # still raises ValueError, as before.
    case, message = line.split("\t", 1)
    num_digits = sum(c.isdigit() for c in message)
    digit_counts[i, 0] = 0 if case == 'ham' else 1
    digit_counts[i, 1] = num_digits

In [14]:
# Distinct per-message digit counts (column 1) together with how many
# messages have each count; np.unique returns them as a (values, counts) tuple.
unique_counts = np.unique(digit_counts[:,1], return_counts=True)

In [15]:
unique_counts

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 40, 41, 47]),
 array([4110,  486,  160,   78,   42,   39,   16,   14,   28,   17,   16,
          34,   30,   31,   37,   29,   35,   33,   41,   47,   18,   31,
          28,   36,   34,   16,   16,   13,   19,    9,    2,    6,    3,
           4,    3,    4,    1,    1,    4,    2,    1], dtype=int64))

In [16]:
# Pair each distinct digit count with its frequency as an (n_values, 2) array.
# It is rebuilt from digit_counts rather than reshaped from the previous value
# of unique_counts, so re-running this cell cannot transpose the array again.
unique_counts = np.column_stack(
    np.unique(digit_counts[:, 1], return_counts=True)
)
unique_counts

array([[   0, 4110],
       [   1,  486],
       [   2,  160],
       [   3,   78],
       [   4,   42],
       [   5,   39],
       [   6,   16],
       [   7,   14],
       [   8,   28],
       [   9,   17],
       [  10,   16],
       [  11,   34],
       [  12,   30],
       [  13,   31],
       [  14,   37],
       [  15,   29],
       [  16,   35],
       [  17,   33],
       [  18,   41],
       [  19,   47],
       [  20,   18],
       [  21,   31],
       [  22,   28],
       [  23,   36],
       [  24,   34],
       [  25,   16],
       [  26,   16],
       [  27,   13],
       [  28,   19],
       [  29,    9],
       [  30,    2],
       [  31,    6],
       [  32,    3],
       [  33,    4],
       [  34,    3],
       [  35,    4],
       [  36,    1],
       [  37,    1],
       [  40,    4],
       [  41,    2],
       [  47,    1]], dtype=int64)

In [17]:
# k-means chooses its starting centroids at random. Seed NumPy's global RNG
# first so the three clusters, and every cell that reads them, are the same
# on each run.
# NOTE(review): the digit-count thresholds hard-coded in the later prediction
# cell were read off one particular clustering; re-check them against the
# cluster boundaries printed below after re-running.
np.random.seed(0)
# Rescale each column by its standard deviation before clustering.
whitened_counts = whiten(unique_counts)
# Fit 3 centroids; the second return value (distortion) is not used.
codebook, _ = kmeans(whitened_counts, 3)

In [18]:
# Assign every whitened row to a codebook entry; `codes` holds one cluster
# index per row of unique_counts. The second return value is discarded.
codes, _ = vq(whitened_counts,codebook)

In [19]:
# The first row of unique_counts (fewest digits) is taken to mark the ham
# cluster and the last row (most digits) the spam cluster; whichever of the
# three cluster indices is left over is labelled "unknown".
ham_code, spam_code = codes[0], codes[-1]
unknown_code = list(set(range(3)) ^ {ham_code, spam_code})[0]

In [20]:
# For each cluster, show its last row of unique_counts, i.e. the largest
# digit count assigned to that cluster together with its frequency.
for label, code in (
    ("definitely ham:", ham_code),
    ("definitely spam:", spam_code),
    ("unknown:", unknown_code),
):
    print(label, unique_counts[codes == code][-1])

definitely ham: [   0 4110]
definitely spam: [47  1]
unknown: [20 18]


In [21]:
# Boolean masks over all messages, using the cluster boundaries printed above:
# 0 digits -> ham, more than 20 digits -> spam, 1..20 digits -> unknown.
digits = digit_counts[:, 1]
predicted_hams = digits == 0
predicted_unknowns = (digits > 0) & (digits <= 20)
predicted_spams = digits > 20

In [22]:
# Select the (label, digit count) rows that fall under each predicted class.
ham_cluster = digit_counts[predicted_hams, :]
unk_cluster = digit_counts[predicted_unknowns, :]
spam_cluster = digit_counts[predicted_spams, :]

In [23]:
# Tally the true labels (column 0: 0 = ham, 1 = spam) inside each predicted
# group to see how well the digit-count rule separates them.
for label, cluster in (
    ("hams:", ham_cluster),
    ("spams:", spam_cluster),
    ("unknowns:", unk_cluster),
):
    print(label, np.unique(cluster[:, 0], return_counts=True))

hams: (array([0, 1]), array([4071,   39], dtype=int64))
spams: (array([0, 1]), array([  1, 232], dtype=int64))
unknowns: (array([0, 1]), array([755, 476], dtype=int64))


# The `optimize` Module in SciPy

In [29]:
from scipy.optimize import minimize_scalar

def objective_function(x):
    """Return the quartic ``3*x**4 - 2*x + 1`` evaluated at ``x``."""
    quartic_term = 3 * x**4
    linear_term = 2 * x
    return quartic_term - linear_term + 1

In [30]:
# Minimize the quartic with default settings (no method, bracket or bounds).
res = minimize_scalar(objective_function)

In [31]:
res

     fun: 0.17451818777634331
    nfev: 16
     nit: 12
 success: True
       x: 0.5503212087491959

In [32]:
def objective_function(x):
    """Return ``x**4 - x**2`` evaluated at ``x``.

    Rebinds the name used by the previous example, so later calls to
    ``minimize_scalar(objective_function)`` optimise this function instead.
    """
    quartic_term = x**4
    quadratic_term = x**2
    return quartic_term - quadratic_term

In [33]:
# Minimize x**4 - x**2 with default settings. The function is symmetric in x,
# and the result displayed below is the minimum at x ~ 0.707.
res = minimize_scalar(objective_function)
res

     fun: -0.24999999999999994
    nfev: 15
     nit: 11
 success: True
       x: 0.7071067853059209

In [34]:
# Supplying bracket=(-1, 0) does not confine the search to that interval:
# the result displayed below is still x ~ 0.707, which lies outside it.
res = minimize_scalar(objective_function, bracket=(-1, 0))
res

     fun: -0.24999999999999997
    nfev: 17
     nit: 13
 success: True
       x: 0.7071067809244586

In [35]:
# method='bounded' with bounds=(-1, 0) keeps the search inside that interval;
# the result displayed below is the other minimum, x ~ -0.707.
res = minimize_scalar(objective_function, method='bounded', bounds=(-1, 0))
res

     fun: -0.24999999999998732
 message: 'Solution found.'
    nfev: 10
  status: 0
 success: True
       x: -0.707106701474177

In [36]:
import numpy as np
from scipy.optimize import minimize, LinearConstraint

# Problem size. n_buyers sets the length of the random price/money arrays and
# of the constraint's coefficient vector; n_shares is the total that the
# solution vector must sum to (used as both lb and ub of the constraint).
n_buyers = 10
n_shares = 15

In [37]:
# Seed NumPy's global RNG so the draws below are repeatable. The order of the
# two draws matters: swapping them would change both arrays.
np.random.seed(10)
# One random float per buyer (money_available / prices is later used as the
# number of shares each buyer can afford).
prices = np.random.random(n_buyers)
# One integer per buyer drawn from 1, 2 or 3 (the upper bound 4 is exclusive).
money_available = np.random.randint(1, 4, n_buyers)

In [38]:
# Upper limit on the shares each buyer can afford: budget divided by price.
n_shares_per_buyer = np.divide(money_available, prices)
# Show the three per-buyer arrays, one per line.
for per_buyer_values in (prices, money_available, n_shares_per_buyer):
    print(per_buyer_values)

[0.77132064 0.02075195 0.63364823 0.74880388 0.49850701 0.22479665
 0.19806286 0.76053071 0.16911084 0.08833981]
[1 1 1 3 1 3 3 2 1 1]
[ 1.29647768 48.18824404  1.57816269  4.00638948  2.00598984 13.34539487
 15.14670609  2.62974258  5.91328161 11.3199242 ]


In [39]:
# Equality constraint: the coefficient row is all ones and lb == ub ==
# n_shares, so the entries of the solution must sum to exactly n_shares.
constraint = LinearConstraint(np.ones(n_buyers), lb=n_shares, ub=n_shares)

In [40]:
# Per-buyer (lower, upper) limits: nobody buys a negative amount, and nobody
# buys more shares than their budget allows.
bounds = [(0, max_affordable) for max_affordable in n_shares_per_buyer]

In [43]:
def objective_function(x, prices):
    """Return the negated dot product of ``x`` and ``prices``.

    The sign is flipped so that minimising this function maximises
    ``x.dot(prices)``.
    """
    total = x.dot(prices)
    return -total

In [44]:
# Solve the constrained problem from a random starting point. No method is
# named, so SciPy selects a solver that supports the bounds and constraint.
res = minimize(
    fun=objective_function,
    x0=10 * np.random.random(n_buyers),
    args=(prices,),
    bounds=bounds,
    constraints=constraint,
)

In [45]:
res

     fun: -8.783020157087599
     jac: array([-0.7713207 , -0.02075195, -0.6336484 , -0.74880397, -0.49850702,
       -0.22479653, -0.19806278, -0.76053071, -0.16911077, -0.08833981])
 message: 'Optimization terminated successfully'
    nfev: 187
     nit: 17
    njev: 17
  status: 0
 success: True
       x: array([1.29647768e+00, 1.44883543e-13, 1.57816269e+00, 4.00638948e+00,
       2.00598984e+00, 3.48323773e+00, 2.99760217e-14, 2.62974258e+00,
       1.13511185e-14, 3.99716714e-14])

In [46]:
# Sanity checks on the solution: the allocations should add up to n_shares,
# and no buyer's leftover money should be meaningfully negative.
total_shares = sum(res.x)
leftover_money = money_available - res.x * prices
print("The total number of shares is:", total_shares)
print("Leftover money for each buyer:", leftover_money)

The total number of shares is: 15.000000000000002
Leftover money for each buyer: [1.32116540e-14 1.00000000e+00 1.33226763e-14 2.62012634e-14
 1.58761893e-14 2.21697984e+00 3.00000000e+00 3.08642001e-14
 1.00000000e+00 1.00000000e+00]
