# Optimization
## Homework 3

In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
import statsmodels.api as sm
from sklearn import linear_model

### Problem 1

In [2]:
# Objective handed to the minimizer: the negated machine output,
# so that minimizing it maximizes production.
def neg_machines(resources):
    """Return minus the production level 0.05 * labor^(2/3) * capital^(1/3).

    resources[0] is labor and resources[1] is capital.
    """
    labor = resources[0]
    capital = resources[1]
    labor_term = labor ** (2 / 3)
    capital_term = capital ** (1 / 3)
    return -0.05 * labor_term * capital_term

In [3]:
# Inequality constraint: the point is feasible when the returned value is >= 0
def confun(resources):
    """Return the budget slack: 100000 minus 12 per unit of labor and 15 per unit of capital."""
    labor, capital = resources[0], resources[1]
    negative_spend = -12 * labor - 15 * capital
    return negative_spend + 100000

In [4]:
# Budget constraint in the dictionary form expected by scipy's minimize
constr1 = dict(type='ineq', fun=confun)
constraints = [constr1]

In [5]:
# Maximize output (by minimizing its negative) subject to the budget constraint,
# starting from labor = capital = 1000.
# Labor and capital are bounded below by 0: the objective raises them to
# fractional powers, which are not real-valued for negative inputs.
opt_resources = minimize(
    neg_machines,
    [1000, 1000],
    bounds=[(0, None), (0, None)],
    constraints=constraints,
)

In [6]:
# Display solution: the optimal [labor, capital] found by the solver
opt_resources.x

array([5555.49858657, 2222.26779733])

In [7]:
# Display objective: the solver minimized the negated output, so flip the sign
# to recover the maximized number of machines
-opt_resources.fun

204.66841656722596

### Problem 2

In [8]:
# Read homework4stocks.csv from the notebook's working directory.
# A plain relative filename is used (no '.\' prefix) so the path resolves on
# POSIX systems as well as on Windows.
df = pd.read_csv('homework4stocks.csv')

In [9]:
# Minimum expected return required by the mean-return constraint
threshold_return = 0.01

# The first column is not counted as a stock (presumably a date/label column --
# confirm against the CSV), so estimate the moments from the remaining columns
# only instead of relying on pandas to silently drop a non-numeric column.
stock_returns = df.iloc[:, 1:]
nstocks = stock_returns.shape[1]
meanvec = stock_returns.mean(axis=0)
Sigma = stock_returns.cov()

# Starting point for the solver: equal weights that sum to 1, so the
# full-investment equality constraint already holds at the start.
w = np.ones(nstocks) / nstocks

In [10]:
# Objective handed to the minimizer
def obj_fun(x):
    """Return sqrt(x' Sigma x) for the weight vector x, using the module-level Sigma."""
    quadratic_form = x @ Sigma @ x
    return np.sqrt(quadratic_form)

In [11]:
# Inequality constraint: the point is feasible when the returned value is >= 0
def mean_con_fun(x):
    """Return how far the weighted mean x @ meanvec exceeds threshold_return."""
    weighted_mean = x @ meanvec
    return weighted_mean - threshold_return

In [12]:
# Equality constraint: the point is feasible when the returned value is 0
def all_invest_con(x):
    """Return the total of the weights minus 1 (zero when fully invested)."""
    total_weight = np.sum(x)
    return total_weight - 1

In [13]:
# Full-investment equality and minimum-mean-return inequality, in scipy's dict form
con1 = dict(type='eq', fun=all_invest_con)
con2 = dict(type='ineq', fun=mean_con_fun)
cons = [con1, con2]
# Every weight is restricted to the interval [0, 1]
bds = [(0, 1) for _ in range(nstocks)]

In [14]:
# Minimize obj_fun from the starting weights w, subject to the bounds and constraints
opt_port = minimize(fun=obj_fun, x0=w, bounds=bds, constraints=cons)

In [15]:
# Optimal portfolio weights returned by the solver (one entry per stock)
opt_port.x

array([7.53243847e-18, 5.23351725e-02, 4.95543802e-17, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.70268571e-18,
       0.00000000e+00, 4.62422629e-18, 0.00000000e+00, 0.00000000e+00,
       2.24283433e-02, 2.45309627e-17, 1.11054206e-17, 1.83460538e-02,
       9.90654330e-18, 0.00000000e+00, 1.39606635e-01, 0.00000000e+00,
       2.68842608e-01, 4.54576600e-18, 1.09636664e-17, 1.22490157e-01,
       6.13443375e-02, 5.26744181e-18, 3.14606692e-01])

In [16]:
# Minimized objective value: obj_fun evaluated at the optimal weights
opt_port.fun

0.02973481385164782

### Problem 3


For each case, we run the ordinary linear regression and then we choose the one with the smallest sum of squared errors.

In [17]:
# Read variable_selection.csv from the notebook's working directory.
# A plain relative filename is used (no '.\' prefix) so the path resolves on
# POSIX systems as well as on Windows.
df = pd.read_csv('variable_selection.csv')
# One slot per candidate regression fitted in the following cells
SSE = np.zeros(6)

In [18]:
# Fit one OLS model (with intercept) per candidate predictor subset and record
# its sum of squared errors. The subsets are listed in the order in which the
# SSE slots are reported: x1, x2, x3, (x1, x2), (x1, x3), (x2, x3).
feature_sets = [
    ['x1'],
    ['x2'],
    ['x3'],
    ['x1', 'x2'],
    ['x1', 'x3'],
    ['x2', 'x3'],
]

y = df['y']
for slot, features in enumerate(feature_sets):
    X = sm.add_constant(df[features])
    model = sm.OLS(y, X).fit()
    predictions = model.predict(X)
    # Vectorized sum of the squared residuals of the in-sample predictions
    SSE[slot] = np.square(predictions - y).sum()

In [24]:
# Sum of squared errors of each candidate regression, in the order
# x1, x2, x3, (x1, x2), (x1, x3), (x2, x3)
SSE

array([7901.29942878,  878.83582325, 8575.63588048,   26.1908733 ,
       7860.08875973,  878.18105044])

Since the 4th model (predictors $x_1$ and $x_2$) has the smallest sum of squared errors, we choose it. The corresponding coefficients are 2.9992, 3.9692, and 0 (because $x_3$ is excluded from the model), which are very close to the true values $\beta_1 = 3, \beta_2 = 4, \beta_3 = 0$.

### Problem 4

Let the $i^{th}$ team's rating be $x_i$. Requiring the average of the ratings to be 85 is equivalent to
\begin{align}
\dfrac{\sum_{i=1}^{32}x_i}{32} = 85.
\end{align}
Rearranging gives
\begin{align}
x_{32}=85\times 32 - \sum_{i=1}^{31}x_i.
\end{align}
That is to say, instead of having 32 team ratings, we only need the first 31, and the last one can be calculated. Besides the ratings, the home team advantage is also a variable. As a result, there are 32 variables in total. The initial value to use for optimization is that all the ratings are the same, namely 85, and there is no home team advantage.

In [25]:
# Read nflratings.csv (no header row) from the notebook's working directory.
# A plain relative filename is used (no '.\' prefix) so the path resolves on
# POSIX systems as well as on Windows.
df = pd.read_csv('nflratings.csv', header=None)
df.columns = ['week','home','away','hscore','vscore']

In [26]:
def SSE(xopt):
    """Sum of squared errors between predicted and actual point spreads.

    Parameters
    ----------
    xopt : array-like
        Ratings of every team except the last one, followed by the home team
        advantage as the final element (32 entries for 32 teams).

    Returns
    -------
    The sum over all games in the module-level DataFrame ``df`` of
    (predicted spread - actual spread)**2, where the predicted spread is
    rating[home] - rating[away] + home advantage.

    Notes
    -----
    Reads ``df`` with columns home, away, hscore, vscore; home/away are used
    as 1-based team numbers. Rows are taken positionally, which matches
    label-based access for the default integer index produced by read_csv.
    """
    xopt = np.asarray(xopt, dtype=float)

    # The last element of xopt is the home team advantage
    home_advantage = xopt[-1]
    free_ratings = xopt[:-1]

    # Because the average rating is 85, the rating of the last team follows
    # from the others: 85 * (number of teams) - sum of the other ratings.
    n_teams = len(xopt)
    last_rating = 85 * n_teams - free_ratings.sum()
    ratings = np.append(free_ratings, last_rating)

    # Vectorized over all games instead of a Python loop per row
    home_idx = df.home.to_numpy() - 1
    away_idx = df.away.to_numpy() - 1
    pred_spread = ratings[home_idx] - ratings[away_idx] + home_advantage
    real_spread = (df.hscore - df.vscore).to_numpy()

    return np.sum((pred_spread - real_spread) ** 2)


In [27]:
# Starting point: 31 free ratings at 85 and a home team advantage of 0
ini = np.append(np.full(31, 85.0), 0.0)
opt = minimize(fun=SSE, x0=ini)
print(opt.x)
print(opt.fun)

[84.52234186 89.84144149 92.7456974  83.08898822 88.75995403 79.81205271
 87.5440603  76.88699868 92.12111605 85.63576823 70.50405571 92.25557363
 86.98432736 90.86235067 78.43978422 76.88819569 86.61526752 92.06483461
 96.122671   95.6286823  85.09888047 93.1484154  75.0328585  90.95814514
 86.64232697 67.71996264 92.60581806 85.24192731 74.73183618 79.17109122
 82.18828627  2.17273049]
42925.67976864548


In [28]:
# Rebuild the full 32-team rating vector: the 31 optimized ratings plus the
# last one implied by the average-of-85 constraint.
rank = np.concatenate((opt.x[0:31], np.array([85*32-sum(opt.x[0:31])])))
home_advantage = opt.x[31]

N = len(df)

# Predicted spread for every game at once; home/away hold 1-based team numbers
home_idx = df.home.to_numpy() - 1
away_idx = df.away.to_numpy() - 1
pred_spread = rank[home_idx] - rank[away_idx] + home_advantage

real_spread = df.hscore-df.vscore

In [29]:
# A non-negative spread is classified as True (so a spread of exactly 0 counts as True)
pred_result = pred_spread >= 0
real_result = real_spread >= 0

# Share of games whose predicted and actual classifications agree
# (this is the accuracy of the prediction, stored under the name `precision`)
agreement = pred_result == real_result
precision = sum(agreement) / N
precision

0.70703125

In [30]:
print("The confusion matrix is:")
# Rows (p) are the predicted classifications, columns (r) the actual ones
pd.crosstab(index=pred_result, columns=real_result, rownames=['p'], colnames=['r'])

The confusion matrix is:


r,False,True
p,Unnamed: 1_level_1,Unnamed: 2_level_1
False,72,37
True,38,109


In [31]:
print("Correct predictions:")
# Count the agreeing games from the data rather than hard-coding the two
# diagonal entries read off the printed confusion matrix
int((pred_result == real_result).sum())

Correct predictions:


181