In [23]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from matplotlib import pyplot as plt
from tools import *
from sklearn.decomposition import PCA
import plotly.express as px
from sklearn.linear_model import LinearRegression, Ridge, Lasso
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error as MSE
from sklearn.preprocessing import PolynomialFeatures

# Get Data

In [2]:
def getObs(n):
    obs = {}
    for i in range(n):
        try:
            df = pd.read_csv(f'rocket-results/{i}.csv')
        except:
            print('Missing', i)
        obs[i] = df
    return obs

In [3]:
def initializeConfig():
    configs = {}
    with open('sample_list.pkl', 'rb') as f:
        d = pickle.load(f)
    configs.update(d)
    with open('sample_list_100.pkl', 'rb') as f:
        d = pickle.load(f)
    configs.update(d)
    with open('sample_list_200.pkl', 'rb') as f:
        d = pickle.load(f)
    configs.update(d)
    with open('sample_list_300.pkl', 'rb') as f:
        d = pickle.load(f)
    configs.update(d)
    return configs

In [4]:
config = initializeConfig()
obs = getObs(400)

In [5]:
data = {}
for i in range(len(config)):
    data[i] = list(config[i]['S'].values()) + list(config[i]['B'].values())
    data[i].append(obs[i].max()['Altitude (ft)'])
    data[i].append(obs[i].mean()['Stability Margin (cal)'])
    data[i].append(obs[i].max()['Time (sec)'])
df = pd.DataFrame.from_dict(data, orient='index')
df

  data[i].append(obs[i].mean()['Stability Margin (cal)'])


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,9.465233,9.191411,6.313146,6.181568,4.253219,3.362766,2.004250,3.767645,0.004903,-5.121752,0.0100
1,5.050903,2.807665,7.250966,4.202998,7.871765,6.329711,3.563606,6.148326,77117.160000,1.278655,154.9973
2,8.796076,5.131824,8.469040,8.168441,6.226139,7.792487,6.816335,8.795342,51216.000000,5.212006,125.0071
3,5.590219,9.233072,6.352605,8.429602,5.672564,5.705322,5.717624,3.420081,0.004691,-2.489141,0.0100
4,2.916229,4.216994,4.711545,6.462613,4.887217,2.638681,9.605809,5.234709,0.004818,-4.231226,0.0100
...,...,...,...,...,...,...,...,...,...,...,...
395,2.608430,3.346465,5.201403,9.166944,6.192108,3.850187,7.960717,8.586406,70830.750000,1.673402,149.0006
396,7.801407,2.628427,5.543288,9.133362,7.014197,5.730589,4.246228,6.930445,72485.310000,1.614970,150.9995
397,7.364198,8.455093,4.617628,3.805883,4.280772,6.398586,8.552977,8.138172,0.000000,-1.734951,0.0000
398,7.094305,2.423077,2.934079,6.637124,4.526698,3.664447,4.901074,5.327160,73446.170000,0.853824,150.9995


In [6]:
df = df.rename(columns={0: "Schord", 1: "Sspan", 2: "Ssweep", 3: "Stip", 4: "Bchord", 
                        5: "Bspan", 6: "Bsweep", 7: "Btip", 8: "Altitude", 9: "Stability", 10: "Time"})
df['Schordspan'] = df['Schord'] * df['Sspan']
df['Schordsweep'] = df['Schord'] * df['Ssweep']
df['Schordtip'] = df['Schord'] * df['Stip']
df['Sspansweep'] = df['Sspan'] * df['Ssweep']
df['SSpantip'] = df['Sspan'] * df['Stip']
df['SSweeptip'] = df['Ssweep'] * df['Stip']

df['Bchordspan'] = df['Bchord'] * df['Bspan']
df['Bchordsweep'] = df['Bchord'] * df['Bsweep']
df['Bchordtip'] = df['Bchord'] * df['Btip']
df['Bspansweep'] = df['Bspan'] * df['Bsweep']
df['BSpantip'] = df['Bspan'] * df['Btip']
df['BSweeptip'] = df['Bsweep'] * df['Btip']
alt = df.pop("Altitude")
stab = df.pop("Stability")
time = df.pop("Time")

df.insert(len(df.columns), "Altitude", alt)
df.insert(len(df.columns), "Stability", stab)
df.insert(len(df.columns), "Time", time)
df

Unnamed: 0,Schord,Sspan,Ssweep,Stip,Bchord,Bspan,Bsweep,Btip,Schordspan,Schordsweep,...,SSweeptip,Bchordspan,Bchordsweep,Bchordtip,Bspansweep,BSpantip,BSweeptip,Altitude,Stability,Time
0,9.465233,9.191411,6.313146,6.181568,4.253219,3.362766,2.004250,3.767645,86.998844,59.755399,...,39.025144,14.302581,8.524515,16.024619,6.739826,12.669711,7.551304,0.004903,-5.121752,0.0100
1,5.050903,2.807665,7.250966,4.202998,7.871765,6.329711,3.563606,6.148326,14.181241,36.623926,...,30.475799,49.826002,28.051867,48.398181,22.556595,38.917130,21.910210,77117.160000,1.278655,154.9973
2,8.796076,5.131824,8.469040,8.168441,6.226139,7.792487,6.816335,8.795342,45.139920,74.494323,...,69.178850,48.517110,42.439452,54.761021,53.116208,68.537590,59.951999,51216.000000,5.212006,125.0071
3,5.590219,9.233072,6.352605,8.429602,5.672564,5.705322,5.717624,3.420081,51.614896,35.512459,...,53.549937,32.363803,32.433584,19.400628,32.620884,19.512664,19.554736,0.004691,-2.489141,0.0100
4,2.916229,4.216994,4.711545,6.462613,4.887217,2.638681,9.605809,5.234709,12.297724,13.739946,...,30.448892,12.895807,46.945675,25.583158,25.346667,13.812727,50.283615,0.004818,-4.231226,0.0100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,2.608430,3.346465,5.201403,9.166944,6.192108,3.850187,7.960717,8.586406,8.729021,13.567499,...,47.680974,23.840776,49.293623,53.167952,30.650253,33.059270,68.353950,70830.750000,1.673402,149.0006
396,7.801407,2.628427,5.543288,9.133362,7.014197,5.730589,4.246228,6.930445,20.505433,43.245446,...,50.628851,40.195478,29.783881,48.611503,24.333390,39.715531,29.428252,72485.310000,1.614970,150.9995
397,7.364198,8.455093,4.617628,3.805883,4.280772,6.398586,8.552977,8.138172,62.264979,34.005123,...,17.574150,27.390888,36.613344,34.837657,54.726959,52.072793,69.605594,0.000000,-1.734951,0.0000
398,7.094305,2.423077,2.934079,6.637124,4.526698,3.664447,4.901074,5.327160,17.190049,20.815255,...,19.473850,16.587847,22.185683,24.114444,17.959727,19.521095,26.108803,73446.170000,0.853824,150.9995


In [7]:
# Interaction terms help marginally. 
X_train, X_test = df.iloc[:250, :-15], df.iloc[250:, :-15]

# Stability level 1: cluster into flight time of 20 and 90.
Y_train, Y_test = df.iloc[:250, -1] > 90, df.iloc[250:, -1] > 90

In [8]:
reg = LR()
reg.fit(X_train, Y_train)
sum(reg.predict(X_test) == Y_test)
print(sum(reg.predict(X_test.iloc[np.where(Y_test == False)]) == Y_test.iloc[np.where(Y_test == False)]) / sum(Y_test == False))
print(sum(reg.predict(X_test.iloc[np.where(Y_test == True)]) == Y_test.iloc[np.where(Y_test == True)]) / sum(Y_test == True))

0.958904109589041
0.8311688311688312


In [9]:
reg.predict_proba(X_test), Y_test

(array([[2.06790703e-01, 7.93209297e-01],
        [2.94485528e-02, 9.70551447e-01],
        [7.34116482e-02, 9.26588352e-01],
        [6.29730667e-01, 3.70269333e-01],
        [2.75170480e-02, 9.72482952e-01],
        [7.98236661e-01, 2.01763339e-01],
        [9.29147675e-01, 7.08523255e-02],
        [4.58622148e-01, 5.41377852e-01],
        [9.98272861e-01, 1.72713898e-03],
        [9.95574854e-01, 4.42514599e-03],
        [9.98992777e-01, 1.00722277e-03],
        [9.01672886e-01, 9.83271141e-02],
        [5.98911465e-01, 4.01088535e-01],
        [9.99862438e-01, 1.37562214e-04],
        [4.63388769e-02, 9.53661123e-01],
        [2.65241820e-01, 7.34758180e-01],
        [7.87214229e-02, 9.21278577e-01],
        [9.99388429e-01, 6.11570868e-04],
        [9.73115934e-01, 2.68840663e-02],
        [1.18782769e-01, 8.81217231e-01],
        [9.24417693e-01, 7.55823066e-02],
        [9.89045672e-01, 1.09543279e-02],
        [4.37253994e-01, 5.62746006e-01],
        [3.30376233e-02, 9.6696237

In [46]:
# Altitude
lr = LinearRegression()
filtered_df = df[df['Time'] > 90]
filtered_df

X_train, X_test = filtered_df.iloc[:100, :-3], filtered_df.iloc[100:, :-3]

# Stability level 1: cluster into flight time of 20 and 90.
Y_train, Y_test = filtered_df.iloc[:100, -3], filtered_df.iloc[100:, -3]


lr.fit(X_train, Y_train)
lr.score(X_train, Y_train)

0.9187972393740796

In [11]:
ridge = Ridge(alpha=10)
ridge.fit(X_train, Y_train)
lasso = Lasso(alpha=100)
lasso.fit(X_train, Y_train)

In [12]:
print("Training Errors (Lasso, Ridge, Normal)")
np.sqrt([MSE(Y_train, lasso.predict(X_train)), MSE(Y_train, ridge.predict(X_train)), MSE(Y_train, lr.predict(X_train))])

Training Errors (Lasso, Ridge, Normal)


array([4380.94130458, 4393.30979685, 4378.399079  ])

In [13]:
print('Testing Errors (Lasso, Ridge, Normal)')
np.sqrt([MSE(Y_test, lasso.predict(X_test)), MSE(Y_test, ridge.predict(X_test)), MSE(Y_test, lr.predict(X_test))])

Testing Errors (Lasso, Ridge, Normal)


array([4886.41398097, 4937.88158317, 4908.33442782])

In [32]:
poly = PolynomialFeatures(2)
raw = filtered_df.iloc[:,:8]
qr = LinearRegression()
qr.fit(poly.fit_transform(raw[:100]), filtered_df.iloc[:100, -3])
print('R2', qr.score(poly.fit_transform(raw[:100]), filtered_df.iloc[:100, -3]))
print('Test RMSE', np.sqrt(MSE(filtered_df.iloc[100:, -3], qr.predict(poly.fit_transform(raw[100:])))))
print('Train RMSE', np.sqrt(MSE(filtered_df.iloc[:100, -3], qr.predict(poly.fit_transform(raw[:100])))))

R2 0.9949834774889633
Test RMSE 2150.210126002314
Train RMSE 1001.4060504616747


In [20]:
# Should do this from a variety of inital points.
# What if negative ?
# Learning rate
def coordinateAscent(stab_classifier, alt_predictor, initial = [0,0,0,0,0,0,0,0], inc = 0.01, max_iter = 10000, confidence = 0.95):
    '''
    Keep increasing in direction until unstable. 
    '''
    x = initial
    for _ in range(max_iter):
        for i in range(8):
            y = x[:]
            y[i] += np.sign(alt_predictor.coef_[i])*inc
            if 10 > y[i] > 3 and stab_classifier.predict_proba([y])[0,1] > confidence:
                x = y[:]
                break
        else:
            print('No more feasible directions at iteration', _, '.')
            break
    return x

In [21]:
import warnings
warnings.filterwarnings("ignore")

x = coordinateAscent(reg, lr, [4,5,8,8,6,8,7,9], confidence = 0.9)
x, reg.predict_proba([x]), lr.predict([x])

No more feasible directions at iteration 1783 .


([3.0000000000000213,
  3.0000000000000426,
  9.999999999999957,
  3.0000000000001066,
  9.999999999999915,
  4.170000000000082,
  7,
  9],
 array([[0.09990646, 0.90009354]]),
 array([82545.6112047]))

In [None]:
reg.predict([[4,5,8,8,6,8,7,9]])

In [None]:
np.sign(-3)

In [None]:
df.max()

In [34]:
qr.coef_

array([-6.96594266e+14, -1.54643805e+03, -2.06355480e+04,  7.38034342e+03,
       -3.19684890e+02,  5.35090859e+02, -2.86035997e+03,  5.58915485e+02,
        1.96125182e+02,  5.07990159e+00,  2.07397768e+02, -7.94708498e+01,
        5.23132972e+01, -3.01971205e+00, -3.55805849e+00,  8.95297671e-01,
        7.29560568e+00,  8.90325206e+02, -1.85263007e+02,  3.30525145e+01,
        9.40290549e+01,  5.21366265e+02, -1.54500926e+02,  3.14041212e+01,
       -1.70052518e+02,  3.75519267e+01, -5.57080782e+01, -2.14698186e+02,
        1.28512416e+01, -5.56589464e+00, -6.04397563e+01,  5.55829158e+00,
        1.53778265e+01,  5.58794760e+00, -1.89868266e+01, -8.32512279e+00,
       -8.97972084e+01,  2.56453028e+01, -2.13957980e+01,  4.77570650e+01,
        6.00943454e+01,  3.44119778e+01, -3.43542879e+00, -3.07054111e+01,
       -3.01965488e+01])

In [141]:
def coordinateAscentQuad(stab_classifier, alt_predictor, initial = [0,0,0,0,0,0,0,0], inc = 0.0000001, max_iter = 10000, confidence = 0.95):
    '''
    Keep increasing in direction until unstable. 
    '''
    M = {}
    c = 9
    for i in range(8):
        for j in range(i, 8):
            M[(i,j)] = c
            c += 1
    def partials(x):
        grad = []
        coef = alt_predictor.coef_
        for i in range(8):
            g = 1
            for j in range(8):
                if i == j:
                    g += 2*x[i]*coef[M[i,i]]
                else:
                    g += coef[M[min([i,j]), max([i,j])]]*x[j]
            grad.append(g)
        return np.array(grad)
    
    x = initial
    old = qr.predict(poly.fit_transform([x]))
    for _ in range(max_iter):
        for i in range(8):
            y = np.array(x)
            # y[i] += np.sign(alt_predictor.coef_[i])*inc
            # print(y, partials(x)*inc)
            y -= partials(x) * inc
            if np.all(y > 2) and stab_classifier.predict_proba([y])[0,1] > confidence:# and qr.predict(poly.fit_transform([y])) > old:
                x = y[:]
                old = qr.predict(poly.fit_transform([y]))
                break
        else:
            print('No more feasible directions at iteration', _, '.')
            break
    return x

In [143]:
x0 = [8.762542, 2.403241, 8.77518, 3.823229, 4.85104, 4.60577, 8.116536, 8.553734]
result = coordinateAscentQuad(reg, qr, x0, confidence = .6)
print(qr.predict(poly.fit_transform([x0])), reg.predict_proba([x0]))
qr.predict(poly.fit_transform([result])), result, reg.predict_proba([result])

No more feasible directions at iteration 664 .
[87794.] [[0.33229918 0.66770082]]


(array([94394.375]),
 array([8.75770873, 2.00004513, 9.12082639, 3.79748645, 4.90189239,
        4.60323861, 8.12396677, 8.60031304]),
 array([[0.2627905, 0.7372095]]))

TypeError: Cannot interpret '3' as a data type

In [127]:
df[df['Altitude'] == df.max()['Altitude']]

Unnamed: 0,Schord,Sspan,Ssweep,Stip,Bchord,Bspan,Bsweep,Btip,Schordspan,Schordsweep,...,SSweeptip,Bchordspan,Bchordsweep,Bchordtip,Bspansweep,BSpantip,BSweeptip,Altitude,Stability,Time
388,8.762542,2.403241,8.77518,3.823229,4.85104,4.60577,8.116536,8.553734,21.058497,76.892881,...,33.549521,22.342777,39.373644,41.494507,37.382903,39.396535,69.426693,87504.62,0.618517,164.9918
