In [1]:
# Exploration: fit a linear regression to the WES curve data and export the model

In [7]:
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import pandas as pd
import joblib

In [3]:
# Reads

def read_curve(filename: str) -> pd.DataFrame:
    """Load one headerless, semicolon-separated curve file.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read. The file has no header row and
        uses ';' as the field separator.

    Returns
    -------
    pd.DataFrame
        The parsed rows with the columns labelled 'x', 'He_Hd' and
        'h_Hd', in the order they appear in the file.
    """
    column_labels = ['x', 'He_Hd', 'h_Hd']
    raw = pd.read_csv(filename, sep=';', header=None)
    # Relabel the positional (0, 1, 2) columns with their meaningful names.
    return raw.set_axis(column_labels, axis=1)

# Load the three curve files; each variable keeps the numeric suffix of
# the file it was read from.
curve020_df = read_curve('data/wes_curve_020.csv')
curve033_df = read_curve('data/wes_curve_033.csv')
curve067_df = read_curve('data/wes_curve_067.csv')

# Stack the frames into one table with a fresh 0..n-1 index.
# NOTE: the concatenation order (020, 067, 033) is deliberately left
# unchanged: the seeded train/test split further down shuffles by row
# position, so reordering the rows would change which rows land in each set.
curves_df = pd.concat([curve020_df, curve067_df, curve033_df], ignore_index=True)
print(f"Size: {curves_df.shape[0]}")

# A bare expression is rendered only when it is the last line of a cell,
# so the cell ends with the single preview that is meant to be shown.
curves_df.head()

Size: 108


Unnamed: 0,x,He_Hd,h_Hd
0,0.8,0.227553,0.2
1,0.825,0.314535,0.2
2,0.85,0.40378,0.2
3,0.860278,0.442193,0.2
4,0.872778,0.496438,0.2


In [4]:
# Baseline model: plain linear regression

# Features are the two ratio columns; the target is the 'x' column.
X = curves_df.loc[:, ['He_Hd', 'h_Hd']]
y = curves_df.loc[:, 'x']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=69,
)

# Fit on the training portion only.
reg = LinearRegression()
reg.fit(X_train, y_train)

# Score the model on the held-out rows.
y_pred = reg.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.00017987784191619258


In [5]:
# Single-point prediction with the fitted linear regressor

# One-row frame whose column names match the features used for training.
sample_df = pd.DataFrame([
    {'He_Hd': 1.1, 'h_Hd': 0.2},  # He/Hd, h/Hd
])

predicted_C_Cd = reg.predict(sample_df)
print(predicted_C_Cd)

[0.9647642]


In [8]:
# Persist the fitted regressor to disk; joblib.dump returns the list of
# file names it wrote, which is what the cell displays.
joblib.dump(value=reg, filename='model.pkl')

['model.pkl']

In [11]:
# Round-trip check: reload the persisted model and confirm it reproduces
# the in-memory model's prediction for the same sample.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
vtmodel = joblib.load('model.pkl')
assert (vtmodel.predict(sample_df) == reg.predict(sample_df)).all(), \
    "reloaded model does not reproduce the original prediction"
vtmodel.predict(sample_df)

array([0.9647642])