In [1]:
# NOTE(review): path_setup() is called before the `src` import below; presumably
# it makes the project root importable — confirm against utils.path_setup.
from utils import path_setup
path_setup()
from src.paths import PROCESSED_DATA
import pandas as pd
import os




In [2]:
# Build the CSV location under PROCESSED_DATA and read it, using the file's
# first column as the row index.
path = os.path.join(
    PROCESSED_DATA,
    "prepaired_data.csv",
)
df = pd.read_csv(path, index_col=0)

# Bare last expression so the notebook renders the loaded frame.
df

Unnamed: 0,Layer,Amp,FreQ,e,h,Optimal
0,4,6.0,2.0,12.2,0.9,1
1,5,6.0,1.5,12.6,2.0,1
2,6,6.0,1.0,14.1,1.7,0
4,8,5.0,2.0,13.6,1.3,0
5,9,5.0,1.6,11.8,1.6,0
...,...,...,...,...,...,...
69,7,4.9,1.8,,,0
70,4,5.4,1.7,,,1
71,5,5.4,1.7,,,1
72,6,5.4,1.7,,,0


In [3]:
# Remove the "Layer" and "Optimal" columns (errors="ignore" keeps the cell
# re-runnable once they are gone), then discard every row that has a missing
# value in any remaining column.
df = (
    df
    .drop(columns=["Layer", "Optimal"], errors="ignore")
    .dropna(axis=0)
)

# Transposed summary statistics: one row per remaining column.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Amp,49.0,4.922449,1.033252,2.5,4.5,5.0,5.5,6.5
FreQ,49.0,1.736735,0.282617,1.0,1.6,1.8,2.0,2.3
e,49.0,11.073469,2.185442,5.4,10.4,11.8,12.8,14.1
h,49.0,1.659184,0.565287,0.4,1.3,1.6,2.0,3.6


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LassoCV, Ridge
from sklearn.preprocessing import StandardScaler
from src.modeling.pipe_construct import PipeConstruct

# Target is column "e"; predictors are "Amp" and "FreQ".
features = ["Amp", "FreQ"]
X = df[features]
y = df["e"]

# 80/20 hold-out split; random_state is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=13,
)

# Pipeline wrapper configured with a StandardScaler and a LinearRegression.
pipe_linreg = PipeConstruct(set_scaler=StandardScaler(), set_model=LinearRegression())

# Cross-validate on the training split (cv=5), fit on the whole training
# split, then report MAE on the held-out split. Both cross_validation and mae
# print their scores.
pipe_linreg.cross_validation(X_train, y_train, cv=5)
pipe_linreg.fit(X_train, y_train)
pipe_linreg.mae(X_test, y_test)



Cross-validation Average MAE: 0.7011167986375088
MAE: 0.6340873029891176


In [5]:
# Predict "e" for a single point: Amp=5.5, FreQ=1.0.
# The query is built as a DataFrame with the same column names and order used
# for training, so a value cannot be silently assigned to the wrong feature and
# scikit-learn does not warn about missing feature names.
# NOTE(review): assumes PipeConstruct.predict forwards X unchanged to the
# underlying scikit-learn estimators — confirm.
X_query = pd.DataFrame([[5.5, 1.0]], columns=features)
print(pipe_linreg.predict(X_query))


[13.24970587]




In [6]:
# Pipeline wrapper configured with a StandardScaler and a LassoCV (cv=5).
pipe_lasso = PipeConstruct(set_scaler=StandardScaler(), set_model=LassoCV(cv=5))

# Fit on the training split and report MAE on the held-out split.
pipe_lasso.fit(X_train, y_train)
pipe_lasso.mae(X_test, y_test)

# Display the fitted coefficients together with the alpha LassoCV selected.
(pipe_lasso.set_model.coef_, pipe_lasso.set_model.alpha_)


MAE: 0.6346209537796551


(array([ 1.82443149, -0.41575075]), np.float64(0.00183529332854413))

In [7]:
# Pipeline wrapper configured with a StandardScaler and a Ridge (alpha=0.5).
pipe_ridge = PipeConstruct(
    set_scaler=StandardScaler(),
    set_model=Ridge(alpha=0.5),
)

# Cross-validate first, then fit on the full training split — the same order
# the linear-regression and random-forest cells use. This guarantees that the
# estimator scored and inspected below is the one trained on all of X_train,
# even if cross_validation refits the pipeline in place
# (NOTE(review): confirm in PipeConstruct).
pipe_ridge.cross_validation(X_train, y_train, cv=10)
pipe_ridge.fit(X_train, y_train)

# Report MAE on the held-out split, then display the fitted coefficients.
pipe_ridge.mae(X_test, y_test)
pipe_ridge.set_model.coef_



Cross-validation Average MAE: 0.672796346905721
MAE: 0.6412499340871296


array([ 1.8032137 , -0.41275498])

In [8]:
from sklearn.ensemble import RandomForestRegressor
# Pipeline wrapper configured with a StandardScaler and a 10-tree random
# forest; random_state is fixed so the forest is repeatable.
pipe_rf = PipeConstruct(
    set_scaler=StandardScaler(),
    set_model=RandomForestRegressor(
        n_estimators=10,
        random_state=0,
    ),
)

# Cross-validate on the training split (cv=5), fit on the whole training
# split, then report MAE on the held-out split.
pipe_rf.cross_validation(X_train, y_train, cv=5)
pipe_rf.fit(X_train, y_train)
pipe_rf.mae(X_test, y_test)

Cross-validation Average MAE: 0.8252052721088434
MAE: 0.4657134920634915


In [None]:
from src.modeling.save_model import save_model

# Hand the fitted linear-regression pipeline to save_model under the name
# "regression.pkl".
# NOTE(review): the destination directory and serialization format are decided
# inside save_model and are not visible from this notebook — confirm there.
save_model(pipe_linreg,"regression.pkl")