In [1]:
import os

In [2]:
os.chdir('../')

In [28]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    data_path: Path
    model_name: str
    gamma: float
    C: float

In [29]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

In [30]:
class ConfigurationManager:
    def __init__(
            self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH,
            schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params.SVC

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            data_path=config.data_path,
            model_name=config.model_name,
            gamma=params.gamma,
            C=params.C
        )

        return model_trainer_config

In [31]:
import pandas as pd
import os
import joblib
from sklearn.svm import SVC

In [32]:
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config

    def train(self):
        X_train = pd.read_csv(os.path.join(self.config.data_path, 'x_train.csv'))
        y_train = pd.read_csv(os.path.join(self.config.data_path, 'y_train.csv'))

        svc = SVC(gamma=0.1, C=10, random_state=42)

        svc.fit(X_train, y_train)
        joblib.dump(
            svc,
            os.path.join(
                self.config.root_dir,
                self.config.model_name
            )
        )

In [33]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
except Exception as e:
    raise e

[2024-07-09 11:26:06,869: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-07-09 11:26:06,871: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-09 11:26:06,873: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-07-09 11:26:06,874: INFO: common: created directory at: artifacts]
[2024-07-09 11:26:06,875: INFO: common: created directory at: artifacts/model_training]


  y = column_or_1d(y, warn=True)


In [24]:
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

In [25]:
X_train = pd.read_csv('artifacts/data_transformation/x_train.csv')
y_train = pd.read_csv('artifacts/data_transformation/y_train.csv')
X_val = pd.read_csv('artifacts/data_transformation/x_val.csv')
y_val = pd.read_csv('artifacts/data_transformation/y_val.csv')

y_train = np.array(y_train).ravel()
y_val = np.array(y_val).ravel()

In [26]:
svc = SVC(random_state=42)

svc_params = {
    'C' : [0.001, 0.01, 0.1, 1.0, 10, 100, 1000],
    'gamma' : [0.001, 0.01, 0.1, 1.0, 10, 100, 1000]
}

svc_cv = RandomizedSearchCV(
    estimator=svc,
    param_distributions=svc_params,
    cv=5,
    scoring='accuracy'
)

svc_cv.fit(X_train, y_train)

print('Best SVC Score:', svc_cv.best_score_)

svc_best = SVC(**svc_cv.best_params_)
svc_best.fit(X_train, y_train)
svc_score = svc_best.score(X_val, y_val)

print('Score of best SVC on val set:', svc_score)

Best SVC Score: 0.851669602818706
Score of best SVC on val set: 0.825


In [27]:
svc_cv.best_params_

{'gamma': 0.1, 'C': 10}

In [35]:
X_train = pd.read_csv(os.path.join('artifacts/data_transformation', 'x_train.csv'))

In [36]:
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.974751,0.073984,0.913613,-0.071072,-0.434766,0.323024,0.122929,1.222737,0.507058,-0.14765,-0.998198
1,-0.15759,-0.889653,-0.215073,-0.727414,-0.189199,0.857895,0.414343,-0.23789,-0.212492,0.209527,-1.104971
2,-0.221129,0.017299,-1.189847,-0.879309,-0.228836,0.411702,-0.23892,-0.185725,0.114576,-0.61429,-0.379362
3,-0.483277,-0.209439,-0.009857,-0.315598,-0.434766,0.128647,-0.540727,-1.500289,0.245404,-0.074077,1.455266
4,2.034391,-0.436177,1.118828,-1.392036,-0.268971,-2.022343,-2.46287,0.075101,-1.193697,-0.074077,0.013889


In [48]:
X_test = pd.read_csv(os.path.join('artifacts/data_transformation', 'x_test.csv'), skiprows=1, header=None)

In [49]:
X_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,2.719067,0.615,0.66,1.837424,-2.340266,2.043721,4.109736,1.0022,3.07,-0.312246,2.440369
1,2.473559,0.27,0.24,0.755871,-2.465357,2.899951,3.820764,0.9956,3.22,-0.411227,2.719067
2,2.090022,0.735,0.0,0.804206,-2.372742,3.109664,3.625875,0.99765,3.41,-0.504356,2.371051
3,2.232275,0.34,0.4,0.894913,-2.554798,3.444478,4.271245,0.99554,3.34,-0.150254,2.547564
4,2.607519,0.44,0.64,0.755871,-2.639071,1.675968,2.899951,0.998,3.21,-0.396493,2.462599
