## __IMPORT LIBRARIES__

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
import pickle

## __DATA LOADING & UNDERSTANDING__

In [2]:
class DataLoader:
    """Reads a CSV source into a DataFrame and offers quick-look helpers."""

    def __init__(self, filepath):
        """Parse ``filepath`` with ``pd.read_csv`` and keep the result on ``self.df``.

        A confirmation message is printed once the read succeeds.
        """
        loaded_frame = pd.read_csv(filepath)
        self.df = loaded_frame
        print("Data Loaded.\n")

    def get_data(self):
        """Return the stored DataFrame object itself (no copy is made)."""
        return self.df

    def display_head(self, n=5):
        """Return the first ``n`` rows of the stored frame."""
        leading_rows = self.df.head(n)
        return leading_rows

    def display_tail(self, n=5):
        """Return the last ``n`` rows of the stored frame."""
        trailing_rows = self.df.tail(n)
        return trailing_rows

    def description(self):
        """Return the ``describe()`` summary of the stored frame."""
        summary = self.df.describe()
        return summary

    def display_info(self):
        """Write the ``info()`` report of the stored frame; returns ``None``."""
        self.df.info()
    

## __DATA PREPROCESSING__

In [3]:
class DataPreprocessor:
    """Cleaning helpers that operate on the DataFrame held in ``self.df``."""

    def __init__(self, dF):
        """Keep a reference to ``dF``; the frame is not copied."""
        self.df = dF

    def check_missing_values(self):
        """Return the number of null entries in each column."""
        null_mask = self.df.isnull()
        return null_mask.sum()

    def fill_missing(self):
        """Fill nulls in numeric columns with that column's mean.

        The fill is applied in place, so the frame object passed to the
        constructor is modified. Nothing is returned.
        """
        column_means = self.df.mean(numeric_only=True)
        self.df.fillna(column_means, inplace=True)

    def encode_categorical(self):
        """One-hot encode every ``object``-dtype column and return the frame.

        The first level of each encoded column is dropped. When there are no
        ``object`` columns the stored frame is returned unchanged.
        """
        categorical_cols = self.df.select_dtypes(include='object').columns
        if len(categorical_cols) == 0:
            # Nothing to encode: hand back the frame exactly as stored.
            return self.df

        self.df = pd.get_dummies(
            self.df,
            columns=categorical_cols,
            drop_first=True,
        )
        print("Categorical columns encoded.\n")
        return self.df

## __DATA SPLITTING__

In [4]:
class DataSplitter:
    """Separates a DataFrame into train/test feature and target subsets."""

    def __init__(self, df):
        """Store the frame that ``split`` will partition."""
        self.df = df

    def split(self, target_col, test_size=0.2, random_state=42):
        """Split the stored frame into train and test parts.

        Args:
            target_col: Column label used as the prediction target; it is
                removed from the feature set.
            test_size: Forwarded to ``train_test_split``.
            random_state: Forwarded to ``train_test_split``.

        Returns:
            The result of ``train_test_split(features, target, ...)``, which
            the caller unpacks as ``X_train, X_test, y_train, y_test``.
        """
        features = self.df.drop(columns=target_col)
        target = self.df[target_col]
        return train_test_split(
            features,
            target,
            test_size=test_size,
            random_state=random_state,
        )

## __MODEL TRAINING (SVM)__

In [5]:
class SalaryModel:
    """Wraps a linear-kernel ``SVR`` regressor."""

    def __init__(self):
        """Create the unfitted estimator and keep it on ``self.model``."""
        self.model = SVR(kernel='linear')

    def train(self, X_train, y_train):
        """Fit the wrapped estimator on the training data and return it.

        Args:
            X_train: Training features passed to ``fit``.
            y_train: Training targets passed to ``fit``.

        Returns:
            The estimator stored on ``self.model``, after fitting.
        """
        estimator = self.model
        estimator.fit(X_train, y_train)
        return estimator


## __EVALUATE MODEL__

In [6]:
class ModelEvaluator:
    """Scores a fitted regressor against held-out data."""

    def evaluate(self, model, X_test, y_test):
        """Predict on ``X_test`` and report regression error metrics.

        Previously the predictions were computed and then discarded, so the
        evaluation step produced no result. The metrics are now printed and
        returned.

        Args:
            model: Any object exposing ``predict(X)``.
            X_test: Features passed straight to ``model.predict``.
            y_test: True target values, one per prediction.

        Returns:
            dict with float entries ``"mae"`` (mean absolute error),
            ``"rmse"`` (root mean squared error) and ``"r2"`` (coefficient of
            determination). All three are NaN when there are no samples;
            ``"r2"`` is NaN when ``y_test`` is constant, since it is undefined.

        Raises:
            ValueError: If the number of predictions differs from the number
                of true values.
        """
        # Local import keeps this block self-contained; numpy is not imported
        # at the top of the notebook.
        import numpy as np

        y_pred = np.asarray(model.predict(X_test), dtype=float).ravel()
        y_true = np.asarray(y_test, dtype=float).ravel()

        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"Expected {y_true.size} predictions, got {y_pred.size}."
            )

        if y_true.size == 0:
            nan = float("nan")
            metrics = {"mae": nan, "rmse": nan, "r2": nan}
        else:
            residuals = y_true - y_pred
            ss_res = float(np.sum(residuals ** 2))
            ss_tot = float(np.sum((y_true - y_true.mean()) ** 2))
            metrics = {
                "mae": float(np.mean(np.abs(residuals))),
                "rmse": float(np.sqrt(ss_res / y_true.size)),
                # R^2 divides by the target variance; undefined if it is zero.
                "r2": 1.0 - ss_res / ss_tot if ss_tot > 0 else float("nan"),
            }

        print(
            f"MAE: {metrics['mae']:.4f} | "
            f"RMSE: {metrics['rmse']:.4f} | "
            f"R2: {metrics['r2']:.4f}"
        )
        return metrics
        

## __MODEL SAVE AS PICKLE__

In [7]:
class ModelSaver:
    """Persists an object to disk with ``pickle``."""

    def __init__(self, model, filename='salary_model.pkl'):
        """Remember the object to store and the destination path."""
        self.filename = filename
        self.model = model

    def save(self):
        """Pickle ``self.model`` into ``self.filename`` and print a confirmation.

        The destination is opened in binary write mode, so an existing file at
        that path is overwritten. Returns ``None``.
        """
        with open(self.filename, 'wb') as out_file:
            pickle.dump(self.model, out_file)

        print(f"Model saved as {self.filename}")


## __OBJECTS OF ALL CLASSES__

In [8]:
# 1. Load
data = DataLoader("Employee_Salary_Dataset.csv")
df = data.get_data()
print("data head")
print(data.display_head())
print("data tail")
print(data.display_tail())
print("data description")
print(data.description())
print("data information")
# display_info() writes the summary itself and returns None, so it is called
# directly; printing the un-called attribute only showed the bound-method repr.
data.display_info()


# 2. Preprocess
# fill_missing() modifies `df` in place; encode_categorical() returns the
# encoded frame used for the rest of the pipeline.
prep = DataPreprocessor(df)
prep.fill_missing()
df_clean = prep.encode_categorical()

# 3. Split
# NOTE(review): the "ID" column stays in the feature set; it looks like a row
# identifier - confirm whether it should be dropped before training.
splitter = DataSplitter(df_clean)
X_train, X_test, y_train, y_test = splitter.split("Salary")

# 4. Train
model_obj = SalaryModel()
model = model_obj.train(X_train, y_train)

# 5. Evaluate
evaluator = ModelEvaluator()
evaluator.evaluate(model, X_test, y_test)

# 6. Save
saver = ModelSaver(model)
saver.save()


Data Loaded.

data head
   ID  Experience_Years  Age  Gender  Salary
0   1                 5   28  Female  250000
1   2                 1   21    Male   50000
2   3                 3   23  Female  170000
3   4                 2   22    Male   25000
4   5                 1   17    Male   10000
data tail
    ID  Experience_Years  Age  Gender   Salary
30  31                10   34    Male    80000
31  32                15   54    Male   900000
32  33                20   55  Female  1540000
33  34                19   53  Female  9300000
34  35                16   49    Male  7600000
data description
              ID  Experience_Years        Age        Salary
count  35.000000          35.00000  35.000000  3.500000e+01
mean   18.000000           9.20000  35.485714  2.059147e+06
std    10.246951           7.55295  14.643552  3.170124e+06
min     1.000000           1.00000  17.000000  3.000000e+03
25%     9.500000           2.50000  22.500000  2.250000e+04
50%    18.000000           6.00000  2