# ***With SK-Learn***

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
import numpy as np

# Load the dataset and drop the 'ID' column
data = pd.read_csv('smoking.csv')
data = data.drop(columns=['ID'])

# Specify the target column and categorical columns
target_column = 'smoking'
categorical_columns = ['gender', 'oral', 'tartar']

# Encode categorical features as integers; each fitted encoder is kept (keyed
# by column name) so it can be saved alongside the model below.
label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Split data into features and target variable
X = data.drop(columns=[target_column])
y = data[target_column]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the held-out rows influence the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same
# transformation to every row.
scaler = StandardScaler()
scaler.fit(X_train_raw)
X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)

# Select the scaled rows belonging to each side of the split (same row order
# as the raw split, so they stay aligned with y_train / y_test).
X_train = X_scaled.loc[X_train_raw.index]
X_test = X_scaled.loc[X_test_raw.index]

# Train a Linear Regression model
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

# Predict on the test set (continuous outputs; not thresholded or scored here)
y_pred_continuous = linear_model.predict(X_test)


# Save the model, scaler, and encoders
joblib.dump(linear_model, 'linear_regression_model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoders, 'label_encoders.pkl')

print("Model, scaler, and label encoders saved successfully.")


Model, scaler, and label encoders saved successfully.


# ***Without SK-Learn***

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib  # For saving and loading the model

# Read the dataset and discard the 'ID' column in one step
data = pd.read_csv('smoking.csv').drop(columns=['ID'])

# Name of the target column and of the categorical feature columns
target_column = 'smoking'
categorical_columns = ['gender', 'oral', 'tartar']

# Create one encoder per categorical column, then fit each one and replace the
# column with its integer codes. The fitted encoders stay in the dict.
label_encoders = {column: LabelEncoder() for column in categorical_columns}
for column, le in label_encoders.items():
    data[column] = le.fit_transform(data[column])

# Standardize every feature column (everything except the target).
# NOTE(review): the scaler is fit on all rows before the train/test split that
# happens later, so statistics of the held-out rows feed into the scaling.
# Consider fitting it on the training rows only.
features = data.drop(columns=[target_column])
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

# Target values, re-indexed to line up with the freshly built feature frame
y = pd.Series(data[target_column].values, index=X.index, name=target_column)

# Prepend a column of ones so the first weight acts as the bias term
X_bias = np.column_stack((np.ones(len(X)), X))

# Gradient descent settings
alpha = 0.01       # Learning rate
iterations = 1000  # Number of update steps

# Start from an all-zero weight vector, one entry per column of X_bias
theta = np.zeros(X_bias.shape[1])

# Cost function (Mean Squared Error)
def compute_cost(X, y, theta):
    """Return the halved mean squared error of the linear model ``X.dot(theta)``.

    Args:
        X: Design matrix; must support ``X.dot(theta)``.
        y: Target values, one per row of ``X``.
        theta: Weight vector.

    Returns:
        ``sum((X.dot(theta) - y) ** 2) / (2 * len(y))`` as a scalar.
    """
    n_samples = len(y)
    residuals = X.dot(theta) - y
    squared_error_total = np.sum(residuals ** 2)
    return (1 / (2 * n_samples)) * squared_error_total

# Gradient descent function
def gradient_descent(X, y, theta, alpha, iterations):
    """Fit linear-regression weights with batch gradient descent.

    Each step moves the weights against the gradient of the halved mean
    squared error: ``theta -= (alpha / m) * X.T.dot(X.dot(theta) - y)``.

    Args:
        X: Design matrix with one row per sample; must support ``X.dot`` and
            ``X.T``. Include a column of ones if a bias term is wanted.
        y: Target values, one per row of ``X``.
        theta: Initial weight vector. It is copied, never modified.
        alpha: Learning rate.
        iterations: Number of update steps to run.

    Returns:
        A new float array holding the weights after ``iterations`` steps.
    """
    m = len(y)
    # Work on a private float copy: the in-place update below would otherwise
    # overwrite the caller's array, and would raise a casting error when the
    # initial theta has an integer dtype.
    theta = np.array(theta, dtype=float)
    for _ in range(iterations):
        predictions = X.dot(theta)
        theta -= (alpha / m) * X.T.dot(predictions - y)
    return theta

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_bias,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the weight vector on the training rows with gradient descent
theta = gradient_descent(X_train, y_train, theta, alpha, iterations)

# Persist the fitted weights, the scaler, and the label encoders (the encoders
# are needed to encode categorical values of future inputs the same way).
artifacts = (
    (theta, 'scratch_linear_regression.pkl'),  # model parameters
    (scaler, 'scratch_scaler.pkl'),            # feature scaler
    (label_encoders, 'label_encoders.pkl'),    # categorical encoders
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)

print("Model, scaler, and encoders have been saved.")


Model, scaler, and encoders have been saved.
