In [1]:
import os

import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
# Location of the survey CSV. The default is the original local copy; set the
# CIG_DRINK_DATA_PATH environment variable to point at the file on any other
# machine instead of editing this cell.
data_path = os.environ.get(
    'CIG_DRINK_DATA_PATH',
    '/Users/christianfullerton/Desktop/Coding Workspace/Python Workspace/Data Science Practice/Cigarette and Drinking Data/Updated Cigarette & Drinking Data.csv',
)

In [3]:
# Read the survey CSV into a DataFrame, then show its column labels as the
# cell's output.
df = pd.read_csv(filepath_or_buffer=data_path)
df.columns

Index(['id', 'name', 'age', 'gender', 'country', 'city', 'education_level',
       'employment_status', 'annual_income_usd', 'marital_status',
       'children_count', 'smokes_per_day', 'drinks_per_week',
       'age_started_smoking', 'age_started_drinking',
       'attempts_to_quit_smoking', 'attempts_to_quit_drinking',
       'has_health_issues', 'mental_health_status', 'exercise_frequency',
       'diet_quality', 'sleep_hours', 'bmi', 'social_support',
       'therapy_history', 'salary_percentile', 'age_group', 'adequet_sleep',
       'family_status'],
      dtype='object')

In [4]:
# Label encoder instance; a later cell fits it on the `social_support` column
# to produce the integer-coded target `y`.
le = LabelEncoder()

In [5]:
# Predictors: the subset of survey columns used as model inputs.
feature_columns = [
    'age_started_drinking',
    'drinks_per_week',
    'attempts_to_quit_drinking',
    'exercise_frequency',
    'sleep_hours',
]
X = df[feature_columns]

# Target: `social_support` values mapped to integer class codes.
y = le.fit_transform(df['social_support'])

In [6]:
# Split the feature columns by dtype so each group gets the right transformer.
# "number" matches every numeric dtype (e.g. int32, float32), not only
# int64/float64; every remaining column is treated as categorical. The two
# lists therefore cover all columns of X, so no selected feature is left out
# of both lists and dropped by a ColumnTransformer built from them.
numeric_features = X.select_dtypes(include="number").columns.tolist()
categorical_features = X.select_dtypes(exclude="number").columns.tolist()

In [7]:
# Per-group preprocessing: one-hot encode the categorical columns (dropping the
# first level of each) and standardise the numeric columns.
categorical_step = ("cat", OneHotEncoder(drop="first"), categorical_features)
numeric_step = ("num", StandardScaler(), numeric_features)
preprocessor = ColumnTransformer(transformers=[categorical_step, numeric_step])

In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Chain the preprocessing and a linear SVM into one estimator, then fit it on
# the training split. dual=False is recommended when n_samples > n_features.
classifier = LinearSVC(C=1.0, max_iter=10000, dual=False)
steps = [('preprocess', preprocessor), ('svc', classifier)]
pipeline = Pipeline(steps=steps)
pipeline.fit(X_train, y_train)

In [10]:
# Score the fitted pipeline on the held-out test split and report the result.
test_accuracy = pipeline.score(X_test, y_test)
print("Accuracy:", test_accuracy)

Accuracy: 0.3501683501683502
