# Step 1: Data Preprocessing Pipeline

### Importing Libraries

In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
from sklearn.impute import SimpleImputer

In [6]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures

In [7]:
from sklearn.compose import ColumnTransformer

In [8]:
from sklearn.pipeline import Pipeline

In [9]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

In [10]:
from sklearn.model_selection import GridSearchCV, cross_val_score

In [11]:
from sklearn.metrics import mean_absolute_error, r2_score

In [12]:
from sklearn.base import BaseEstimator, TransformerMixin

In [13]:
import joblib

In [15]:
!pip install fastapi

Collecting fastapi
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)
Collecting pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 (from fastapi)
  Downloading pydantic-2.11.3-py3-none-any.whl.metadata (65 kB)
Collecting annotated-types>=0.6.0 (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi)
  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Collecting pydantic-core==2.33.1 (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi)
  Downloading pydantic_core-2.33.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting typing-inspection>=0.4.0 (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi)
  Downloading typing_inspection-0.4.0-py3-none-any.whl.metadata (2.6 kB)
Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)
Down

In [16]:
from fastapi import FastAPI

In [17]:
from pydantic import BaseModel

## Feature Engineering and Preprocessing
### Step 1: Identify Features

In [18]:
# Column the models are presumably meant to predict (inferred from the name;
# it is not referenced in the cells shown here — TODO confirm downstream use).
target = 'Sleep_Duration'

In [19]:
# Columns sent through the one-hot-encoding branch of the ColumnTransformer.
categorical = ['Gender']

In [20]:
# Columns sent through the median-imputation + scaling branch of the ColumnTransformer.
numerical = [
    'Age',
    'Caffeine_Intake',
    'Screen_Time',
    'Physical_Activity_Level',
]

### Custom Transformer for Interaction Features

In [21]:
class InteractionFeatures(BaseEstimator, TransformerMixin):
    """Append the element-wise product of two columns as a new feature.

    The transformer is stateless: ``fit`` learns nothing, and ``transform``
    operates on a copy so the caller's data is never modified.

    Parameters
    ----------
    first : str, default='Screen_Time'
        Name of the first column in the product.
    second : str, default='Caffeine_Intake'
        Name of the second column in the product.
    output : str, default='Screen_Caffeine_Interaction'
        Name of the column that receives the product.

    Notes
    -----
    If either input column is absent, ``transform`` returns the copy
    unchanged rather than raising.
    """

    def __init__(self, first='Screen_Time', second='Caffeine_Intake',
                 output='Screen_Caffeine_Interaction'):
        # scikit-learn convention: keep constructor arguments verbatim as
        # attributes so get_params()/clone() can round-trip them.
        self.first = first
        self.second = second
        self.output = output

    def fit(self, X, y=None):
        """Nothing to learn; return ``self`` so the step can be chained."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the interaction column added when both inputs exist."""
        X = X.copy()
        if self.first in X and self.second in X:
            X[self.output] = X[self.first] * X[self.second]
        return X

### Preprocessing Pipeline

In [22]:
# Numeric branch: fill missing values with each column's median, then standardize.
numeric_transformer = Pipeline(
    [
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)

# Categorical branch: one-hot encode, dropping the first level of each feature.
# NOTE(review): handle_unknown is left at OneHotEncoder's default, so a category
# not seen during fit raises at transform time — confirm this is intended.
categorical_transformer = Pipeline(
    [('encoder', OneHotEncoder(drop='first'))]
)

### Full Column Transformer

In [23]:
def _numeric_columns(X):
    """Return the columns that the 'num' branch should process for ``X``.

    Always contains every name in ``numerical``. When ``X`` also carries the
    'Screen_Caffeine_Interaction' column (added by ``InteractionFeatures``),
    that column is included too; otherwise ColumnTransformer's default
    ``remainder='drop'`` would silently discard the engineered feature.
    Frames without the column are handled exactly as before.
    """
    columns = list(numerical)
    interaction = 'Screen_Caffeine_Interaction'
    if interaction in getattr(X, 'columns', ()) and interaction not in columns:
        columns.append(interaction)
    return columns


# Route numeric and categorical columns to their dedicated sub-pipelines.
# The numeric selector is a named function (not a lambda) so the transformer
# remains picklable.
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, _numeric_columns),
    ('cat', categorical_transformer, categorical)
])

### Polynomial Features

In [24]:
# Polynomial expansion up to degree 2 (original columns, squares and pairwise
# products); the constant bias column is not generated.
poly = PolynomialFeatures(
    degree=2,
    interaction_only=False,
    include_bias=False,
)

### Full Pipeline with Feature Engineering

In [25]:
# Feature-engineering pipeline, applied in this order:
#   1. 'interaction' - InteractionFeatures adds the Screen_Time * Caffeine_Intake column
#   2. 'preprocess'  - the ColumnTransformer handles numeric and categorical columns
#   3. 'poly'        - degree-2 polynomial expansion of the transformed output
full_pipeline = Pipeline(
    [
        ('interaction', InteractionFeatures()),
        ('preprocess', preprocessor),
        ('poly', poly),
    ]
)