In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset and drop every row that has a missing value in any column.
df = pd.read_csv("beer-servings.csv")
df = df.dropna()

# Features and target
X = df[['country', 'beer_servings', 'spirit_servings', 'wine_servings', 'continent']]
y = df['total_litres_of_pure_alcohol']

# One-hot encode the two categorical columns; the remaining (servings) columns
# are passed through unchanged. handle_unknown='ignore' makes categories that
# were not seen during fit encode as all zeros instead of raising.
# NOTE(review): if every row is a distinct country, the 'country' dummies can
# never match a test row and only add noise to training -- confirm whether
# 'country' should be a feature at all.
preprocessor = ColumnTransformer(transformers=[
    ('cat', OneHotEncoder(handle_unknown='ignore'), ['country', 'continent'])
], remainder='passthrough')

# Candidate regressors, keyed by the label used in the printed report.
models = {
    'LR_model': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=42)
}

# Split once, before the loop: the arguments and seed are identical for every
# model, so every model is evaluated on the same held-out 20% of the rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train and evaluate each model on the shared split.
for name, model in models.items():
    # The same `preprocessor` object is reused, and therefore refit, by each pipeline.
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    print(f"{name} R² Score: {r2:.2f}")

LR_model R² Score: 0.84
Random Forest R² Score: 0.82


In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset and drop every row that has a missing value in any column.
df = pd.read_csv("beer-servings.csv")
df = df.dropna()

# Features and target
X = df[['country', 'beer_servings', 'spirit_servings', 'wine_servings', 'continent']]
y = df['total_litres_of_pure_alcohol']

# One-hot encode the categorical features; the servings columns pass through
# untouched. Categories unseen at fit time are encoded as all zeros
# (handle_unknown='ignore') rather than raising at predict time.
preprocessor = ColumnTransformer(transformers=[
    ('cat', OneHotEncoder(handle_unknown='ignore'), ['country', 'continent'])
], remainder='passthrough')

# Define models (including Lasso), keyed by the label used in the report.
models = {
    'LR_model': LinearRegression(),
    'Lasso Regression': Lasso(alpha=0.1),  # you can tune alpha
    'Random Forest': RandomForestRegressor(random_state=42)
}

# The split does not depend on the model, so compute it once up front instead
# of repeating the identical, seeded split on every loop iteration.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train and evaluate every model on the same train/test partition.
for name, model in models.items():
    pipe = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    print(f"{name} R² Score: {r2_score(y_test, y_pred):.2f}")

LR_model R² Score: 0.84
Lasso Regression R² Score: 0.83
Random Forest R² Score: 0.82


In [40]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pickle

# Read the CSV and discard any row that contains a missing value.
df = pd.read_csv("beer-servings.csv").dropna()

# Predictors and regression target. 'country' and 'continent' are one-hot
# encoded below; the servings columns are handed to the model unchanged.
X = df[['country', 'beer_servings', 'spirit_servings', 'wine_servings', 'continent']]
y = df['total_litres_of_pure_alcohol']

# Encoder for the categorical columns; everything else passes through.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['country', 'continent']),
    ],
    remainder='passthrough',
)

# Preprocessing followed by a Lasso regressor, bundled so the saved artefact
# carries its own encoding step.
lasso_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', Lasso(alpha=0.1)),
])

# Hold out 20% of the rows (fixed seed) and fit on the rest.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
lasso_pipeline.fit(X_train, y_train)

# Score on the held-out rows.
y_pred = lasso_pipeline.predict(X_test)
print("Lasso Regression R² Score:", r2_score(y_test, y_pred))

# Persist the fitted pipeline (encoder + regressor) to disk.
with open('lasso_model.pkl', 'wb') as model_file:
    pickle.dump(lasso_pipeline, model_file)

Lasso Regression R² Score: 0.8313060911662882


In [41]:
# Build and fit the linear-regression pipeline, then save it.
# The previous version pickled `Pipeline([...])`, a placeholder whose only
# step is the literal Ellipsis and which was never fitted, so the saved file
# could not be used for prediction.
# Relies on X_train / y_train and the sklearn / pickle imports from the
# earlier cells of this notebook.
LR_model = Pipeline(steps=[
    # A fresh encoder so fitting here does not refit the one already used by
    # another pipeline.
    ('preprocessor', ColumnTransformer(transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['country', 'continent'])
    ], remainder='passthrough')),
    ('regressor', LinearRegression()),
])
LR_model.fit(X_train, y_train)

# Save the fitted pipeline (encoder + regressor) as a single artefact.
with open("LR_model.pkl", "wb") as f:
    pickle.dump(LR_model, f)

    