In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error

# Train and evaluate a linear-regression baseline that predicts
# 'Addiction Level' from the remaining columns of the dataset.

# Load the dataset
df = pd.read_csv('Time-Wasters on Social Media.csv')

# Drop identifier columns; they are excluded from the feature set
df_cleaned = df.drop(['UserID', 'Video ID'], axis=1)

# Identify categorical (object-dtype) columns for encoding
categorical_cols = df_cleaned.select_dtypes(include=['object']).columns

# Apply Label Encoding for categorical features. Each fitted encoder is kept
# in `label_encoders` so the same value -> code mapping can be reapplied later.
# NOTE(review): LabelEncoder is documented for target labels; its integer
# codes follow the sorted order of the string values, and LinearRegression
# treats them as magnitudes. For nominal columns one-hot encoding is usually
# more appropriate -- left as-is here so `label_encoders` keeps its shape.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # astype(str) also turns missing values into the literal category 'nan'
    df_cleaned[col] = le.fit_transform(df_cleaned[col].astype(str))
    label_encoders[col] = le

# Define features (X) and target (y)
TARGET_COL = 'Addiction Level'
X = df_cleaned.drop(TARGET_COL, axis=1)
y = df_cleaned[TARGET_COL]

# Leakage guard. An RMSE at floating-point-noise level (the previously
# recorded run printed ~3e-15) means the target is reproduced exactly from
# the features, so the score measures nothing. Drop any numeric feature whose
# absolute Pearson correlation with the target is effectively 1, and report it.
# NOTE(review): this only catches single-column proxies; if the RMSE is still
# ~0 afterwards, inspect lr_regressor.coef_ for a combination of columns.
LEAKAGE_CORR_THRESHOLD = 0.999
numeric_features = X.select_dtypes(include='number')
# Constant columns have undefined correlation; skip them up front.
varying_features = numeric_features.loc[:, numeric_features.nunique() > 1]
target_corr = varying_features.corrwith(y).abs()
leaky_cols = target_corr[target_corr >= LEAKAGE_CORR_THRESHOLD].index.tolist()
if leaky_cols:
    print(f'Dropping features that are near-perfect proxies of the target: {leaky_cols}')
    X = X.drop(columns=leaky_cols)

# Split the data into training and testing sets (70/30, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train a Linear Regression model
lr_regressor = LinearRegression()
lr_regressor.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = lr_regressor.predict(X_test)

# Evaluate the model's performance (RMSE is in the units of the target)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5

print(f'RMSE: {rmse}')


RMSE: 3.1149621400939124e-15


In [3]:
import joblib

# Save the trained Linear Regression model to a file
model_filename = 'linear_regression_model.pkl'
joblib.dump(lr_regressor, model_filename)

# Persist the fitted label encoders next to the model. The regressor was
# trained on the integer codes these encoders produced, so a fresh session
# needs them to turn raw categorical values into the codes the model expects.
encoders_filename = 'label_encoders.pkl'
joblib.dump(label_encoders, encoders_filename)

# NOTE: joblib files are pickle-based; only load them from trusted sources.

# Confirm the artifacts are saved
print(f"Model saved as {model_filename}")
print(f"Label encoders saved as {encoders_filename}")


Model saved as linear_regression_model.pkl
