In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import re

# Load the dataset
data = pd.read_csv('/content/sample_data/instagram_reach.csv')

# Select input features and target variables
input_features = ['USERNAME', 'Caption', 'Hashtags', 'Followers']
target_likes = 'Likes'
target_time_since_posted = 'Time since posted'

# Prepare the data
X = data[input_features]
y_likes = data[target_likes]

# Convert "Time since posted" to numeric representation
def convert_time_to_hours(time):
    if 'hour' in time:
        return int(re.findall(r'\d+', time)[0])
    elif 'day' in time:
        return int(re.findall(r'\d+', time)[0]) * 24
    elif 'week' in time:
        return int(re.findall(r'\d+', time)[0]) * 24 * 7
    elif 'month' in time:
        return int(re.findall(r'\d+', time)[0]) * 24 * 30
    else:
        return 0

data['Time since posted'] = data['Time since posted'].apply(convert_time_to_hours)

# Encode categorical features
X_encoded = pd.get_dummies(X, drop_first=True)

# Split the data into training and testing sets
X_train, X_test, y_train_likes, y_test_likes = train_test_split(
    X_encoded, y_likes, test_size=0.2, random_state=42)

# Build and train the Linear Regression model for predicting likes
model_likes = LinearRegression()
model_likes.fit(X_train, y_train_likes)

# Make predictions on the test set for likes
y_pred_likes = model_likes.predict(X_test)

# Evaluate the model
mse_likes = mean_squared_error(y_test_likes, y_pred_likes)
r2_likes = r2_score(y_test_likes, y_pred_likes)

print("Likes Prediction:")
print("Mean Squared Error:", mse_likes)
print("R-squared Score:", r2_likes)

Likes Prediction:
Mean Squared Error: 1859.1731983903883
R-squared Score: -0.3114680678742392
