In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('cleaned_dataset.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Text,Sentiment,Timestamp,User,Platform,Hashtags,Retweets,Likes,Country,Year,Month,Day,Hour
0,0,Enjoying a beautiful day at the park! ...,Positive,2023-01-15 12:30:00,User123,Twitter,#Nature #Park,15.0,30.0,USA,2023,1,15,12
1,1,Traffic was terrible this morning. ...,Negative,2023-01-15 08:45:00,CommuterX,Twitter,#Traffic #Morning,5.0,10.0,Canada,2023,1,15,8
2,2,Just finished an amazing workout! 💪 ...,Positive,2023-01-15 15:45:00,FitnessFan,Instagram,#Fitness #Workout,20.0,40.0,USA,2023,1,15,15
3,3,Excited about the upcoming weekend getaway! ...,Positive,2023-01-15 18:20:00,AdventureX,Facebook,#Travel #Adventure,8.0,15.0,UK,2023,1,15,18
4,4,Trying out a new recipe for dinner tonight. ...,Neutral,2023-01-15 19:55:00,ChefCook,Instagram,#Cooking #Food,12.0,25.0,Australia,2023,1,15,19


In [12]:
# Let's say we want to predict 'Likes' based on other numerical/categorical features
X = df[['Hour','Day','Month','Retweets']]
y = df['Likes'] # target Coloumn
X.head()

Unnamed: 0,Hour,Day,Month,Retweets
0,12,15,1,15.0
1,8,15,1,5.0
2,15,15,1,20.0
3,18,15,1,8.0
4,19,15,1,12.0


In [13]:
X.shape

(732, 4)

In [14]:
y.shape

(732,)

In [15]:
#training and testing sets
X_train, X_test, y_train, y_test = train_test_split (X, y, random_state=52 , test_size=0.3)
#X_train.shape
#y_test.shape
print("Training set size:", X_train.shape)
print("Testing set size:", X_test.shape)

Training set size: (512, 4)
Testing set size: (220, 4)


In [16]:
print("Training set size:", y_train.shape)
print("Testing set size:", y_test.shape)

Training set size: (512,)
Testing set size: (220,)


## **Fit a Linear Regression Model Using Scikit-learn**

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [18]:
# Let's say we want to predict 'Likes' based on other numerical/categorical features
X = df[['Hour','Day','Month','Retweets']]
y = df['Likes'] # target Coloumn
#training and testing sets
X_train, X_test, y_train, y_test = train_test_split (X, y, random_state=52 , test_size=0.3)

#train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error (MSE):", round(mse, 2))
print("R-squared (R²):", round(r2, 2))

print("\nModel Coefficients:")
for feature, coef in zip(X.columns, model.coef_):
    print(f"{feature}: {coef:.2f}")
print(f"Intercept: {model.intercept_:.2f}")


Mean Squared Error (MSE): 0.69
R-squared (R²): 1.0

Model Coefficients:
Hour: -0.00
Day: 0.00
Month: -0.04
Retweets: 1.99
Intercept: 0.25


# **Interpret the coefficients and evaluate the model using metrics such as R-squared and mean squared error.**

In [20]:
# 3: Train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# 4: Predict on test set
y_pred = model.predict(X_test)

# 5: Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Model Evaluation Metrics")
print("----------------------------")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R²): {r2:.2f}")

# 6: Interpret coefficients
print("\n Model Coefficients (Feature Impact):")
for feature, coef in zip(X.columns, model.coef_):
    direction = "increase" if coef > 0 else "decrease"
    print(f"- For every 1 unit {feature} increases, Likes are expected to {direction} by {abs(coef):.2f} (holding other features constant).")

print(f"\nIntercept (baseline Likes when all features = 0): {model.intercept_:.2f}")

Model Evaluation Metrics
----------------------------
Mean Squared Error (MSE): 0.69
R-squared (R²): 1.00

 Model Coefficients (Feature Impact):
- For every 1 unit Hour increases, Likes are expected to decrease by 0.00 (holding other features constant).
- For every 1 unit Day increases, Likes are expected to increase by 0.00 (holding other features constant).
- For every 1 unit Month increases, Likes are expected to decrease by 0.04 (holding other features constant).
- For every 1 unit Retweets increases, Likes are expected to increase by 1.99 (holding other features constant).

Intercept (baseline Likes when all features = 0): 0.25
