In [3]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# 1) Build the dataset.
# The values are hard-coded; each tuple is one student, in the order given by
# column_names.
column_names = ('Hours_Studied', 'Attendance', 'Score')
student_rows = [
    (10, 90, 85),
    (9, 80, 80),
    (2, 60, 40),
    (15, 95, 95),
    (10, 70, 60),
    (16, 98, 98),
    (11, 85, 85),
    (16, 100, 100),
    (5, 50, 45),
    (8, 75, 70),
]

# Transpose the rows into a {column name: list of values} mapping.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*student_rows))
}
df = pd.DataFrame(data)
print("Dataset:", df, sep="\n")

# 2) Prepare the target (Score) and the two predictor columns.
y = df['Score']
X = df[['Hours_Studied', 'Attendance']]

# Split the dataset into training and testing sets: 30% of the rows are held
# out for testing, and random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Create and train the multiple linear regression model
# (fit() returns the fitted estimator, so construction and training are chained).
model = LinearRegression().fit(X_train, y_train)

# Predict the scores for the held-out test set.
y_pred = model.predict(X_test)

# Evaluate the model on the test set.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Print the fitted coefficients and the test-set metrics.
print("\nMultiple Linear Regression Model:")
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared Score: {r2:.2f}")

# Show actual vs predicted values, one test sample per line.
print("\nActual vs Predicted Scores:")
for actual, pred in zip(y_test, y_pred):
    print(f"Actual: {actual:.2f}, Predicted: {pred:.2f}")

# Conclusion of the above code.
print("\nConclusion:")
print("The model uses Hours Studied and Attendance to predict the Score.")
print("Coefficients indicate the influence of each feature on the score.")
# Only claim a good fit when the computed R-squared supports it; the original
# printed that sentence unconditionally. 0.9 is a rule-of-thumb cutoff for
# "near 1", not a statistical threshold.
if r2 >= 0.9:
    print("The R-squared score near 1 suggests a good fit on this small dataset.")
else:
    print(f"The R-squared score of {r2:.2f} is not near 1, so the fit on this small dataset is weak.")
print("Lower Mean Squared Error means the predictions are close to actual values.")

# Visualization: actual vs predicted scores.
# Each point is one test sample; points on the dashed diagonal are perfect
# predictions.
plt.figure(figsize=(8,5))
plt.scatter(y_test, y_pred, color='pink', edgecolor='k', alpha=0.7)

# Reference line y=x. Its endpoints cover both the actual and the predicted
# values, so a prediction that falls outside the range of the actual scores
# still has the diagonal to compare against.
line_lo = min(min(y_test), min(y_pred))
line_hi = max(max(y_test), max(y_pred))
plt.plot([line_lo, line_hi], [line_lo, line_hi], 'r--')

plt.xlabel('Actual Scores')
plt.ylabel('Predicted Scores')
plt.title('Multiple Linear Regression: Actual vs Predicted Scores')
plt.grid(True)
plt.tight_layout()
plt.show()


ModuleNotFoundError: No module named 'pandas'