'''Please provide a step-by-step explanation of how to create a simple linear regression model to
predict a student's exam score based on the number of hours studied. Include instructions on 
generating synthetic data for this example and clarify the significance of the slope and 
intercept in interpreting the model's predictions.'''

# Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Step 2: Data Preparation

In [2]:
# Generate a reproducible synthetic dataset for the example.
# Exam score is built as a straight-line function of hours studied plus Gaussian
# noise, so the "true" slope and intercept are known in advance and can be
# compared with the values the fitted model recovers.
N_STUDENTS = 100      # number of synthetic observations
MAX_HOURS = 10        # hours studied are drawn uniformly from [0, MAX_HOURS)
TRUE_INTERCEPT = 50   # expected score for a student who studies 0 hours
TRUE_SLOPE = 5        # expected score gain per additional hour studied
NOISE_STD = 2         # standard deviation of the noise added to each score

np.random.seed(0)  # seed the global NumPy RNG so every run yields the same data
hours_studied = np.random.uniform(0, MAX_HOURS, N_STUDENTS)
noise = np.random.normal(0, NOISE_STD, N_STUDENTS)
exam_scores = TRUE_INTERCEPT + TRUE_SLOPE * hours_studied + noise


In [3]:
# Combine the two arrays into a single table: one row per student,
# one named column per variable. These column names are what later
# cells use to pick out the feature and the target.
data = pd.DataFrame(
    {
        'Hours_Studied': hours_studied,
        'Exam_Scores': exam_scores,
    }
)


In [None]:
# Preview the first few rows only; rendering the whole frame adds bulk to the
# notebook without telling the reader anything more about its structure.
data.head()

# Step 3: Data Splitting

In [5]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# X keeps the double brackets so it stays a 2-D DataFrame (one feature column);
# y is the 1-D target column. random_state pins the shuffle for reproducibility.
X = data[['Hours_Studied']]
y = data['Exam_Scores']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Sanity-check the split: print the number of rows in
# X_train, X_test, y_train and y_test, in that order.
for subset in (X_train, X_test, y_train, y_test):
    print(len(subset))


# Step 4: Linear Regression Modeling

In [None]:
# Instantiate an ordinary least-squares regressor, then fit it on the
# training portion only so the test rows remain unseen by the model.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

# Step 5: Model Evaluation

In [8]:
# Ask the fitted model for a predicted exam score for every held-out row
y_pred = model.predict(X=X_test)




In [None]:
# Display the raw predictions returned by model.predict for the rows of X_test
y_pred

In [None]:
# Display the actual exam scores held out by the split, for comparison with y_pred
y_test

In [None]:
# Print each held-out actual score next to the model's prediction for the same row
for actual, predicted in zip(y_test, y_pred):
    print("y_test", actual, "   ", "y_pred", predicted)

In [None]:
# Score the predictions against the held-out truth.
# MSE: average squared prediction error (lower is better, in squared score units).
# R-squared: share of the variance in the test scores explained by the model.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

# Step 6: Visualization and Interpretation

In [None]:
# Draw the held-out observations and overlay the model's predictions for them.
# The predictions of a one-feature linear model all lie on a single straight
# line, so joining them in any order traces the regression line.
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, color='blue', label='Actual Data')
ax.plot(X_test, y_pred, color='red', label='Regression Line', linewidth=2)
ax.set_xlabel('Hours Studied')
ax.set_ylabel('Exam Scores')
ax.set_title('Simple Linear Regression: Hours Studied vs. Exam Scores')
ax.legend()
plt.show()

In [None]:
# Interpret the model's coefficients.
# slope:     change in the predicted exam score for one additional hour studied.
# intercept: predicted exam score for a student who studies 0 hours.
# The synthetic scores were generated as 50 + 5 * hours + noise, so a good fit
# should report a slope close to 5 and an intercept close to 50.
slope = model.coef_[0]  # single feature, so coef_ holds exactly one value
intercept = model.intercept_

print(f'Slope (Coefficient): {slope:.2f}')
print(f'Intercept: {intercept:.2f}')

# Spell the meaning out in the output so the numbers are not left unexplained
print(f'Each additional hour studied changes the predicted exam score by {slope:.2f} points.')
print(f'A student who studies 0 hours is predicted to score {intercept:.2f}.')