In [None]:
pip install numpy pandas matplotlib scikit-learn


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Build a small synthetic "experience vs. salary" dataset.
# numpy (np) and pandas (pd) come from the notebook's import cell.

# Seed NumPy's global RNG so the same dataset is produced on every run
np.random.seed(42)

N_SAMPLES = 100

# Years of experience: normal draw (mean 1.5, std 2.5). The raw draw goes
# negative for a sizeable share of samples, which is meaningless for tenure
# and would produce negative salaries, so it is floored at zero.
experience = np.clip(2.5 * np.random.randn(N_SAMPLES) + 1.5, 0, None)

# Salary: 10,000 per year of experience plus unit-variance noise, floored at
# zero so the noise cannot push a zero-experience salary below 0.
salary = np.clip(10000 * experience + np.random.randn(N_SAMPLES), 0, None)

# Round both experience and salary values to the nearest integer
X = np.round(experience)
y = np.round(salary)

# Collect the two columns into a DataFrame for the modelling cells below
data = pd.DataFrame({'Experience': X, 'Salary': y})

# Preview the first few rows (bare expression -> rich notebook display)
data.head()



In [None]:
# Predictor matrix: list-style selection keeps it a 2-D DataFrame
X = data.loc[:, ['Experience']]
# Response vector: a single column selected as a Series
y = data.loc[:, 'Salary']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Build the estimator and fit it on the training split in one step
# (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Pull out the learned parameters: b0 is the intercept, b1 the slope of the
# single feature
intercept, coefficient = model.intercept_, model.coef_[0]

print(f"Intercept (b0): {intercept:.2f}")
print(f"Coefficient (b1): {coefficient:.2f}")


In [None]:
# Training observations with the fitted line drawn on top, using the
# explicit figure/axes interface
fig, ax = plt.subplots(figsize=(10, 6))

# Raw training points
ax.scatter(X_train, y_train, color='blue', label='Training Data')

# Model predictions over the same x-values trace out the regression line
ax.plot(X_train, model.predict(X_train), color='red', linewidth=2, label='Regression Line')

# Axis labels, title and legend
ax.set_xlabel('Years of Experience')
ax.set_ylabel('Salary')
ax.set_title('Linear Regression - Training Data')
ax.legend()
plt.show()


In [None]:
# Score the fitted model on the held-out test split
y_pred = model.predict(X_test)

# Mean squared error and coefficient of determination of the predictions
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# One metric per line
print(
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R-squared (R2 ): {r2:.2f}",
    sep="\n",
)


In [None]:

# Experience levels (in years) to score, as a 2-D array:
# one row per query, one column for the single feature.
# NOTE: the synthetic training data is drawn with mean 1.5 and std 2.5, so
# 10 and 15 years are extrapolations well beyond what the model was fitted on.
new_experience = np.array([[5], [10], [15]])

# The model was fitted on a DataFrame, so scikit-learn recorded its feature
# names. Label the queries with the same columns before predicting; passing
# the bare array triggers an "X does not have valid feature names" UserWarning.
new_experience_frame = pd.DataFrame(new_experience, columns=X_train.columns)

# Predict salaries for the new experience levels
predicted_salary = model.predict(new_experience_frame)

# Display the predictions (each `exp` is a one-element row of the query array)
for exp, sal in zip(new_experience, predicted_salary):
    print(f"Years of Experience: {exp[0]} --> Predicted Salary: {sal:.2f}")
