In [9]:
# ------------------------------------------------------------
# Salary data: load the CSV, pick X / y, hold out a test set
# ------------------------------------------------------------

import pandas as pd
from sklearn.model_selection import train_test_split

# --- Data collection: read the raw CSV into a DataFrame ---
dataset = pd.read_csv("Salary_Data.csv")
print("Dataset loaded successfully ✅")

# --- Feature selection ---
# Selecting with a *list* of labels keeps each result a one-column
# DataFrame (2-D) instead of collapsing it to a Series.
independent = dataset.loc[:, ["YearsExperience"]]  # X: predictor
dependent = dataset.loc[:, ["Salary"]]             # y: target

print("\nIndependent column:", independent.columns)
print("Dependent column:", dependent.columns)

# --- Data inspection: first rows of each frame ---
for heading, frame in (
    ("\nIndependent data (X):", independent),
    ("\nDependent data (y):", dependent),
):
    print(heading)
    print(frame.head())

# --- Train/test split: 30% held out, fixed seed for repeatable splits ---
x_train, x_test, y_train, y_test = train_test_split(
    independent, dependent, test_size=0.30, random_state=0
)

# --- Shape verification ---
for caption, part in (
    ("\nX Train shape:", x_train),
    ("X Test shape:", x_test),
    ("Y Train shape:", y_train),
    ("Y Test shape:", y_test),
):
    print(caption, part.shape)


Independent column: Index(['YearsExperience'], dtype='object')
Dependent column: Index(['Salary'], dtype='object')

Independent data (X):
   YearsExperience
0              1.1
1              1.3
2              1.5
3              2.0
4              2.2

Dependent data (y):
    Salary
0  39343.0
1  46205.0
2  37731.0
3  43525.0
4  39891.0

X Train shape: (21, 1)
X Test shape: (9, 1)
Y Train shape: (21, 1)
Y Test shape: (9, 1)


In [12]:
# ------------------------------------------------------------
# Fit a linear regression, inspect it, score it on the test set
# ------------------------------------------------------------

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# --- Learning phase ---
# fit() returns the estimator itself, so creation and training can be chained.
regressor = LinearRegression().fit(x_train, y_train)

# --- Learned parameters of the line y = m*x + c ---
weight = regressor.coef_       # slope m
bias = regressor.intercept_    # intercept c
print("Weight (Slope m):", weight)
print("Bias (Intercept c):", bias)

# --- Prediction phase: predicted salaries for the held-out rows ---
y_pred = regressor.predict(x_test)
print("\nPredicted values:", y_pred, sep="\n")

# --- Evaluation: R² of the predictions against the true test targets ---
r_score = r2_score(y_true=y_test, y_pred=y_pred)
print("\nR2 Score:", r_score)



Weight (Slope m): [[9360.26128619]]
Bias (Intercept c): [26777.3913412]

Predicted values:
[[ 40817.78327049]
 [123188.08258899]
 [ 65154.46261459]
 [ 63282.41035735]
 [115699.87356004]
 [108211.66453108]
 [116635.89968866]
 [ 64218.43648597]
 [ 76386.77615802]]

R2 Score: 0.9740993407213511


In [25]:
# ==============================
# MODEL SAVING
# ==============================

import pickle

filename = "finalized_model_linear.sav"

# Use a context manager so the file is flushed and closed even if
# pickling raises; a bare open(...) passed to dump() is never closed.
with open(filename, "wb") as model_file:
    pickle.dump(regressor, model_file)

print("Model saved successfully ✅")

# ==============================
# MODEL LOADING
# ==============================

# NOTE: unpickling can execute arbitrary code. Only load files that this
# notebook (or another trusted source) produced.
with open(filename, "rb") as model_file:
    loaded_model = pickle.load(model_file)

print("Model loaded successfully ✅")

# ==============================
# DEPLOYMENT – PREDICTION ON NEW DATA
# ==============================

import pandas as pd

# Create input with the same column name the model was trained on.
# The regressor was fitted on a DataFrame with column "YearsExperience";
# an unnamed frame makes recent scikit-learn versions warn that X does
# not have valid feature names.
input_df = pd.DataFrame([[13]], columns=["YearsExperience"])

result = loaded_model.predict(input_df)

print("\nPredicted Salary for 13 Years Experience:", result)


Model saved successfully ✅
Model loaded successfully ✅

Predicted Salary for 13 Years Experience: [[148460.78806172]]




In [15]:
# Updated version: the full pipeline (load, train, evaluate, save, predict) in one cell.

In [23]:
# =========================================
# End-to-end pipeline: load the salary data, train a linear regression,
# evaluate it, persist it with pickle, reload it, and predict a salary
# for a user-supplied number of years of experience.
# =========================================

import pickle

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# =========================================
# 1. DATA COLLECTION
# =========================================

dataset = pd.read_csv("Salary_Data.csv")
print("Dataset loaded successfully ✅")

# =========================================
# 2. DATA PREPROCESSING
# =========================================

# Feature selection. The double brackets keep X and y as one-column
# DataFrames (2-D), which also preserves the column name for the model.
X = dataset[["YearsExperience"]]
y = dataset[["Salary"]]

print("\nIndependent column:", X.columns)
print("Dependent column:", y.columns)

print("\nSample X data:")
print(X.head())

print("\nSample y data:")
print(y.head())

# =========================================
# 3. TRAIN TEST SPLIT
# =========================================

# 30% of the rows are held out; random_state pins the shuffle so the
# split (and therefore every number below) is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=0
)

print("\nX Train shape:", x_train.shape)
print("X Test shape:", x_test.shape)
print("Y Train shape:", y_train.shape)
print("Y Test shape:", y_test.shape)

# =========================================
# 4. MODEL CREATION – LEARNING PHASE
# =========================================

regressor = LinearRegression()
regressor.fit(x_train, y_train)

print("\nModel trained successfully ✅")

# =========================================
# 5. MODEL PARAMETERS
# =========================================

weight = regressor.coef_       # slope m
bias = regressor.intercept_    # intercept c

print("\nWeight (Slope m):", weight)
print("Bias (Intercept c):", bias)

# =========================================
# 6. PREDICTION (TEST DATA)
# =========================================

y_pred = regressor.predict(x_test)

print("\nPredicted values for test data:")
print(y_pred)

# =========================================
# 7. MODEL EVALUATION
# =========================================

r_score = r2_score(y_test, y_pred)
print("\nR2 Score:", r_score)

# =========================================
# 8. MODEL SAVING
# =========================================

filename = "finalized_model_linear.sav"

with open(filename, "wb") as file:
    pickle.dump(regressor, file)

print("\nModel saved successfully ✅")

# =========================================
# 9. MODEL LOADING
# =========================================

# Reuse `filename` so the save and load paths cannot drift apart.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(filename, "rb") as file:
    loaded_model = pickle.load(file)

print("Model loaded successfully ✅")

# =========================================
# 10. DEPLOYMENT – USER INPUT PREDICTION
# =========================================

# Parse as float, not int: the training feature is fractional
# (e.g. 1.1, 1.3), and int() would raise ValueError on input like "2.5".
# Whole numbers such as "13" still parse and give the same prediction.
years = float(input("\nEnter Years of Experience: "))

# Column name must match the one used at fit time.
input_df = pd.DataFrame([[years]], columns=["YearsExperience"])

result = loaded_model.predict(input_df)

print("Predicted Salary:", result)


Dataset loaded successfully ✅

Independent column: Index(['YearsExperience'], dtype='object')
Dependent column: Index(['Salary'], dtype='object')

Sample X data:
   YearsExperience
0              1.1
1              1.3
2              1.5
3              2.0
4              2.2

Sample y data:
    Salary
0  39343.0
1  46205.0
2  37731.0
3  43525.0
4  39891.0

X Train shape: (21, 1)
X Test shape: (9, 1)
Y Train shape: (21, 1)
Y Test shape: (9, 1)

Model trained successfully ✅

Weight (Slope m): [[9360.26128619]]
Bias (Intercept c): [26777.3913412]

Predicted values for test data:
[[ 40817.78327049]
 [123188.08258899]
 [ 65154.46261459]
 [ 63282.41035735]
 [115699.87356004]
 [108211.66453108]
 [116635.89968866]
 [ 64218.43648597]
 [ 76386.77615802]]

R2 Score: 0.9740993407213511

Model saved successfully ✅
Model loaded successfully ✅



Enter Years of Experience:  13


Predicted Salary: [[148460.78806172]]
